nouamanetazi HF Staff committed on
Commit
48e09b8
·
verified ·
1 Parent(s): 8093b01

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -25
app.py CHANGED
@@ -7,6 +7,7 @@ from datasets import load_dataset
7
  from huggingface_hub import CommitScheduler
8
  from pathlib import Path
9
  import uuid
 
10
 
11
 
12
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
@@ -37,15 +38,22 @@ examples = [
37
  , 256, 0.7, 0.9, 150, 8, 1.5],
38
  ]
39
 
40
- #inf_dataset=load_dataset("atlasia/atlaset_inference_ds",token=token,split="test",name="llm")
41
  submit_file = Path("user_submit/") / f"data_{uuid.uuid4()}.json"
 
 
 
 
 
42
  scheduler = CommitScheduler(
43
- repo_id="atlasia/atlaset_inference_ds",
44
- repo_type="dataset",
45
- folder_path=submit_file,
46
- every=5,
47
- token=token
48
- )
 
 
49
  @spaces.GPU
50
  def generate_text(prompt, max_length=256, temperature=0.7, top_p=0.9, top_k=150, num_beams=8, repetition_penalty=1.5):
51
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
@@ -63,33 +71,57 @@ def generate_text(prompt, max_length=256, temperature=0.7, top_p=0.9, top_k=150,
63
  eos_token_id=tokenizer.eos_token_id, # Explicit eos token
64
  )
65
  result=tokenizer.decode(output[0], skip_special_tokens=True)
66
- #inf_dataset.add_item({"inputs":prompt,"outputs":result,"params":f"{max_length},{temperature},{top_p},{top_k},{num_beams},{repetition_penalty}"})
67
  save_feedback(prompt,result,f"{max_length},{temperature},{top_p},{top_k},{num_beams},{repetition_penalty}")
68
  return result
69
 
70
- def save_feedback(input,output,params) -> None:
 
 
 
 
71
  with scheduler.lock:
72
- with submit_file.open("a") as f:
73
  f.write(json.dumps({"input": input, "output": output, "params": params}))
74
  f.write("\n")
75
 
76
  if __name__ == "__main__":
77
  # Create the Gradio interface
78
  with gr.Blocks() as app:
79
- gr.Interface(
80
- fn=generate_text,
81
- inputs=[
82
- gr.Textbox(label="Prompt: دخل النص بالدارجة"),
83
- gr.Slider(8, 4096, value=256, label="Max Length"),
84
- gr.Slider(0.0, 2, value=0.7, label="Temperature"),
85
- gr.Slider(0.0, 1.0, value=0.9, label="Top-p"),
86
- gr.Slider(1, 10000, value=150, label="Top-k"),
87
- gr.Slider(1, 20, value=8, label="Number of Beams"),
88
- gr.Slider(0.0, 100.0, value=1.5, label="Repetition Penalty"),
89
- ],
90
- outputs=gr.Textbox(label="Generated Text in Moroccan Darija"),
91
- title="Moroccan Darija LLM",
92
- description="Enter a prompt and get AI-generated text using our pretrained LLM on Moroccan Darija.",
 
 
 
93
  examples=examples,
 
 
 
 
 
 
 
 
 
 
 
94
  )
95
- app.launch()
 
 
 
 
 
 
 
 
7
  from huggingface_hub import CommitScheduler
8
  from pathlib import Path
9
  import uuid
10
+ import json
11
 
12
 
13
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
 
38
  , 256, 0.7, 0.9, 150, 8, 1.5],
39
  ]
40
 
41
+ # Define the file where the data is saved
42
  submit_file = Path("user_submit/") / f"data_{uuid.uuid4()}.json"
43
+ feedback_file = submit_file
44
+
45
+ # Create directory if it doesn't exist
46
+ submit_file.parent.mkdir(exist_ok=True, parents=True)
47
+
48
  scheduler = CommitScheduler(
49
+ repo_id="atlasia/atlaset_inference_ds",
50
+ repo_type="dataset",
51
+ folder_path=submit_file.parent,
52
+ path_in_repo="data",
53
+ every=5,
54
+ token=token
55
+ )
56
+
57
  @spaces.GPU
58
  def generate_text(prompt, max_length=256, temperature=0.7, top_p=0.9, top_k=150, num_beams=8, repetition_penalty=1.5):
59
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
71
  eos_token_id=tokenizer.eos_token_id, # Explicit eos token
72
  )
73
  result=tokenizer.decode(output[0], skip_special_tokens=True)
 
74
  save_feedback(prompt,result,f"{max_length},{temperature},{top_p},{top_k},{num_beams},{repetition_penalty}")
75
  return result
76
 
77
+ def save_feedback(input, output, params) -> None:
78
+ """
79
+ Append the input, output, and parameters to a JSON Lines file using a thread lock
80
+ to avoid concurrent writes from different users.
81
+ """
82
  with scheduler.lock:
83
+ with feedback_file.open("a") as f:
84
  f.write(json.dumps({"input": input, "output": output, "params": params}))
85
  f.write("\n")
86
 
87
  if __name__ == "__main__":
88
  # Create the Gradio interface
89
  with gr.Blocks() as app:
90
+ with gr.Row():
91
+ with gr.Column():
92
+ prompt_input = gr.Textbox(label="Prompt: دخل النص بالدارجة")
93
+ max_length = gr.Slider(8, 4096, value=256, label="Max Length")
94
+ temperature = gr.Slider(0.0, 2, value=0.7, label="Temperature")
95
+ top_p = gr.Slider(0.0, 1.0, value=0.9, label="Top-p")
96
+ top_k = gr.Slider(1, 10000, value=150, label="Top-k")
97
+ num_beams = gr.Slider(1, 20, value=8, label="Number of Beams")
98
+ repetition_penalty = gr.Slider(0.0, 100.0, value=1.5, label="Repetition Penalty")
99
+
100
+ submit_btn = gr.Button("Generate")
101
+
102
+ with gr.Column():
103
+ output_text = gr.Textbox(label="Generated Text in Moroccan Darija")
104
+
105
+ # Examples section with caching
106
+ gr.Examples(
107
  examples=examples,
108
+ inputs=[prompt_input, max_length, temperature, top_p, top_k, num_beams, repetition_penalty],
109
+ outputs=output_text,
110
+ fn=generate_text,
111
+ cache_examples=True
112
+ )
113
+
114
+ # Button action
115
+ submit_btn.click(
116
+ generate_text,
117
+ inputs=[prompt_input, max_length, temperature, top_p, top_k, num_beams, repetition_penalty],
118
+ outputs=output_text
119
  )
120
+
121
+ gr.Markdown("""
122
+ # Moroccan Darija LLM
123
+
124
+ Enter a prompt and get AI-generated text using our pretrained LLM on Moroccan Darija.
125
+ """)
126
+
127
+ app.launch()