rasyosef committed
Commit d44918c · verified · 1 Parent(s): c1e53fd

Create app.py

Files changed (1)
app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
import gradio as gr
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "rasyosef/llama-3.2-amharic-28k-512"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

def generate(prompt):
    prompt_length = len(tokenizer.tokenize(prompt))
    if prompt_length >= 128:
        yield prompt + "\n\nPrompt is too long. It needs to be less than 128 tokens."
    else:
        inputs = tokenizer(prompt, return_tensors="pt")
        # Replace the first input token id with 0 (presumably this tokenizer's BOS token).
        inputs["input_ids"][0][0] = 0

        # Cap the completion so prompt + completion stays within 128 tokens.
        max_new_tokens = max(0, 128 - prompt_length)
        streamer = TextIteratorStreamer(tokenizer=tokenizer, skip_prompt=False, skip_special_tokens=True, timeout=300.0)
        # Run generation on a background thread so tokens can be read off the
        # streamer (and yielded to the UI) while the model is still generating.
        thread = Thread(
            target=model.generate,
            kwargs={
                "inputs": inputs["input_ids"],
                "attention_mask": inputs["attention_mask"],
                "max_new_tokens": max_new_tokens,
                "temperature": 0.4,
                "do_sample": True,
                "top_k": 8,
                "top_p": 0.8,
                "repetition_penalty": 1.4,
                "streamer": streamer,
                "pad_token_id": tokenizer.pad_token_id,
                "eos_token_id": tokenizer.eos_token_id,
            },
        )
        thread.start()

        # Re-yield the accumulated text after every chunk so the textbox updates live.
        generated_text = ""
        for word in streamer:
            generated_text += word
            response = generated_text.strip()
            yield response

with gr.Blocks(css="#prompt_textbox textarea {color: blue}") as demo:
    gr.Markdown("""
# Llama 3.2 Amharic

This is a demo for [llama-3.2-amharic](https://huggingface.co/rasyosef/llama-3.2-amharic-28k-512), a smaller version of Meta's [Llama-3.2-1B](https://huggingface.co/meta-llama/Llama-3.2-1B) decoder transformer model pretrained for 1.5 days on `276 million` tokens of **Amharic** text. This model has `155 million` parameters and a context size of `512` tokens. This is a base model and hasn't undergone any supervised finetuning yet.

Please **enter a prompt** and click the **Generate** button to generate completions for the prompt.

#### Text generation parameters:
- `temperature` : **0.4**
- `do_sample` : **True**
- `top_k` : **8**
- `top_p` : **0.8**
- `repetition_penalty` : **1.4**
""")

    prompt = gr.Textbox(label="Prompt", placeholder="Enter prompt here", lines=4, interactive=True, elem_id="prompt_textbox")
    with gr.Row():
        with gr.Column():
            gen = gr.Button("Generate")
        with gr.Column():
            btn = gr.ClearButton([prompt])
    gen.click(generate, inputs=[prompt], outputs=[prompt])
    examples = gr.Examples(
        examples=[
            "አዲስ አበባ",
            "በእንግሊዙ ፕሬሚየር ሊግ",
            "ፕሬዚዳንት ዶናልድ ትራምፕ",
            "በመስቀል አደባባይ",
        ],
        inputs=[prompt],
    )

demo.queue().launch(debug=True)
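
For quick testing outside the Gradio UI, the same checkpoint can be sampled with transformers' high-level text-generation pipeline. The sketch below is illustrative and not part of the commit: it reuses the app's model id and sampling parameters, while the prompt is taken from the app's examples and the `max_new_tokens` value is an arbitrary cap chosen here.

# Minimal sketch (not part of the commit): a one-shot, non-streaming completion
# with the same checkpoint and the app's sampling parameters.
from transformers import pipeline

generator = pipeline("text-generation", model="rasyosef/llama-3.2-amharic-28k-512")

output = generator(
    "አዲስ አበባ",          # one of the app's example prompts
    max_new_tokens=64,    # arbitrary cap for a quick test
    do_sample=True,
    temperature=0.4,
    top_k=8,
    top_p=0.8,
    repetition_penalty=1.4,
)
print(output[0]["generated_text"])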