Spaces:
Sleeping
Sleeping
File size: 2,792 Bytes
d44918c 8edd5de d44918c 8edd5de d44918c 8edd5de d44918c 8edd5de d44918c 8edd5de d44918c 8edd5de d44918c b5a6947 d44918c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import gradio as gr
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, TextIteratorStreamer
# Hugging Face Hub repository id of the checkpoint served by this demo.
model_id = "rasyosef/llama-3.2-amharic-64k-1024"
# Load the tokenizer and the causal LM once, at import time; generate()
# reuses these module-level objects for every request.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
# Combined budget (prompt tokens + newly generated tokens) for one request.
MAX_TOTAL_TOKENS = 128


def generate(prompt):
    """Stream a sampled completion of ``prompt`` from the module-level model.

    This is a generator. Each yielded value is the full text produced so far
    (the prompt itself is included because the streamer is created with
    ``skip_prompt=False``), stripped of surrounding whitespace, so a UI can
    simply overwrite its output widget with the latest value.

    Args:
        prompt: Text to complete.

    Yields:
        The accumulated text after each chunk emitted by the streamer. If the
        prompt tokenizes to ``MAX_TOTAL_TOKENS`` or more tokens, a single
        value is yielded instead: the prompt followed by an explanatory
        message, and no generation is attempted.

    Raises:
        Exception: Whatever ``model.generate`` raised in the worker thread,
            re-raised here once the stream has been drained.
    """
    prompt_length = len(tokenizer.tokenize(prompt))
    if prompt_length >= MAX_TOTAL_TOKENS:
        yield (
            prompt
            + f"\n\nPrompt is too long. It needs to be less than {MAX_TOTAL_TOKENS} tokens."
        )
        return

    inputs = tokenizer(prompt, return_tensors="pt")
    streamer = TextIteratorStreamer(
        tokenizer=tokenizer,
        skip_prompt=False,
        skip_special_tokens=True,
        timeout=300.0,
    )

    # Some tokenizers define no pad token; fall back to EOS rather than
    # handing ``None`` to generate().
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    generation_kwargs = {
        "inputs": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"],
        # The guard above guarantees prompt_length < MAX_TOTAL_TOKENS, so
        # this is always at least 1.
        "max_new_tokens": MAX_TOTAL_TOKENS - prompt_length,
        "temperature": 0.3,
        "do_sample": True,
        "top_k": 8,
        "top_p": 0.8,
        "repetition_penalty": 1.25,
        "streamer": streamer,
        "pad_token_id": pad_token_id,
        "eos_token_id": tokenizer.eos_token_id,
    }

    failures = []

    def run_generation():
        # Runs in the worker thread. If generate() fails, the streamer would
        # otherwise never be told the stream is over and the consumer loop
        # below would sit waiting until the streamer timeout expires. Record
        # the error and close the stream so the consumer can report it.
        try:
            model.generate(**generation_kwargs)
        except Exception as exc:
            failures.append(exc)
            streamer.end()

    thread = Thread(target=run_generation)
    thread.start()

    generated_text = ""
    for word in streamer:
        generated_text += word
        yield generated_text.strip()

    # The stream has ended, so the worker is finished (or about to be).
    thread.join()
    if failures:
        raise failures[0]
# Build the demo page. The prompt textbox is both the input and the output of
# the Generate button, so the streamed text replaces the prompt in place.
with gr.Blocks(css="#prompt_textbox textarea {color: blue}") as demo:
    # The Markdown source is kept at column 0 so its text is not indented
    # inside the string literal.
    gr.Markdown("""
# Llama 3.2 Amharic
This is a demo for [llama-3.2-amharic](https://huggingface.co/rasyosef/llama-3.2-amharic-64k-1024), a smaller version of Meta's [Llama-3.2-1B](https://huggingface.co/meta-llama/Llama-3.2-1B) decoder transformer model pretrained for 3 days on `210 million` tokens of **Amharic** text. This model has `179 million` parameters and a context size of `1024` tokens. This is a base model and hasn't undergone any supervised finetuning yet.
Please **enter a prompt** and click the **Generate** button to generate completions for the prompt.
#### Text generation parameters:
- `temperature` : **0.3**
- `do_sample` : **True**
- `top_k` : **8**
- `top_p` : **0.8**
- `repetition_penalty` : **1.25**
""")

    prompt = gr.Textbox(
        label="Prompt",
        placeholder="Enter prompt here",
        lines=4,
        interactive=True,
        elem_id="prompt_textbox",
    )

    # Two side-by-side buttons: run generation, or clear the textbox.
    with gr.Row():
        with gr.Column():
            gen = gr.Button("Generate")
        with gr.Column():
            btn = gr.ClearButton([prompt])

    gen.click(generate, inputs=[prompt], outputs=[prompt])

    # Example prompts (Amharic) that fill the textbox when clicked.
    # NOTE(review): these strings were restored from mis-decoded UTF-8 text;
    # confirm them against the upstream Space before release.
    examples = gr.Examples(
        examples=[
            "አዲስ አበባ",
            "በእንግሊዙ ፕሬሚየር ሊግ",
            "ፕሬዚዳንት ዶናልድ ትራምፕ",
            "በመስቀል አደባባይ",
        ],
        inputs=[prompt],
    )

# Enable request queuing and start the server.
demo.queue().launch(debug=True)