import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
sys_message = """
This model can generate untruths, lies, or inappropriate content. For testing and validation only.
"""
pipe = pipeline("text-generation", model="tevykuch/sftsl0th", device=0, framework="pt")
# Generation settings passed through to model.generate() on every call
generation_config = {
    "max_new_tokens": 2048,
    "do_sample": True,  # required for temperature/top_p/top_k to take effect
    "temperature": 0.50,
    "top_p": 0.95,
    "top_k": 30,
    "repetition_penalty": 1.1,
    "eos_token_id": pipe.tokenizer.eos_token_id,
}
# Alternative: load the tokenizer and model directly instead of using a pipeline.
# tokenizer = AutoTokenizer.from_pretrained("tevykuch/sftsl0th")
# llm = AutoModelForCausalLM.from_pretrained("tevykuch/sftsl0th")
# def stream(prompt):
#     # Tokenize the prompt
#     inputs = tokenizer.encode(prompt, return_tensors="pt")
#     # Generate a response
#     output_ids = llm.generate(inputs, **generation_config)
#     # Decode the generated ids to a string
#     response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
#     return response
def stream(prompt, history):
    # gr.ChatInterface calls fn(message, history); history is unused here
    outputs = pipe(prompt, **generation_config)
    response = outputs[0]["generated_text"]
    return response
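# Optional: a true token-by-token streaming variant. This is a minimal sketch using
# transformers' TextIteratorStreamer and Gradio's support for generator fns; it is
# not wired into the UI below, and stream_tokens is a hypothetical name.
def stream_tokens(prompt, history):
    from threading import Thread
    from transformers import TextIteratorStreamer
    streamer = TextIteratorStreamer(pipe.tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = pipe.tokenizer(prompt, return_tensors="pt").to(pipe.model.device)
    # Run generation in a background thread; the streamer yields text as it is produced
    thread = Thread(target=pipe.model.generate, kwargs={**inputs, **generation_config, "streamer": streamer})
    thread.start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial  # ChatInterface re-renders each partial response as it arrives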
chat_interface = gr.ChatInterface(
    fn=stream,
    stop_btn=None,
    # Khmer-language example prompts
    examples=[
        ["ααΎααααααααααΆααααΈααΆαα’αααΈαααα?"],
        ["ααΎααΎαα’αΆαααΆαααααααααΆααααα»αααα·ααΆααΆααααΆαααΌααααααα?"],
        ["αααααΆααααΈαααααααΆαααα’αααααααΌαααααΎααΆαααααααα·αααααααΆαα"],
        ["αααααα’ααααααααΆααα½αααααααα"],
        ["αααααααΏαααααΈαα½ααααα»αααΆαααααΎαα·αααααααααααααα»ααααααΈααΈα’αααΈαα½α―ααααααααΌαααααΎααΆαααααααα·αααα’αΆααΈαααααααΆαααα½αα"],
        ["ααΆααααααααααααααααααααΆααααα α»αα’αααααΆαα·αα»ααααα·αααααααΆαααα"]
    ],
)
with gr.Blocks() as demo:
    gr.HTML("<h1><center>sl0th inference tester only (not final)</center></h1>")
    gr.HTML(
        "<h4 style='text-align: center'>"
        "<a href='https://huggingface.co/tevykuch/sl0th' target='_blank'>Model: Sl0th Mistral 7b 0.2</a>"
        "</h4>"
    )
    gr.HTML("<p><center>Finetuned from <a href='https://huggingface.co/unsloth/mistral-7b-bnb-4bit' target='_blank'>Mistral 7b</a>; thanks to the dataset maker (my coworker) for <a href='https://huggingface.co/datasets/metythorn/khmerllm-dataset-alpaca-52k-v1'>khmerllm-dataset-alpaca-52k-v1</a>.</center></p>")
    chat_interface.render()
    gr.Markdown(sys_message)
    gr.DuplicateButton(value="Duplicate the Magic", elem_id="duplicate-button")
if __name__ == "__main__":
    demo.queue(max_size=10).launch()