File size: 3,026 Bytes
809fde8
effb8bc
6447476
 
 
 
 
bf2fe7e
6447476
e7e5617
 
 
 
 
 
 
effb8bc
e7e5617
 
 
effb8bc
 
 
e7e5617
effb8bc
 
 
 
 
 
 
 
6447476
effb8bc
 
 
 
cfb8dea
6447476
 
 
 
 
 
 
 
 
 
 
 
 
 
afd9c2f
6447476
 
cfb8dea
6447476
 
afd9c2f
6447476
 
afd9c2f
6447476
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import gradio as gr 
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Disclaimer text; it is handed to gr.Markdown in the page layout.
sys_message = (
    "\n"
    "This model can generate untruths, lies or inappropriate things. "
    "Only for testing and validation. "
    "\n"
)

# Build the text-generation pipeline once at import time; `stream` reuses it
# for every request.
pipe = pipeline(
    task="text-generation",
    model="tevykuch/sftsl0th",
    framework="pt",
    device=0,
)

# Keyword arguments forwarded to every `pipe(...)` call made by `stream`.
generation_config = {
    "max_new_tokens": 2048,
    # Sampling must be enabled explicitly: without `do_sample=True` the
    # generate() default is greedy decoding, and `temperature`, `top_p` and
    # `top_k` below are ignored (transformers only emits a warning).
    "do_sample": True,
    "temperature": 0.50,
    "top_p": 0.95,
    "top_k": 30,
    "repetition_penalty": 1.1,
    # Stop generating once the tokenizer's end-of-sequence token is produced.
    "eos_token_id": pipe.tokenizer.eos_token_id,
}



# tokenizer = AutoTokenizer.from_pretrained("tevykuch/sftsl0th")
# llm = AutoModelForCausalLM.from_pretrained("tevykuch/sftsl0th")

# def stream(prompt):
#     # Tokenize the prompt
#     inputs = tokenizer.encode(prompt, return_tensors="pt")
#     # Generate a response
#     output_ids = llm.generate(inputs, **generation_config)
#     # Decode the generated ids to a string
#     response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
#     return response

def stream(prompt, history=None):
    """Generate the model's reply to ``prompt``.

    Despite the name, the reply is returned in one piece once generation
    has finished; nothing is yielded incrementally.

    Args:
        prompt: The user's message text.
        history: Earlier conversation turns. ``gr.ChatInterface`` passes this
            as a second positional argument, so it has to be accepted for the
            callback to be callable at all. It is not fed to the model.

    Returns:
        The generated continuation as a string, without the prompt repeated
        at the start.
    """
    # `return_full_text=False` keeps the pipeline from prepending the prompt
    # to "generated_text", which would otherwise echo the user's message back
    # in the chat window.
    outputs = pipe(prompt, return_full_text=False, **generation_config)
    return outputs[0]["generated_text"]

# Sample prompts offered by the chat widget; each entry is a one-element list.
# NOTE(review): these literals look mis-encoded in this view (presumably Khmer
# text decoded with the wrong codec) -- verify against the original file.
_example_prompts = [
    ["αžαžΎαž–αžŽαŸŒαž…αž˜αŸ’αž”αž„αž‘αžΆαŸ†αž„αž”αžΈαž˜αžΆαž“αž’αŸ’αžœαžΈαžαŸ’αž›αŸ‡?"],
    ["αžαžΎαž™αžΎαž„αž’αžΆαž…αž€αžΆαžαŸ‹αž”αž“αŸ’αžαž™αž€αžΆαžšαž”αŸ†αž–αž»αž›αž”αžšαž·αž™αžΆαž€αžΆαžŸαž™αŸ‰αžΆαž„αžŠαžΌαž…αž˜αŸ’αžαŸαž…?"],
    ["αžšαŸ€αž”αžšαžΆαž”αŸ‹αž–αžΈαž–αŸαž›αžœαŸαž›αžΆαžŠαŸ‚αž›αž’αŸ’αž“αž€αžαŸ’αžšαžΌαžœαž’αŸ’αžœαžΎαž€αžΆαžšαžŸαž˜αŸ’αžšαŸαž…αž…αž·αžαŸ’αžαž›αŸ†αž”αžΆαž€αŸ”"],
    ["αž€αŸ†αžŽαžαŸ‹αž’αžαŸ’αžαžŸαž‰αŸ’αž‰αžΆαžŽαž˜αž½αž™αžŸαŸαžŸαž…αŸαž‰αŸ”"],
    ["αžŸαžšαžŸαŸαžšαžšαžΏαž„αžαŸ’αž›αžΈαž˜αž½αž™αž€αŸ’αž“αž»αž„αž€αžΆαžšαž’αŸ’αžœαžΎαžœαž·αžŸαŸ„αž’αž“αž€αž˜αŸ’αž˜αžšαž”αžŸαŸ‹αž”αž»αž‚αŸ’αž‚αž›αž‘αžΈαž”αžΈαž’αŸ†αž–αžΈαžαž½αž―αž€αžŠαŸ‚αž›αžαŸ’αžšαžΌαžœαž’αŸ’αžœαžΎαž€αžΆαžšαžŸαž˜αŸ’αžšαŸαž…αž…αž·αžαŸ’αžαž’αžΆαž‡αžΈαž–αžŠαŸαžŸαŸ†αžαžΆαž“αŸ‹αž˜αž½αž™αŸ”"],
    ["αžœαžΆαž™αžαž˜αŸ’αž›αŸƒαž”αŸ’αžšαž™αŸ„αž‚αž“αŸαŸ‡αžŸαž˜αŸ’αžšαžΆαž”αŸ‹αž€αŸ†αž αž»αžŸαž’αž€αŸ’αžαžšαžΆαžœαž·αžšαž»αž‘αŸ’αž’αž“αž·αž„αžœαŸαž™αŸ’αž™αžΆαž€αžšαžŽαŸ"],
]

# Chat widget that routes each user message to `stream`.
chat_interface = gr.ChatInterface(
    fn=stream,
    examples=_example_prompts,
    stop_btn=None,
)

# Page layout, top to bottom: title, model link, credits, the chat widget,
# the disclaimer text and a duplicate button.
with gr.Blocks() as demo:
    # The heading and the credits paragraph previously "closed" their tags
    # with a second opening tag (<h1><center> ... <h1><center>), leaving the
    # elements unclosed. They are now closed properly and centred with the
    # same inline style the <h4> below already uses.
    gr.HTML(
        "<h1 style='text-align: center'>"
        "sl0th inference tester only (not final)"
        "</h1>"
    )
    # NOTE(review): this link targets tevykuch/sl0th while the pipeline loads
    # tevykuch/sftsl0th -- confirm which repository is intended.
    gr.HTML(
        "<h4 style='text-align: center'>"
        "<a href='https://huggingface.co/tevykuch/sl0th' target='_blank'>Model: Sl0th Mistral 7b 0.2</a> | "
        "</h4>"
    )
    gr.HTML(
        "<p style='text-align: center'>"
        "Finetune here <a href='https://huggingface.co/unsloth/mistral-7b-bnb-4bit' target='_blank'>Mistral 7b</a>"
        " thanks dataset maker (my coworker)"
        " <a href='https://huggingface.co/datasets/metythorn/khmerllm-dataset-alpaca-52k-v1'>Alpaca-data-pt-br</a>."
        "</p>"
    )
    # The ChatInterface was built outside this context, so it has to be
    # rendered explicitly to appear in the layout.
    chat_interface.render()
    gr.Markdown(sys_message)
    gr.DuplicateButton(value="Duplicate the Magic", elem_id="duplicate-button")
    
if __name__ == "__main__":
    # Enable the request queue with max_size=10, then start the app.
    demo.queue(max_size=10)
    demo.launch()