alaamostafa committed on
Commit
3e19615
·
verified ·
1 Parent(s): 19b4fad

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -0
app.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from unsloth import FastLanguageModel
3
+ import torch
4
+
5
from unsloth.chat_templates import get_chat_template

# Key/role names used by the conversation dicts built in this app
# ("from"/"value" turns spoken by "human"/"gpt").
_SHAREGPT_MAPPING = {
    "role": "from",
    "content": "value",
    "user": "human",
    "assistant": "gpt",
}

# Load the uploaded fine-tuned checkpoint in 4-bit with a 2048-token window.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="alaamostafa/Mistral-7B-Unsloth",
    max_seq_length=2048,
    load_in_4bit=True,
)

# Switch the model into Unsloth's faster inference mode.
FastLanguageModel.for_inference(model)

# Wrap the tokenizer with the ChatML template, using the mapping above.
tokenizer = get_chat_template(
    tokenizer,
    chat_template="chatml",
    mapping=_SHAREGPT_MAPPING,
    map_eos_token=True,
)
23
+
24
+ # Text generation function
25
def _history_to_messages(history):
    """Convert Gradio chat history into ShareGPT-style ``from``/``value`` turns.

    Handles both history layouts ``gr.ChatInterface`` can pass to its
    callback: a list of ``(user, assistant)`` pairs, or a list of
    ``{"role": ..., "content": ...}`` dicts. Dict entries whose role is not
    ``user``/``assistant`` or whose content is not plain text are skipped,
    since the prompt built here is text-only. ``None`` halves of a pair are
    skipped so they never reach the chat template.
    """
    role_names = {"user": "human", "assistant": "gpt"}
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            speaker = role_names.get(turn.get("role"))
            text = turn.get("content")
            if speaker is not None and isinstance(text, str):
                messages.append({"from": speaker, "value": text})
            continue

        human, assistant = turn
        if human is not None:
            messages.append({"from": "human", "value": human})
        if assistant is not None:
            messages.append({"from": "gpt", "value": assistant})
    return messages


def generate_text(message, history):
    """Generate the assistant's reply to ``message`` given the prior chat.

    Args:
        message: The latest user message.
        history: Earlier turns as supplied by ``gr.ChatInterface``
            (pairs or role/content dicts).

    Returns:
        The decoded reply, with the prompt tokens sliced off and special
        tokens removed.
    """
    messages = _history_to_messages(history)
    messages.append({"from": "human", "value": message})

    # Tokenize the whole conversation and append the generation prompt.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(device)

    # do_sample=True is required for temperature/top_p to take effect;
    # without it generate() decodes greedily and ignores both settings.
    outputs = model.generate(
        input_ids=inputs,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        use_cache=True,
    )

    # generate() returns prompt + continuation; keep only the new tokens.
    prompt_length = inputs.shape[1]
    return tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
53
+
54
# Build the chat UI around generate_text, then start serving it.
demo = gr.ChatInterface(
    generate_text,
    title="Mistral-7B Chatbot",
    description="A fine-tuned Mistral-7B model using Unsloth.",
)
demo.launch()