Spaces:

alaamostafa
/

Mistral-7B-Deploy

Sleeping

App Files Files Community

alaamostafa committed on Mar 8

Commit

3e19615

verified ·

1 Parent(s): 19b4fad

Create app.py

Browse files

Files changed (1) hide show

app.py +62 -0

app.py ADDED Viewed

	@@ -0,0 +1,62 @@

+import gradio as gr
+from unsloth import FastLanguageModel
+import torch
+# Load the fine-tuned model and its tokenizer once, at module import time.
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name="alaamostafa/Mistral-7B-Unsloth",  # Hub repo id; replace with your own uploaded model
+    max_seq_length=2048,
+    load_in_4bit=True,  # NOTE(review): 4-bit loading presumably requires a GPU runtime - confirm
+)
+# Switch the model to Unsloth's inference mode (per the original author: faster inference)
+FastLanguageModel.for_inference(model)
+# Wrap the tokenizer with the "chatml" chat template; messages are then written with "from"/"value" keys and "human"/"gpt" roles (see mapping)
+from unsloth.chat_templates import get_chat_template
+tokenizer = get_chat_template(
+    tokenizer,
+    chat_template="chatml",
+    mapping={"role": "from", "content": "value", "user": "human", "assistant": "gpt"},
+    map_eos_token=True,
+)
+# Text generation function
+def generate_text(message, history):
+    messages = []
+    for human, assistant in history:
+        messages.append({"from": "human", "value": human})
+        messages.append({"from": "gpt", "value": assistant})
+    # Add the latest message
+    messages.append({"from": "human", "value": message})
+    # Format with chat template
+    inputs = tokenizer.apply_chat_template(
+        messages,
+        tokenize=True,
+        add_generation_prompt=True,
+        return_tensors="pt"
+    ).to("cuda" if torch.cuda.is_available() else "cpu")
+    # Generate response
+    outputs = model.generate(
+        input_ids=inputs,
+        max_new_tokens=512,
+        temperature=0.7,
+        top_p=0.9,
+        use_cache=True
+    )
+    response = tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True)
+    return response
+# Build the Gradio chat UI; generate_text is registered as the callback and takes (message, history)
+demo = gr.ChatInterface(
+    fn=generate_text,
+    title="Mistral-7B Chatbot",
+    description="A fine-tuned Mistral-7B model using Unsloth."
+)
+# Launch the app (runs at module level; there is no __main__ guard)
+demo.launch()