DexterSptizu committed on
Commit
5101e06
1 Parent(s): b7ffad1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +76 -0
app.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+
5
# Initialize model and tokenizer at import time.
# NOTE(review): from_pretrained downloads weights on first run — this can
# take a while and requires network access; confirm acceptable for deploy.
checkpoint = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
# Prefer GPU when available, otherwise fall back to CPU inference.
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(checkpoint).to(device)
10
+
11
def generate_response(prompt, max_tokens, temperature, top_p):
    """Generate a single-turn chat reply using the module-level model.

    Args:
        prompt: User message to send to the model.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature (higher = more random).
        top_p: Nucleus-sampling probability mass.

    Returns:
        The model's reply as a string, or an ``"Error: ..."`` string on
        failure (the Gradio UI expects text either way).
    """
    try:
        # Format input as a single-turn chat message.
        messages = [{"role": "user", "content": prompt}]
        # add_generation_prompt=True appends the assistant header so the
        # model generates a reply instead of continuing the user turn.
        input_text = tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )

        # Encode and generate.
        inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
        outputs = model.generate(
            inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
        )

        # Decode only the newly generated tokens; decoding outputs[0] in
        # full would echo the prompt/template text back to the user.
        response = tokenizer.decode(
            outputs[0][inputs.shape[-1]:], skip_special_tokens=True
        )
        return response
    except Exception as e:
        # Surface the failure in the UI rather than crashing the app.
        return f"Error: {str(e)}"
32
+
33
# Create Gradio interface: build each UI component up front, then wire
# them into gr.Interface. Behavior is identical to passing the literals
# inline — this only names the pieces for readability.
prompt_input = gr.Textbox(
    label="Enter your prompt",
    placeholder="What would you like to know?",
    lines=3,
)
max_tokens_slider = gr.Slider(
    minimum=10, maximum=200, value=50, step=10, label="Maximum Tokens"
)
temperature_slider = gr.Slider(
    minimum=0.1, maximum=1.0, value=0.2, step=0.1, label="Temperature"
)
top_p_slider = gr.Slider(
    minimum=0.1, maximum=1.0, value=0.9, step=0.1, label="Top P"
)

# Each example row matches the inputs order: prompt, max tokens,
# temperature, top-p.
example_rows = [
    ["What is the capital of France?", 50, 0.2, 0.9],
    ["Explain quantum computing in simple terms.", 100, 0.3, 0.9],
    ["Write a short poem about nature.", 150, 0.7, 0.9],
]

iface = gr.Interface(
    fn=generate_response,
    inputs=[prompt_input, max_tokens_slider, temperature_slider, top_p_slider],
    outputs=gr.Textbox(label="Generated Response", lines=5),
    title="SmolLM2-1.7B-Instruct Demo",
    description="Generate responses using the SmolLM2-1.7B-Instruct model",
    examples=example_rows,
)
73
+
74
# Launch the application only when run as a script (not on import).
if __name__ == "__main__":
    # NOTE(review): share=True asks Gradio to create a public share link
    # in addition to the local server — confirm this is intended for the
    # deployment target.
    iface.launch(share=True)