sagar007 committed
Commit ffe537c · verified · 1 Parent(s): 0bfd470

Update app.py

Files changed (1)
  1. app.py +43 -41
app.py CHANGED
@@ -2,46 +2,49 @@ import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel, PeftConfig
-import spaces
+from huggingface_hub import InferenceClient
 
-# Check if CUDA is available and set the device
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-print(f"Using device: {device}")
-
-# Load model and tokenizer#
+# Load configuration
 MODEL_PATH = "sagar007/phi2_25k"
-tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
-tokenizer.pad_token = tokenizer.eos_token
+peft_config = PeftConfig.from_pretrained(MODEL_PATH)
 
-base_model = AutoModelForCausalLM.from_pretrained(
-    "microsoft/phi-2",
-    torch_dtype=torch.float16 if device.type == "cuda" else torch.float32,
-    device_map="auto",
-    trust_remote_code=True
-)
+# Initialize client for Zero-GPU environment
+client = InferenceClient()
 
-peft_config = PeftConfig.from_pretrained(MODEL_PATH)
-model = PeftModel.from_pretrained(base_model, MODEL_PATH)
-model.to(device)
-model.eval()
+def load_model():
+    # Load base model
+    base_model = AutoModelForCausalLM.from_pretrained(
+        "microsoft/phi-2",
+        torch_dtype=torch.float16,
+        device_map="auto",
+        trust_remote_code=True
+    )
+
+    # Load PEFT model
+    model = PeftModel.from_pretrained(base_model, MODEL_PATH)
+    return model, AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
 
-@spaces.GPU(duration=60)
+@client.gpu(timeout=120)
 def generate_response(instruction, max_length=512):
-    prompt = f"Instruction: {instruction}\nResponse:"
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
-
-    with torch.no_grad():
-        outputs = model.generate(
-            **inputs,
-            max_length=max_length,
-            num_return_sequences=1,
-            temperature=0.7,
-            top_p=0.9,
-            do_sample=True
-        )
-
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return response.split("Response:")[1].strip()
+    try:
+        model, tokenizer = load_model()
+        prompt = f"Instruction: {instruction}\nResponse:"
+
+        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                max_length=max_length,
+                temperature=0.7,
+                top_p=0.9,
+                do_sample=True
+            )
+
+        return tokenizer.decode(outputs[0], skip_special_tokens=True).split("Response:")[-1].strip()
+    except Exception as e:
+        print(f"Error: {str(e)}")
+        return "Sorry, I encountered an error. Please try again."
 
 def chatbot(message, history):
     response = generate_response(message)
@@ -49,15 +52,14 @@ def chatbot(message, history):
 
 demo = gr.ChatInterface(
     chatbot,
-    title="Fine-tuned Phi-2 Chatbot",
-    description="This is a chatbot using a fine-tuned version of the Phi-2 model.",
-    theme="default",
+    title="Phi-2 Zero-GPU Chat",
+    description="Fine-tuned Phi-2 model running on Hugging Face Zero-GPU Spaces",
     examples=[
-        "Explain the concept of machine learning.",
-        "Write a short story about a robot learning to paint.",
-        "What are some effective ways to reduce stress?",
+        ["Explain quantum computing in simple terms"],
+        ["Write a poem about artificial intelligence"],
+        ["How do I make a perfect omelette?"]
     ],
-    cache_examples=True,
+    cache_examples=False
 )
 
 if __name__ == "__main__":
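
One issue worth flagging in the new revision: huggingface_hub's InferenceClient exposes no gpu decorator, so @client.gpu(timeout=120) would raise AttributeError as soon as the module is imported, and load_model() reloads the base model and adapter on every request. The documented Zero-GPU mechanism is the spaces.GPU decorator that the previous revision already used. Below is a minimal sketch of that pattern, assuming the spaces package that Hugging Face injects into Zero-GPU Spaces; it also swaps max_length for max_new_tokens so the cap applies to generated tokens rather than prompt plus output:

    # Sketch only: the documented Zero-GPU pattern, as used in the
    # previous revision of this file. `spaces` is the package Hugging
    # Face provides in Zero-GPU Spaces; it is not part of huggingface_hub.
    import torch
    import spaces
    from transformers import AutoTokenizer, AutoModelForCausalLM
    from peft import PeftModel

    MODEL_PATH = "sagar007/phi2_25k"

    # Load once at startup instead of per request; Zero-GPU attaches the
    # GPU only while a @spaces.GPU-decorated function is running.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token
    base_model = AutoModelForCausalLM.from_pretrained(
        "microsoft/phi-2",
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )
    model = PeftModel.from_pretrained(base_model, MODEL_PATH)
    model.eval()

    @spaces.GPU(duration=120)  # GPU is held for at most 120 s per call
    def generate_response(instruction, max_new_tokens=512):
        prompt = f"Instruction: {instruction}\nResponse:"
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                # max_new_tokens caps generated tokens; max_length would
                # also count the prompt, shrinking answers to long prompts.
                max_new_tokens=max_new_tokens,
                temperature=0.7,
                top_p=0.9,
                do_sample=True,
            )
        decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return decoded.split("Response:")[-1].strip()

The commit's switch to cache_examples=False fits the same constraint: with caching enabled, Gradio runs every example through the model at build time, before any GPU is attached.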