sagar007 committed (verified) · Commit 66c498d · 1 parent: ffe537c

Update app.py: load the model once at module level, add explicit CUDA device handling, and wrap each loading step in error handling

Files changed (1)
  1. app.py +50 -30
app.py CHANGED
@@ -1,65 +1,85 @@
-import torch
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from peft import PeftModel, PeftConfig
-from huggingface_hub import InferenceClient
 
-# Load configuration
-MODEL_PATH = "sagar007/phi2_25k"
-peft_config = PeftConfig.from_pretrained(MODEL_PATH)
+import torch
+
+# --- 1. Check CUDA Availability and Set Device ---
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+    print(f"Using device: {device} ({torch.cuda.get_device_name(0)})")
+else:
+    print("CUDA is not available. Falling back to CPU.")
+    device = torch.device("cpu")
 
-# Initialize client for Zero-GPU environment
-client = InferenceClient()
+# --- 2. Load Tokenizer (with error handling) ---
+MODEL_PATH = "sagar007/phi2_25k"
+try:
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
+    tokenizer.pad_token = tokenizer.eos_token
+except Exception as e:
+    print(f"Error loading tokenizer: {e}")
+    exit()
 
-def load_model():
-    # Load base model
+# --- 3. Load Base Model (Optimized for GPU) ---
+try:
     base_model = AutoModelForCausalLM.from_pretrained(
         "microsoft/phi-2",
-        torch_dtype=torch.float16,
-        device_map="auto",
+        torch_dtype=torch.float16,  # Use float16 on GPU for efficiency
+        device_map="auto",  # Automatically distribute model across GPUs
         trust_remote_code=True
     )
-
-    # Load PEFT model
+except Exception as e:
+    print(f"Error loading base model: {e}")
+    exit()
+
+# --- 4. Load PEFT Model (Optimized for GPU) ---
+try:
+    peft_config = PeftConfig.from_pretrained(MODEL_PATH)
     model = PeftModel.from_pretrained(base_model, MODEL_PATH)
-    return model, AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
+except Exception as e:
+    print(f"Error loading PEFT model: {e}")
+    exit()
+
+# Move model to the GPU
+model.to(device)
+model.eval()
 
-@client.gpu(timeout=120)
+# --- 5. Generation Function (Optimized for GPU) ---
 def generate_response(instruction, max_length=512):
+    prompt = f"Instruction: {instruction}\nResponse:"
     try:
-        model, tokenizer = load_model()
-        prompt = f"Instruction: {instruction}\nResponse:"
-
-        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-
+        inputs = tokenizer(prompt, return_tensors="pt").to(device)
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
                 max_length=max_length,
+                num_return_sequences=1,
                 temperature=0.7,
                 top_p=0.9,
                 do_sample=True
             )
-
-        return tokenizer.decode(outputs[0], skip_special_tokens=True).split("Response:")[-1].strip()
+        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return response.split("Response:")[1].strip()
     except Exception as e:
-        print(f"Error: {str(e)}")
-        return "Sorry, I encountered an error. Please try again."
+        print(f"Error during generation: {e}")
+        return "Error during response generation."
 
+# --- 6. Gradio Interface ---
 def chatbot(message, history):
     response = generate_response(message)
     return response
 
 demo = gr.ChatInterface(
     chatbot,
-    title="Phi-2 Zero-GPU Chat",
-    description="Fine-tuned Phi-2 model running on Hugging Face Zero-GPU Spaces",
+    title="Fine-tuned Phi-2 Chatbot (GPU)",
+    description="This is a chatbot using a fine-tuned version of the Phi-2 model, running on GPU.",
+    theme="default",
     examples=[
-        ["Explain quantum computing in simple terms"],
-        ["Write a poem about artificial intelligence"],
-        ["How do I make a perfect omelette?"]
+        "Explain the concept of machine learning.",
+        "Write a short story about a robot learning to paint.",
+        "What are some effective ways to reduce stress?",
     ],
-    cache_examples=False
+    cache_examples=False,  # You can enable caching now
 )
 
 if __name__ == "__main__":
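
For anyone who wants to smoke-test this revision locally, here is a minimal sketch. It assumes app.py sits in the working directory, that a `demo.launch()` call (outside this hunk) follows the `if __name__ == "__main__":` guard, and that torch, transformers, peft, and gradio are installed with enough GPU memory to hold phi-2 in float16; the file name smoke_test.py and the prompt text are illustrative, not part of the commit.

# smoke_test.py -- a local verification sketch, not part of this commit.
import app  # the import runs the module-level setup: device selection,
            # tokenizer load, base-model load, and PEFT adapter load

# Exercise the generation path directly, bypassing the Gradio UI.
reply = app.generate_response("Explain the concept of machine learning.", max_length=256)
print(reply)

# Optionally bring up the chat interface (mirrors the presumed __main__ block).
app.demo.launch()

Because `generate_response` now reads the module-level `model`, `tokenizer`, and `device` globals instead of reloading everything per request (the old `load_model()` path), the import is the expensive step; each call afterward only pays for generation.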