marwashahid committed on
Commit b65806d
· 1 Parent(s): dd5eeda

Update from Kaggle notebook

Files changed (1):
  1. app.py +394 -4
app.py CHANGED
@@ -1,5 +1,329 @@
 import gradio as gr
 
+ get_ipython().run_line_magic('pip', 'install transformers==4.45.0 accelerate==0.26.0 bitsandbytes==0.43.3')
+
+ import torch
+ print(torch.__version__)
+ print(torch.cuda.is_available())
+ print(torch.version.cuda)
+ get_ipython().system('pip show bitsandbytes')
+ import bitsandbytes
+ print(bitsandbytes.__version__)
+ import bitsandbytes as bnb
+ import torch
+ x = torch.randn(10, device="cuda")
+ y = bnb.functional.quantize_4bit(x)
+ print("Quantization worked!")
+ import bitsandbytes.nn
+ import bitsandbytes.functional
+ print("Submodules imported successfully!")
+
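Review note on the sanity check above: in recent bitsandbytes releases, quantize_4bit returns a pair rather than a tensor, so y here is a (packed_tensor, quant_state) tuple. A minimal round-trip sketch under that assumption:

import bitsandbytes as bnb
import torch

x = torch.randn(10, device="cuda")
packed, quant_state = bnb.functional.quantize_4bit(x)  # packed 4-bit data plus scaling metadata
x_restored = bnb.functional.dequantize_4bit(packed, quant_state)
print((x - x_restored).abs().max())  # nonzero: 4-bit quantization is lossy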
+ import transformers
+ transformers.utils.is_bitsandbytes_available = lambda: True
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ import os
+ import gc
+
+ torch.cuda.empty_cache()
+ gc.collect()
+
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.bfloat16,
+     bnb_4bit_use_double_quant=True,
+ )
+
+ # Define model and tokenizer
+ model_name = "deepseek-ai/deepseek-math-7b-instruct"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+
+ # Set padding token if not already set
+ if tokenizer.pad_token is None:
+     tokenizer.pad_token = tokenizer.eos_token
+
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     quantization_config=bnb_config,
+     device_map="auto",
+     trust_remote_code=True,
+ )
+
+ from peft import LoraConfig, get_peft_model
+
+ # Define LoRA configuration
+ lora_config = LoraConfig(
+     r=16,  # Rank of the LoRA adaptation
+     lora_alpha=32,  # Scaling factor
+     target_modules=["q_proj", "v_proj"],  # Target attention layers (adjust based on model architecture)
+     lora_dropout=0.05,  # Dropout for regularization
+     bias="none",  # No bias in LoRA layers
+     task_type="CAUSAL_LM",  # Task type for causal language modeling
+ )
+
+ # Apply LoRA to the model
+ model = get_peft_model(model, lora_config)
+ model.print_trainable_parameters()  # Verify trainable parameters
+
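The "adjust based on model architecture" comment on target_modules deserves emphasis: the projection names differ across model families. A quick inspection sketch (assuming model is already loaded as above; with 4-bit loading, the linear layers are bitsandbytes Linear4bit modules):

import torch.nn as nn
import bitsandbytes as bnb

# Leaf names of the model's linear-like layers are the candidates for target_modules.
candidates = {name.split(".")[-1]
              for name, module in model.named_modules()
              if isinstance(module, (nn.Linear, bnb.nn.Linear4bit))}
print(sorted(candidates))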
+ dataset = [
+     {
+         "problem": "🍎 + 🍎 + 🍎 = 12",
+         "output": "🍎 = 4 Explanation: If three apples equal 12, then each apple equals 4 as 12/3 is 4."
+     },
+     {
+         "problem": "🍌 + 🍌 = 10",
+         "output": "🍌 = 5 Explanation: If two bananas equal 10, then each banana equals 5."
+     },
+     {
+         "problem": "🍊 × 3 = 15",
+         "output": "🍊 = 5 Explanation: If an orange multiplied by 3 equals 15, then each orange equals 5."
+     },
+     {
+         "problem": "🍇 ÷ 2 = 6",
+         "output": "🍇 = 12 Explanation : If grapes divided by 2 equals 6, then grapes equals 12."
+     },
+     {
+         "problem": "🍓 + 🍓 + 🍓 + 🍓 = 20",
+         "output": "🍓 = 5 Explanation : If four strawberries equal 20, then each strawberry equals 5."
+     },
+     {
+         "problem": "🍍 - 🍉 = 3, 🍍 + 🍉 = 15",
+         "output": "🍍 = 9, 🍉 = 6 Explanation : Using the system of equations, we can solve that pineapple equals 9 and watermelon equals 6."
+     },
+     {
+         "problem": "🍒 + 🍒 + 🍐 = 16, 🍐 + 🍐 + 🍒 = 19",
+         "output": "🍒 = 5, 🍐 = 6 Explanation : Solving the system of equations: 2🍒 + 🍐 = 16 and 🍒 + 2🍐 = 19."
+     },
+     {
+         "problem": "3 × 🥝 = 🍋 + 3, 🍋 = 12",
+         "output": "🥝 = 5 Explanation: If lemon equals 12, then 3 times kiwi equals 15, so kiwi equals 5."
+     },
+     {
+         "problem": "🥭 × 🥭 = 36",
+         "output": "🥭 = 6 Explanation : If mango squared equals 36, then mango equals 6."
+     },
+     {
+         "problem": "🍑 ÷ 4 = 3",
+         "output": "🍑 = 12 Explanation: If peach divided by 4 equals 3, then peach equals 12."
+     },
+     {
+         "problem": "🥥 + 🥥 + 🥥 = 🍈 × 3, 🍈 = 5",
+         "output": "🥥 = 5 Explanation : If melon equals 5, then melon times 3 equals 15, so three coconuts equal 15, making each coconut equal to 5."
+     },
+     {
+         "problem": "🍏 + 🍐 = 11, 🍏 - 🍐 = 1",
+         "output": "🍏 = 6, 🍐 = 5 Explanation : Solving the system of equations: green apple plus pear equals 11, and green apple minus pear equals 1."
+     },
+     {
+         "problem": "2 × 🍋 + 🍊 = 25, 🍋 = 7",
+         "output": "🍊 = 11 Explanation : If lemon equals 7, then 2 times lemon equals 14, so orange equals 11."
+     },
+     {
+         "problem": "🍉 ÷ 🍇 = 4, 🍇 = 3",
+         "output": "🍉 = 12 Explanation : If grapes equal 3 and watermelon divided by grapes equals 4, then watermelon equals 12."
+     },
+     {
+         "problem": "(🍎 + 🍌) × 2 = 18, 🍎 = 4",
+         "output": "🍌 = 5 Explanation : If apple equals 4, then apple plus banana equals 9, so banana equals 5."
+     },
+     {
+         "problem": "🍓 × 🍓 - 🍓 = 20",
+         "output": "🍓 = 5 Explanation : If strawberry squared minus strawberry equals 20, then strawberry equals 5 (5² - 5 = 20)."
+     },
+     {
+         "problem": "🥑 + 🥑 + 🥑 + 🥑 = 🍍 × 2, 🍍 = 10",
+         "output": "🥑 = 5 Explanation : If pineapple equals 10, then pineapple times 2 equals 20, so four avocados equal 20, making each avocado equal to 5."
+     },
+     {
+         "problem": "🍒 + 🍒 = 🍊 + 3, 🍊 = 5",
+         "output": "🍒 = 4 Explanation : If orange equals 5, then two cherries equal 8, so each cherry equals 4."
+     },
+     {
+         "problem": "3 × (🍎 - 🍐) = 6, 🍎 = 5",
+         "output": "🍐 = 3 Explanation : If apple equals 5, then apple minus pear equals 2, so pear equals 3."
+     },
+     {
+         "problem": "🍌 ÷ 🍓 = 3, 🍓 = 2",
+         "output": "🍌 = 6 Explanation : If strawberry equals 2 and banana divided by strawberry equals 3, then banana equals 6."
+     },
+     {
+         "problem": "🥝 × 🥝 × 🥝 = 27",
+         "output": "🥝 = 3 Explanation : If kiwi cubed equals 27, then kiwi equals 3."
+     },
+     {
+         "problem": "🍑 + 🍒 + 🍓 = 13, 🍑 = 5, 🍒 = 4",
+         "output": "🍓 = 4 Explanation : If peach equals 5 and cherry equals 4, then strawberry equals 4."
+     },
+     {
+         "problem": "🍎 × 🍌 = 24, 🍎 = 6",
+         "output": "🍌 = 4 Explanation : If apple equals 6 and apple times banana equals 24, then banana equals 4."
+     },
+     {
+         "problem": "🍉 - 🍈 = 🍇 + 1, 🍉 = 10, 🍇 = 3",
+         "output": "🍈 = 6 Explanation : If watermelon equals 10 and grapes equal 3, then melon equals 6."
+     },
+     {
+         "problem": "(🍊 + 🍋) ÷ 2 = 7, 🍊 = 5",
+         "output": "🍋 = 9 Explanation : If orange equals 5, then orange plus lemon equals 14, so lemon equals 9."
+     },
+     {
+         "problem": "🍍 × 2 - 🥥 = 11, 🍍 = 7",
+         "output": "🥥 = 3 Explanation : If pineapple equals 7, then pineapple times 2 equals 14, so coconut equals 3."
+     },
+     {
+         "problem": "🍏 + 🍐 + 🍊 = 18, 🍏 = 🍐 + 2, 🍊 = 🍐 + 1",
+         "output": "🍏 = 7, 🍐 = 5, 🍊 = 6 Explanation : Solving the system of equations with the given relationships between green apple, pear, and orange."
+     },
+     {
+         "problem": "🍌 × (🍎 - 🍓) = 12, 🍎 = 7, 🍓 = 4",
+         "output": "🍌 = 4 Explanation : If apple equals 7 and strawberry equals 4, then apple minus strawberry equals 3, so banana equals 4."
+     },
+     {
+         "problem": "🍇 + 🍇 + 🍇 = (🍑 × 2) + 3, 🍑 = 4",
+         "output": "🍇 = 5 Explanation : If peach equals 4, then peach times 2 plus 3 equals 11, so three grapes equal 15, making each grape equal to 5."
+     },
+     {
+         "problem": "🥭 ÷ (🍋 - 🍊) = 2, 🍋 = 7, 🍊 = 3",
+         "output": "🥭 = 8 Explanation : If lemon equals 7 and orange equals 3, then lemon minus orange equals 4, so mango equals 8."
+     }
+ ]
+
+ # Prepare dataset for training
+ def format_data(example):
+     # Format input and output as a conversation
+     messages = [
+         {"role": "user", "content": example["problem"]},
+         {"role": "assistant", "content": example["output"]}
+     ]
+     # Apply chat template and tokenize
+     text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+     return {"text": text}
+
+ from datasets import Dataset
+ # Convert list to Hugging Face Dataset
+ hf_dataset = Dataset.from_list(dataset)
+ tokenized_dataset = hf_dataset.map(format_data, remove_columns=["problem", "output"])
+
+ # Tokenize the dataset
+ def tokenize_function(examples):
+     return tokenizer(
+         examples["text"],
+         padding="max_length",
+         truncation=True,
+         max_length=512,
+         return_tensors="pt"
+     )
+
+ tokenized_dataset = tokenized_dataset.map(tokenize_function, batched=True)
+
+ # Split dataset into train and eval (90% train, 10% eval)
+ train_test_split = tokenized_dataset.train_test_split(test_size=0.1)
+ train_dataset = train_test_split["train"]
+ eval_dataset = train_test_split["test"]
+
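One thing to double-check in format_data: it renders the template with add_generation_prompt=True, which appends an empty assistant header even though the assistant answer is already in messages (and despite the comment, tokenize=False means no tokenization happens there; the separate tokenize_function handles that). For supervised fine-tuning, the more common pattern renders the full conversation without the trailing prompt, roughly:

def format_data(example):
    messages = [
        {"role": "user", "content": example["problem"]},
        {"role": "assistant", "content": example["output"]},
    ]
    # No generation prompt: the assistant turn itself is the training target.
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
    return {"text": text}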
+ # Define data collator
+ from transformers import DataCollatorForLanguageModeling
+ data_collator = DataCollatorForLanguageModeling(
+     tokenizer=tokenizer,
+     mlm=False
+ )
+
+ from transformers import TrainingArguments, Trainer
+
+ # Define training arguments
+ training_args = TrainingArguments(
+     output_dir="/kaggle/working/model_output",
+     overwrite_output_dir=True,
+     num_train_epochs=3,
+     per_device_train_batch_size=2,  # Adjust based on GPU memory (T4x2)
+     per_device_eval_batch_size=2,
+     gradient_accumulation_steps=4,  # Effective batch size = 2 * 4 = 8
+     evaluation_strategy="epoch",
+     save_strategy="epoch",
+     learning_rate=2e-5,
+     weight_decay=0.01,
+     fp16=True,  # Use mixed precision for T4 GPU
+     logging_dir="/kaggle/working/logs",
+     logging_steps=10,
+     load_best_model_at_end=True,
+     metric_for_best_model="loss",
+     report_to="none",  # Disable wandb in Kaggle
+     push_to_hub=False,
+ )
+
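A consistency point between this block and the quantization config: fp16=True targets the T4, but bnb_4bit_compute_dtype above is torch.bfloat16, which T4 (Turing) GPUs do not support natively. A float16 compute dtype would match the training precision; a sketch of the adjusted config:

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,  # matches fp16=True on T4 hardware
    bnb_4bit_use_double_quant=True,
)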
+ # Define compute metrics (optional, for evaluation)
+ def compute_metrics(eval_pred):
+     logits, labels = eval_pred
+     predictions = torch.argmax(torch.tensor(logits), dim=-1)
+     return {"accuracy": (predictions == labels).mean().item()}
+
+ # Initialize Trainer
+ trainer = Trainer(
+     model=model,
+     args=training_args,
+     train_dataset=train_dataset,
+     eval_dataset=eval_dataset,
+     data_collator=data_collator,
+     # compute_metrics=compute_metrics  # Uncomment if you want accuracy metrics
+ )
+
+ # Train the model
+ trainer.train()
+
+ # Save the model and tokenizer
+ output_dir = "/kaggle/working/finetuned_model"
+ model.save_pretrained(output_dir)
+ tokenizer.save_pretrained(output_dir)
+
+ # Zip the model directory for easy download (optional)
+ import shutil
+ shutil.make_archive("/kaggle/working/finetuned_model", "zip", output_dir)
+ print("Model and tokenizer saved and zipped at /kaggle/working/finetuned_model.zip")
+
+ # Test inference
+ messages = [
+     {"role": "user", "content": "🥭 ÷ (🍋 - 🍊) = 2, 🍋 = 7, 🍊 = 3"}
+ ]
+ input_tensor = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
+ outputs = model.generate(input_tensor, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id)
+ result = tokenizer.decode(outputs[0][input_tensor.shape[1]:], skip_special_tokens=True)
+ print("Test inference result:", result)
+
+ from peft import PeftModel
+
+ output_weights_path = "/kaggle/working/fine_tuned_deepseek_math_weights.pth"
+ torch.save(model.state_dict(), output_weights_path)
+
+
+ import shutil
+ shutil.make_archive("/kaggle/working/fine_tuned_deepseek_math_weights.pth", "zip", output_dir)
+ print("Model and tokenizer saved and zipped at /kaggle/working/weights.zip")
+
+ get_ipython().run_line_magic('pip', 'install gradio')
+
+ from peft import PeftModel
+
+ output_weights_path = "/kaggle/working/fine_tuned_deepseek_math_weights.pth"
+ torch.save(model.state_dict(), output_weights_path)
+
+
+ import shutil
+ shutil.make_archive("/kaggle/working/fine_tuned_deepseek_math_weights.pth", "zip", output_dir)
+ print("Model and tokenizer saved and zipped at /kaggle/working/weights.zip")
+
+ from peft import PeftModel
+
+ output_weights_path = "/kaggle/working/fine_tuned_deepseek_math_weights.pth"
+ torch.save(model.state_dict(), output_weights_path)
+
+
+ import shutil
+ shutil.make_archive("/kaggle/working/fine_tuned_deepseek_math_weights.pth", "zip", output_dir)
+ print("Model and tokenizer saved and zipped at /kaggle/working/weights.zip")
+
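Two review notes on the block above (committed three times verbatim): shutil.make_archive takes a base name, so it actually writes fine_tuned_deepseek_math_weights.pth.zip, and it archives output_dir rather than the .pth file, so the weights.zip named in the print never exists. A single pass with an explicit base name avoids both surprises:

import shutil

# make_archive returns the real path of the archive it created.
archive_path = shutil.make_archive("/kaggle/working/weights", "zip", output_dir)
print(f"Adapter directory zipped at {archive_path}")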
+ import gradio as gr
+
 def process_input(user_input):
     """Process user input through the model and return the result."""
     messages = [{"role": "user", "content": user_input}]
@@ -22,13 +346,79 @@ demo = gr.Interface(
 
 demo.launch(share=True)
 
- output_weights_path = "/kaggle/working/fine_tuned_deepseek_math_weights.pth"
- torch.save(model.state_dict(), output_weights_path)
+ demo.launch(share=True)
 
+ import os
+ from getpass import getpass
+ from huggingface_hub import HfApi, Repository
+ import re
+
+ # Get your Hugging Face token
+ hf_token = getpass("Enter your Hugging Face token: ")
+ api = HfApi(token=hf_token)
+
+ # Get your Space name (username/space-name)
+ space_name = input("Enter your Hugging Face Space name (username/space-name): ")
+
+ # Extract the Gradio code from your notebook
+ # This assumes your Gradio app is defined in a cell or cells in your notebook
+ from IPython import get_ipython
+
+ # Get all cells from the notebook
+ cells = get_ipython().user_ns.get('In', [])
+
+ # Extract cells that contain Gradio code
+ gradio_code = []
+ in_gradio_block = False
+ for cell in cells:
+     # Look for cells that import gradio or define the interface
+     if 'import gradio' in cell or 'gr.Interface' in cell or in_gradio_block:
+         in_gradio_block = True
+         gradio_code.append(cell)
+     # If we find a cell that seems to end the Gradio app definition
+     elif in_gradio_block and ('if __name__' in cell or 'demo.launch()' in cell):
+         gradio_code.append(cell)
+         in_gradio_block = False
+
+ # Combine the code and ensure it has a launch method
+ combined_code = "\n\n".join(gradio_code)
+
+ # Make sure the app launches when run
+ if 'if __name__ == "__main__"' not in combined_code:
+     combined_code += '\n\nif __name__ == "__main__":\n    demo.launch()'
+
+ # Save to app.py
+ with open("app.py", "w") as f:
+     f.write(combined_code)
+
+ print("Extracted Gradio code and saved to app.py")
+
+ # Clone the existing Space repository
+ repo = Repository(
+     local_dir="space_repo",
+     clone_from=f"https://huggingface.co/spaces/{space_name}",
+     token=hf_token,
+     git_user="marwashahid",
+     git_email="[email protected]"
+ )
 
+ # Copy app.py to the repository
 import shutil
- shutil.make_archive("/kaggle/working/fine_tuned_deepseek_math_weights.pth", "zip", output_dir)
- print("Model and tokenizer saved and zipped at /kaggle/working/weights.zip")
+ shutil.copy("app.py", "space_repo/app.py")
+
+ # Add requirements if needed
+ requirements = """
+ gradio>=3.50.2
+ """
+ with open("space_repo/requirements.txt", "w") as f:
+     f.write(requirements)
+
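The extracted app.py calls into torch, transformers, and peft at inference time, so a one-line requirements file is unlikely to be enough for the Space to boot. A fuller sketch (the pins mirror the pip install at the top of this commit and are otherwise assumptions):

requirements = """
gradio>=3.50.2
torch
transformers==4.45.0
accelerate==0.26.0
bitsandbytes==0.43.3
peft
"""
with open("space_repo/requirements.txt", "w") as f:
    f.write(requirements)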
+ # Commit and push changes
+ repo.git_add()
+ repo.git_commit("Update from Kaggle notebook")
+ repo.git_push()
+
+ print(f"Successfully deployed to https://huggingface.co/spaces/{space_name}")
 
 import os
 from getpass import getpass
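Finally, PeftModel is imported three times in this commit but never used. A sketch of how the saved adapter would be reloaded for inference later (assuming the save_pretrained call above wrote the adapter to /kaggle/working/finetuned_model):

from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
model = PeftModel.from_pretrained(base, "/kaggle/working/finetuned_model")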