import gradio as gr get_ipython().run_line_magic('pip', 'install transformers==4.45.0 accelerate==0.26.0 bitsandbytes==0.43.3') import torch print(torch.__version__) print(torch.cuda.is_available()) print(torch.version.cuda) get_ipython().system('pip show bitsandbytes') import bitsandbytes print(bitsandbytes.__version__) import bitsandbytes as bnb import torch x = torch.randn(10, device="cuda") y = bnb.functional.quantize_4bit(x) print("Quantization worked!") import bitsandbytes.nn import bitsandbytes.functional print("Submodules imported successfully!") import transformers transformers.utils.is_bitsandbytes_available = lambda: True import torch from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig import os import gc torch.cuda.empty_cache() gc.collect() bnb_config = BitsAndBytesConfig( load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16, bnb_4bit_use_double_quant=True, ) # Define model and tokenizer model_name = "deepseek-ai/deepseek-math-7b-instruct" tokenizer = AutoTokenizer.from_pretrained(model_name) # Set padding token if not already set if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token model = AutoModelForCausalLM.from_pretrained( model_name, quantization_config=bnb_config, device_map="auto", trust_remote_code=True, ) from peft import LoraConfig, get_peft_model # Define LoRA configuration lora_config = LoraConfig( r=16, # Rank of the LoRA adaptation lora_alpha=32, # Scaling factor target_modules=["q_proj", "v_proj"], # Target attention layers (adjust based on model architecture) lora_dropout=0.05, # Dropout for regularization bias="none", # No bias in LoRA layers task_type="CAUSAL_LM", # Task type for causal language modeling ) # Apply LoRA to the model model = get_peft_model(model, lora_config) model.print_trainable_parameters() # Verify trainable parameters dataset = [ { "problem": "๐ŸŽ + ๐ŸŽ + ๐ŸŽ = 12", "output": "๐ŸŽ = 4 Explanation: If three apples equal 12, then each apple equals 4 as 12/3 is 4." }, { "problem": "๐ŸŒ + ๐ŸŒ = 10", "output": "๐ŸŒ = 5 Explanation: If two bananas equal 10, then each banana equals 5." }, { "problem": "๐ŸŠ ร— 3 = 15", "output": "๐ŸŠ = 5 Explanation: If an orange multiplied by 3 equals 15, then each orange equals 5." }, { "problem": "๐Ÿ‡ รท 2 = 6", "output": "๐Ÿ‡ = 12 Explanation : If grapes divided by 2 equals 6, then grapes equals 12." }, { "problem": "๐Ÿ“ + ๐Ÿ“ + ๐Ÿ“ + ๐Ÿ“ = 20", "output": "๐Ÿ“ = 5 Explanation : If four strawberries equal 20, then each strawberry equals 5." }, { "problem": "๐Ÿ - ๐Ÿ‰ = 3, ๐Ÿ + ๐Ÿ‰ = 15", "output": "๐Ÿ = 9, ๐Ÿ‰ = 6 Explanation : Using the system of equations, we can solve that pineapple equals 9 and watermelon equals 6." }, { "problem": "๐Ÿ’ + ๐Ÿ’ + ๐Ÿ = 16, ๐Ÿ + ๐Ÿ + ๐Ÿ’ = 19", "output": "๐Ÿ’ = 5, ๐Ÿ = 6 Explanation : Solving the system of equations: 2๐Ÿ’ + ๐Ÿ = 16 and ๐Ÿ’ + 2๐Ÿ = 19." }, { "problem": "3 ร— ๐Ÿฅ = ๐Ÿ‹ + 3, ๐Ÿ‹ = 12", "output": "๐Ÿฅ = 5 Explanation: If lemon equals 12, then 3 times kiwi equals 15, so kiwi equals 5." }, { "problem": "๐Ÿฅญ ร— ๐Ÿฅญ = 36", "output": "๐Ÿฅญ = 6 Explanation : If mango squared equals 36, then mango equals 6." }, { "problem": "๐Ÿ‘ รท 4 = 3", "output": "๐Ÿ‘ = 12 Explanation: If peach divided by 4 equals 3, then peach equals 12." }, { "problem": "๐Ÿฅฅ + ๐Ÿฅฅ + ๐Ÿฅฅ = ๐Ÿˆ ร— 3, ๐Ÿˆ = 5", "output": "๐Ÿฅฅ = 5 Explanation : If melon equals 5, then melon times 3 equals 15, so three coconuts equal 15, making each coconut equal to 5." }, { "problem": "๐Ÿ + ๐Ÿ = 11, ๐Ÿ - ๐Ÿ = 1", "output": "๐Ÿ = 6, ๐Ÿ = 5 Explanation : Solving the system of equations: green apple plus pear equals 11, and green apple minus pear equals 1." }, { "problem": "2 ร— ๐Ÿ‹ + ๐ŸŠ = 25, ๐Ÿ‹ = 7", "output": "๐ŸŠ = 11 Explanation : If lemon equals 7, then 2 times lemon equals 14, so orange equals 11." }, { "problem": "๐Ÿ‰ รท ๐Ÿ‡ = 4, ๐Ÿ‡ = 3", "output": "๐Ÿ‰ = 12 Explanation : If grapes equal 3 and watermelon divided by grapes equals 4, then watermelon equals 12." }, { "problem": "(๐ŸŽ + ๐ŸŒ) ร— 2 = 18, ๐ŸŽ = 4", "output": "๐ŸŒ = 5 Explanation : If apple equals 4, then apple plus banana equals 9, so banana equals 5." }, { "problem": "๐Ÿ“ ร— ๐Ÿ“ - ๐Ÿ“ = 20", "output": "๐Ÿ“ = 5 Explanation : If strawberry squared minus strawberry equals 20, then strawberry equals 5 (5ยฒ - 5 = 20)." }, { "problem": "๐Ÿฅ‘ + ๐Ÿฅ‘ + ๐Ÿฅ‘ + ๐Ÿฅ‘ = ๐Ÿ ร— 2, ๐Ÿ = 10", "output": "๐Ÿฅ‘ = 5 Explanation : If pineapple equals 10, then pineapple times 2 equals 20, so four avocados equal 20, making each avocado equal to 5." }, { "problem": "๐Ÿ’ + ๐Ÿ’ = ๐ŸŠ + 3, ๐ŸŠ = 5", "output": "๐Ÿ’ = 4 Explanation : If orange equals 5, then two cherries equal 8, so each cherry equals 4." }, { "problem": "3 ร— (๐ŸŽ - ๐Ÿ) = 6, ๐ŸŽ = 5", "output": "๐Ÿ = 3 Explanation : If apple equals 5, then apple minus pear equals 2, so pear equals 3." }, { "problem": "๐ŸŒ รท ๐Ÿ“ = 3, ๐Ÿ“ = 2", "output": "๐ŸŒ = 6 Explanation : If strawberry equals 2 and banana divided by strawberry equals 3, then banana equals 6." }, { "problem": "๐Ÿฅ ร— ๐Ÿฅ ร— ๐Ÿฅ = 27", "output": "๐Ÿฅ = 3 Explanation : If kiwi cubed equals 27, then kiwi equals 3." }, { "problem": "๐Ÿ‘ + ๐Ÿ’ + ๐Ÿ“ = 13, ๐Ÿ‘ = 5, ๐Ÿ’ = 4", "output": "๐Ÿ“ = 4 Explanation : If peach equals 5 and cherry equals 4, then strawberry equals 4." }, { "problem": "๐ŸŽ ร— ๐ŸŒ = 24, ๐ŸŽ = 6", "output": "๐ŸŒ = 4 Explanation : If apple equals 6 and apple times banana equals 24, then banana equals 4." }, { "problem": "๐Ÿ‰ - ๐Ÿˆ = ๐Ÿ‡ + 1, ๐Ÿ‰ = 10, ๐Ÿ‡ = 3", "output": "๐Ÿˆ = 6 Explanation : If watermelon equals 10 and grapes equal 3, then melon equals 6." }, { "problem": "(๐ŸŠ + ๐Ÿ‹) รท 2 = 7, ๐ŸŠ = 5", "output": "๐Ÿ‹ = 9 Explanation : If orange equals 5, then orange plus lemon equals 14, so lemon equals 9." }, { "problem": "๐Ÿ ร— 2 - ๐Ÿฅฅ = 11, ๐Ÿ = 7", "output": "๐Ÿฅฅ = 3 Explanation : If pineapple equals 7, then pineapple times 2 equals 14, so coconut equals 3." }, { "problem": "๐Ÿ + ๐Ÿ + ๐ŸŠ = 18, ๐Ÿ = ๐Ÿ + 2, ๐ŸŠ = ๐Ÿ + 1", "output": "๐Ÿ = 7, ๐Ÿ = 5, ๐ŸŠ = 6 Explanation : Solving the system of equations with the given relationships between green apple, pear, and orange." }, { "problem": "๐ŸŒ ร— (๐ŸŽ - ๐Ÿ“) = 12, ๐ŸŽ = 7, ๐Ÿ“ = 4", "output": "๐ŸŒ = 4 Explanation : If apple equals 7 and strawberry equals 4, then apple minus strawberry equals 3, so banana equals 4." }, { "problem": "๐Ÿ‡ + ๐Ÿ‡ + ๐Ÿ‡ = (๐Ÿ‘ ร— 2) + 3, ๐Ÿ‘ = 4", "output": "๐Ÿ‡ = 5 Explanation : If peach equals 4, then peach times 2 plus 3 equals 11, so three grapes equal 15, making each grape equal to 5." }, { "problem": "๐Ÿฅญ รท (๐Ÿ‹ - ๐ŸŠ) = 2, ๐Ÿ‹ = 7, ๐ŸŠ = 3", "output": "๐Ÿฅญ = 8 Explanation : If lemon equals 7 and orange equals 3, then lemon minus orange equals 4, so mango equals 8." } ] # Prepare dataset for training def format_data(example): # Format input and output as a conversation messages = [ {"role": "user", "content": example["problem"]}, {"role": "assistant", "content": example["output"]} ] # Apply chat template and tokenize text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) return {"text": text} from datasets import Dataset # Convert list to Hugging Face Dataset hf_dataset = Dataset.from_list(dataset) tokenized_dataset = hf_dataset.map(format_data, remove_columns=["problem", "output"]) # Tokenize the dataset def tokenize_function(examples): return tokenizer( examples["text"], padding="max_length", truncation=True, max_length=512, return_tensors="pt" ) tokenized_dataset = tokenized_dataset.map(tokenize_function, batched=True) # Split dataset into train and eval (90% train, 10% eval) train_test_split = tokenized_dataset.train_test_split(test_size=0.1) train_dataset = train_test_split["train"] eval_dataset = train_test_split["test"] # Define data collator from transformers import DataCollatorForLanguageModeling data_collator = DataCollatorForLanguageModeling( tokenizer=tokenizer, mlm=False ) from transformers import TrainingArguments, Trainer # Define training arguments training_args = TrainingArguments( output_dir="/kaggle/working/model_output", overwrite_output_dir=True, num_train_epochs=3, per_device_train_batch_size=2, # Adjust based on GPU memory (T4x2) per_device_eval_batch_size=2, gradient_accumulation_steps=4, # Effective batch size = 2 * 4 = 8 evaluation_strategy="epoch", save_strategy="epoch", learning_rate=2e-5, weight_decay=0.01, fp16=True, # Use mixed precision for T4 GPU logging_dir="/kaggle/working/logs", logging_steps=10, load_best_model_at_end=True, metric_for_best_model="loss", report_to="none", # Disable wandb in Kaggle push_to_hub=False, ) # Define compute metrics (optional, for evaluation) def compute_metrics(eval_pred): logits, labels = eval_pred predictions = torch.argmax(torch.tensor(logits), dim=-1) return {"accuracy": (predictions == labels).mean().item()} # Initialize Trainer trainer = Trainer( model=model, args=training_args, train_dataset=train_dataset, eval_dataset=eval_dataset, data_collator=data_collator, #compute_metrics=compute_metrics # Uncomment if you want accuracy metrics ) # Train the model trainer.train() # Save the model and tokenizer output_dir = "/kaggle/working/finetuned_model" model.save_pretrained(output_dir) tokenizer.save_pretrained(output_dir) # Zip the model directory for easy download (optional) import shutil shutil.make_archive("/kaggle/working/finetuned_model", "zip", output_dir) print("Model and tokenizer saved and zipped at /kaggle/working/finetuned_model.zip") # Test inference messages = [ {"role": "user", "content": "๐Ÿฅญ รท (๐Ÿ‹ - ๐ŸŠ) = 2, ๐Ÿ‹ = 7, ๐ŸŠ = 3"} ] input_tensor = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device) outputs = model.generate(input_tensor, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id) result = tokenizer.decode(outputs[0][input_tensor.shape[1]:], skip_special_tokens=True) print("Test inference result:", result) from peft import PeftModel output_weights_path = "/kaggle/working/fine_tuned_deepseek_math_weights.pth" torch.save(model.state_dict(), output_weights_path) import shutil shutil.make_archive("/kaggle/working/fine_tuned_deepseek_math_weights.pth", "zip", output_dir) print("Model and tokenizer saved and zipped at /kaggle/working/weights.zip") get_ipython().run_line_magic('pip', 'install gradio') from peft import PeftModel output_weights_path = "/kaggle/working/fine_tuned_deepseek_math_weights.pth" torch.save(model.state_dict(), output_weights_path) import shutil shutil.make_archive("/kaggle/working/fine_tuned_deepseek_math_weights.pth", "zip", output_dir) print("Model and tokenizer saved and zipped at /kaggle/working/weights.zip") from peft import PeftModel output_weights_path = "/kaggle/working/fine_tuned_deepseek_math_weights.pth" torch.save(model.state_dict(), output_weights_path) import shutil shutil.make_archive("/kaggle/working/fine_tuned_deepseek_math_weights.pth", "zip", output_dir) print("Model and tokenizer saved and zipped at /kaggle/working/weights.zip") import gradio as gr def process_input(user_input): """Process user input through the model and return the result.""" messages = [{"role": "user", "content": user_input}] # Apply chat template and generate response input_tensor = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device) outputs = model.generate(input_tensor, max_new_tokens=300, pad_token_id=tokenizer.eos_token_id) result = tokenizer.decode(outputs[0][input_tensor.shape[1]:], skip_special_tokens=True) return result # Create Gradio interface demo = gr.Interface( fn=process_input, inputs=gr.Textbox(placeholder="Enter your equation (e.g. ๐Ÿฅญ รท (๐Ÿ‹ - ๐ŸŠ) = 2, ๐Ÿ‹ = 7, ๐ŸŠ = 3)"), outputs=gr.Textbox(label="Model Output"), title="Emoji Math Solver", description="Enter a math equation with emojis, and the model will solve it." ) demo.launch(share=True) demo.launch(share=True) import os from getpass import getpass from huggingface_hub import HfApi, Repository import re # Get your Hugging Face token hf_token = getpass("Enter your Hugging Face token: ") api = HfApi(token=hf_token) # Get your Space name (username/space-name) space_name = input("Enter your Hugging Face Space name (username/space-name): ") # Extract the Gradio code from your notebook # This assumes your Gradio app is defined in a cell or cells in your notebook from IPython import get_ipython # Get all cells from the notebook cells = get_ipython().user_ns.get('In', []) # Extract cells that contain Gradio code gradio_code = [] in_gradio_block = False for cell in cells: # Look for cells that import gradio or define the interface if 'import gradio' in cell or 'gr.Interface' in cell or in_gradio_block: in_gradio_block = True gradio_code.append(cell) # If we find a cell that seems to end the Gradio app definition elif in_gradio_block and ('if __name__' in cell or 'demo.launch()' in cell): gradio_code.append(cell) in_gradio_block = False # Combine the code and ensure it has a launch method combined_code = "\n\n".join(gradio_code) # Make sure the app launches when run if 'if __name__ == "__main__"' not in combined_code: combined_code += '\n\nif __name__ == "__main__":\n demo.launch()' # Save to app.py with open("app.py", "w") as f: f.write(combined_code) print("Extracted Gradio code and saved to app.py") # Clone the existing Space repository repo = Repository( local_dir="space_repo", clone_from=f"https://huggingface.co/spaces/{space_name}", token=hf_token, git_user="marwashahid", git_email="marvashahid09@gmail.com" ) # Copy app.py to the repository import shutil shutil.copy("app.py", "space_repo/app.py") # Add requirements if needed requirements = """ gradio>=3.50.2 """ with open("space_repo/requirements.txt", "w") as f: f.write(requirements) # Commit and push changes repo.git_add() repo.git_commit("Update from Kaggle notebook") repo.git_push() print(f"Successfully deployed to https://huggingface.co/spaces/{space_name}") import os from getpass import getpass from huggingface_hub import HfApi, Repository import re # Get your Hugging Face token hf_token = getpass("Enter your Hugging Face token: ") api = HfApi(token=hf_token) # Get your Space name (username/space-name) space_name = input("Enter your Hugging Face Space name (username/space-name): ") # Extract the Gradio code from your notebook # This assumes your Gradio app is defined in a cell or cells in your notebook from IPython import get_ipython # Get all cells from the notebook cells = get_ipython().user_ns.get('In', []) # Extract cells that contain Gradio code gradio_code = [] in_gradio_block = False for cell in cells: # Look for cells that import gradio or define the interface if 'import gradio' in cell or 'gr.Interface' in cell or in_gradio_block: in_gradio_block = True gradio_code.append(cell) # If we find a cell that seems to end the Gradio app definition elif in_gradio_block and ('if __name__' in cell or 'demo.launch()' in cell): gradio_code.append(cell) in_gradio_block = False # Combine the code and ensure it has a launch method combined_code = "\n\n".join(gradio_code) # Make sure the app launches when run if 'if __name__ == "__main__"' not in combined_code: combined_code += '\n\nif __name__ == "__main__":\n demo.launch()' # Save to app.py with open("app.py", "w") as f: f.write(combined_code) print("Extracted Gradio code and saved to app.py") # Clone the existing Space repository repo = Repository( local_dir="space_repo", clone_from=f"https://huggingface.co/spaces/{space_name}", token=hf_token, git_user="marwashahid", git_email="marvashahid09@gmail.com" ) # Copy app.py to the repository import shutil shutil.copy("app.py", "space_repo/app.py") # Add requirements if needed requirements = """ gradio>=3.50.2 """ with open("space_repo/requirements.txt", "w") as f: f.write(requirements) # Commit and push changes repo.git_add() repo.git_commit("Update from Kaggle notebook") repo.git_push() print(f"Successfully deployed to https://huggingface.co/spaces/{space_name}")