import gradio as gr
get_ipython().run_line_magic('pip', 'install transformers==4.45.0 accelerate==0.26.0 bitsandbytes==0.43.3')
import torch
# Sanity-check the environment: PyTorch version, CUDA availability, CUDA version
print(torch.__version__)
print(torch.cuda.is_available())
print(torch.version.cuda)
get_ipython().system('pip show bitsandbytes')
import bitsandbytes
print(bitsandbytes.__version__)
import bitsandbytes as bnb
import torch
# Smoke-test 4-bit quantization on a small tensor to confirm bitsandbytes works on this GPU
x = torch.randn(10, device="cuda")
y = bnb.functional.quantize_4bit(x)
print("Quantization worked!")
import bitsandbytes.nn
import bitsandbytes.functional
print("Submodules imported successfully!")
import transformers
# Work around transformers' availability check so 4-bit loading is not blocked,
# since bitsandbytes imports and quantizes correctly in the checks above
transformers.utils.is_bitsandbytes_available = lambda: True
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import os
import gc
torch.cuda.empty_cache()
gc.collect()
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)
# Define model and tokenizer
model_name = "deepseek-ai/deepseek-math-7b-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Set padding token if not already set
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
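# Optional sanity check (illustrative sketch, not required for training): confirm
# that the projection-layer names used as LoRA target_modules below actually exist
# in this architecture, since module names vary between model families.
proj_names = sorted({name.split(".")[-1] for name, _ in model.named_modules() if name.endswith("proj")})
print(proj_names)  # expected to include q_proj, k_proj, v_proj, o_proj for Llama-style models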
from peft import LoraConfig, get_peft_model
# Define LoRA configuration
lora_config = LoraConfig(
    r=16,  # Rank of the LoRA adaptation
    lora_alpha=32,  # Scaling factor
    target_modules=["q_proj", "v_proj"],  # Target attention layers (adjust based on model architecture)
    lora_dropout=0.05,  # Dropout for regularization
    bias="none",  # No bias in LoRA layers
    task_type="CAUSAL_LM",  # Task type for causal language modeling
)
# Apply LoRA to the model
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # Verify trainable parameters
dataset = [
    {
        "problem": "🍎 + 🍎 + 🍎 = 12",
        "output": "🍎 = 4 Explanation: If three apples equal 12, then each apple equals 4 as 12/3 is 4."
    },
    {
        "problem": "🍌 + 🍌 = 10",
        "output": "🍌 = 5 Explanation: If two bananas equal 10, then each banana equals 5."
    },
    {
        "problem": "🍊 × 3 = 15",
        "output": "🍊 = 5 Explanation: If an orange multiplied by 3 equals 15, then each orange equals 5."
    },
    {
        "problem": "🍇 ÷ 2 = 6",
        "output": "🍇 = 12 Explanation: If grapes divided by 2 equals 6, then grapes equals 12."
    },
    {
        "problem": "🍓 + 🍓 + 🍓 + 🍓 = 20",
        "output": "🍓 = 5 Explanation: If four strawberries equal 20, then each strawberry equals 5."
    },
    {
        "problem": "🍍 - 🍉 = 3, 🍍 + 🍉 = 15",
        "output": "🍍 = 9, 🍉 = 6 Explanation: Using the system of equations, we can solve that pineapple equals 9 and watermelon equals 6."
    },
    {
        "problem": "🍇 + 🍇 + 🍒 = 16, 🍇 + 🍒 + 🍒 = 17",
        "output": "🍇 = 5, 🍒 = 6 Explanation: Solving the system of equations: 2🍇 + 🍒 = 16 and 🍇 + 2🍒 = 17."
    },
    {
        "problem": "3 × 🥝 = 🍋 + 3, 🍋 = 12",
        "output": "🥝 = 5 Explanation: If lemon equals 12, then 3 times kiwi equals 15, so kiwi equals 5."
    },
    {
        "problem": "🥭 × 🥭 = 36",
        "output": "🥭 = 6 Explanation: If mango squared equals 36, then mango equals 6."
    },
    {
        "problem": "🍑 ÷ 4 = 3",
        "output": "🍑 = 12 Explanation: If peach divided by 4 equals 3, then peach equals 12."
    },
    {
        "problem": "🥥 + 🥥 + 🥥 = 🍈 × 3, 🍈 = 5",
        "output": "🥥 = 5 Explanation: If melon equals 5, then melon times 3 equals 15, so three coconuts equal 15, making each coconut equal to 5."
    },
    {
        "problem": "🍏 + 🍐 = 11, 🍏 - 🍐 = 1",
        "output": "🍏 = 6, 🍐 = 5 Explanation: Solving the system of equations: green apple plus pear equals 11, and green apple minus pear equals 1."
    },
    {
        "problem": "2 × 🍋 + 🍊 = 25, 🍋 = 7",
        "output": "🍊 = 11 Explanation: If lemon equals 7, then 2 times lemon equals 14, so orange equals 11."
    },
    {
        "problem": "🍉 ÷ 🍇 = 4, 🍇 = 3",
        "output": "🍉 = 12 Explanation: If grapes equal 3 and watermelon divided by grapes equals 4, then watermelon equals 12."
    },
    {
        "problem": "(🍎 + 🍌) × 2 = 18, 🍎 = 4",
        "output": "🍌 = 5 Explanation: If apple equals 4, then apple plus banana equals 9, so banana equals 5."
    },
    {
        "problem": "🍓 × 🍓 - 🍓 = 20",
        "output": "🍓 = 5 Explanation: If strawberry squared minus strawberry equals 20, then strawberry equals 5 (5² - 5 = 20)."
    },
    {
        "problem": "🥑 + 🥑 + 🥑 + 🥑 = 🍍 × 2, 🍍 = 10",
        "output": "🥑 = 5 Explanation: If pineapple equals 10, then pineapple times 2 equals 20, so four avocados equal 20, making each avocado equal to 5."
    },
    {
        "problem": "🍒 + 🍒 = 🍊 + 3, 🍊 = 5",
        "output": "🍒 = 4 Explanation: If orange equals 5, then two cherries equal 8, so each cherry equals 4."
    },
    {
        "problem": "3 × (🍎 - 🍐) = 6, 🍎 = 5",
        "output": "🍐 = 3 Explanation: If apple equals 5, then apple minus pear equals 2, so pear equals 3."
    },
    {
        "problem": "🍌 ÷ 🍓 = 3, 🍓 = 2",
        "output": "🍌 = 6 Explanation: If strawberry equals 2 and banana divided by strawberry equals 3, then banana equals 6."
    },
    {
        "problem": "🥝 × 🥝 × 🥝 = 27",
        "output": "🥝 = 3 Explanation: If kiwi cubed equals 27, then kiwi equals 3."
    },
    {
        "problem": "🍓 + 🍑 + 🍒 = 13, 🍑 = 5, 🍒 = 4",
        "output": "🍓 = 4 Explanation: If peach equals 5 and cherry equals 4, then strawberry equals 4."
    },
    {
        "problem": "🍎 × 🍌 = 24, 🍎 = 6",
        "output": "🍌 = 4 Explanation: If apple equals 6 and apple times banana equals 24, then banana equals 4."
    },
    {
        "problem": "🍉 - 🍇 = 🍈 + 1, 🍉 = 10, 🍇 = 3",
        "output": "🍈 = 6 Explanation: If watermelon equals 10 and grapes equal 3, then melon equals 6."
    },
    {
        "problem": "(🍊 + 🍋) ÷ 2 = 7, 🍊 = 5",
        "output": "🍋 = 9 Explanation: If orange equals 5, then orange plus lemon equals 14, so lemon equals 9."
    },
    {
        "problem": "🍍 × 2 - 🥥 = 11, 🍍 = 7",
        "output": "🥥 = 3 Explanation: If pineapple equals 7, then pineapple times 2 equals 14, so coconut equals 3."
    },
    {
        "problem": "🍏 + 🍐 + 🍊 = 18, 🍏 = 🍐 + 2, 🍊 = 🍐 + 1",
        "output": "🍏 = 7, 🍐 = 5, 🍊 = 6 Explanation: Solving the system of equations with the given relationships between green apple, pear, and orange."
    },
    {
        "problem": "🍌 × (🍎 - 🍓) = 12, 🍎 = 7, 🍓 = 4",
        "output": "🍌 = 4 Explanation: If apple equals 7 and strawberry equals 4, then apple minus strawberry equals 3, so banana equals 4."
    },
    {
        "problem": "🍇 + 🍇 + 🍇 = (🍑 × 2) + 3, 🍑 = 6",
        "output": "🍇 = 5 Explanation: If peach equals 6, then peach times 2 plus 3 equals 15, so three grapes equal 15, making each grape equal to 5."
    },
    {
        "problem": "🥭 ÷ (🍋 - 🍊) = 2, 🍋 = 7, 🍊 = 3",
        "output": "🥭 = 8 Explanation: If lemon equals 7 and orange equals 3, then lemon minus orange equals 4, so mango equals 8."
    }
]
# Prepare dataset for training
def format_data(example):
    # Format input and output as a conversation
    messages = [
        {"role": "user", "content": example["problem"]},
        {"role": "assistant", "content": example["output"]}
    ]
    # Apply the chat template without tokenizing; add_generation_prompt must be False
    # here because the assistant's answer is already part of the training example
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
    return {"text": text}
from datasets import Dataset
# Convert list to Hugging Face Dataset
hf_dataset = Dataset.from_list(dataset)
tokenized_dataset = hf_dataset.map(format_data, remove_columns=["problem", "output"])
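# Quick sanity check (illustrative): print one formatted example to confirm the
# chat template wraps the user problem and assistant answer as expected.
print(tokenized_dataset[0]["text"])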
# Tokenize the dataset
def tokenize_function(examples):
    # return_tensors is intentionally omitted: datasets.map stores plain lists,
    # and the data collator converts them to tensors at batch time
    return tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=512,
    )
tokenized_dataset = tokenized_dataset.map(tokenize_function, batched=True)
# Split dataset into train and eval (90% train, 10% eval)
train_test_split = tokenized_dataset.train_test_split(test_size=0.1)
train_dataset = train_test_split["train"]
eval_dataset = train_test_split["test"]
# Define data collator
from transformers import DataCollatorForLanguageModeling
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)
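# Note (illustrative check, based on the collator's documented behaviour): with
# mlm=False the collator copies input_ids into labels and masks padding positions
# to -100 so they are ignored by the loss; since pad_token was set to eos_token
# above, EOS positions are masked out as well.
example_batch = data_collator([{"input_ids": train_dataset[0]["input_ids"],
                                "attention_mask": train_dataset[0]["attention_mask"]}])
print((example_batch["labels"][0] == -100).sum().item(), "positions masked out of", example_batch["labels"].shape[1])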
from transformers import TrainingArguments, Trainer
# Define training arguments
training_args = TrainingArguments(
    output_dir="/kaggle/working/model_output",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=2,  # Adjust based on GPU memory (T4 x2)
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,  # Effective batch size per device = 2 * 4 = 8
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    weight_decay=0.01,
    fp16=True,  # Use mixed precision for T4 GPU
    logging_dir="/kaggle/working/logs",
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="loss",
    report_to="none",  # Disable wandb in Kaggle
    push_to_hub=False,
)
# Define compute metrics (optional, for evaluation)
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = torch.argmax(torch.tensor(logits), dim=-1)
    labels = torch.tensor(labels)
    mask = labels != -100  # ignore positions masked out by the collator
    return {"accuracy": (predictions[mask] == labels[mask]).float().mean().item()}
# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    data_collator=data_collator,
    # compute_metrics=compute_metrics  # Uncomment if you want accuracy metrics
)
# Train the model
trainer.train()
# Save the model and tokenizer
output_dir = "/kaggle/working/finetuned_model"
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)
# Zip the model directory for easy download (optional)
import shutil
shutil.make_archive("/kaggle/working/finetuned_model", "zip", output_dir)
print("Model and tokenizer saved and zipped at /kaggle/working/finetuned_model.zip")
# Test inference
messages = [
    {"role": "user", "content": "🥭 ÷ (🍋 - 🍊) = 2, 🍋 = 7, 🍊 = 3"}
]
input_tensor = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
outputs = model.generate(input_tensor, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id)
result = tokenizer.decode(outputs[0][input_tensor.shape[1]:], skip_special_tokens=True)
print("Test inference result:", result)
from peft import PeftModel
# Save the full state dict (base model + LoRA weights) as a .pth file
output_weights_path = "/kaggle/working/fine_tuned_deepseek_math_weights.pth"
torch.save(model.state_dict(), output_weights_path)
import shutil
# Zip the fine-tuned model directory alongside the raw weights
shutil.make_archive("/kaggle/working/fine_tuned_deepseek_math_weights", "zip", output_dir)
print("Model directory zipped at /kaggle/working/fine_tuned_deepseek_math_weights.zip")
get_ipython().run_line_magic('pip', 'install gradio')
import gradio as gr
def process_input(user_input):
    """Process user input through the model and return the result."""
    messages = [{"role": "user", "content": user_input}]
    # Apply chat template and generate response
    input_tensor = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
    outputs = model.generate(input_tensor, max_new_tokens=300, pad_token_id=tokenizer.eos_token_id)
    result = tokenizer.decode(outputs[0][input_tensor.shape[1]:], skip_special_tokens=True)
    return result
# Create Gradio interface
demo = gr.Interface(
    fn=process_input,
    inputs=gr.Textbox(placeholder="Enter your equation (e.g. 🥭 ÷ (🍋 - 🍊) = 2, 🍋 = 7, 🍊 = 3)"),
    outputs=gr.Textbox(label="Model Output"),
    title="Emoji Math Solver",
    description="Enter a math equation with emojis, and the model will solve it."
)
demo.launch(share=True)
import os
from getpass import getpass
from huggingface_hub import HfApi, Repository
import re
# Get your Hugging Face token
hf_token = getpass("Enter your Hugging Face token: ")
api = HfApi(token=hf_token)
# Get your Space name (username/space-name)
space_name = input("Enter your Hugging Face Space name (username/space-name): ")
# Extract the Gradio code from your notebook
# This assumes your Gradio app is defined in a cell or cells in your notebook
from IPython import get_ipython
# Get all cells from the notebook
cells = get_ipython().user_ns.get('In', [])
# Extract cells that contain Gradio code
gradio_code = []
in_gradio_block = False
for cell in cells:
    # A cell that launches the app ends the Gradio block (checked first so this
    # branch is reachable; otherwise the catch-all condition below shadows it)
    if in_gradio_block and ('if __name__' in cell or 'demo.launch()' in cell):
        gradio_code.append(cell)
        in_gradio_block = False
    # Look for cells that import gradio or define the interface
    elif 'import gradio' in cell or 'gr.Interface' in cell or in_gradio_block:
        in_gradio_block = True
        gradio_code.append(cell)
# Combine the code and ensure it has a launch method
combined_code = "\n\n".join(gradio_code)
# Make sure the app launches when run
if 'if __name__ == "__main__"' not in combined_code:
    combined_code += '\n\nif __name__ == "__main__":\n    demo.launch()'
# Save to app.py
with open("app.py", "w") as f:
    f.write(combined_code)
print("Extracted Gradio code and saved to app.py")
# Clone the existing Space repository
repo = Repository(
    local_dir="space_repo",
    clone_from=f"https://huggingface.co/spaces/{space_name}",
    token=hf_token,
    git_user="marwashahid",
    git_email="[email protected]"
)
# Copy app.py to the repository
import shutil
shutil.copy("app.py", "space_repo/app.py")
# Add requirements if needed
requirements = """
gradio>=3.50.2
"""
with open("space_repo/requirements.txt", "w") as f:
    f.write(requirements)
# Commit and push changes
repo.git_add()
repo.git_commit("Update from Kaggle notebook")
repo.git_push()
print(f"Successfully deployed to https://huggingface.co/spaces/{space_name}")