import gradio as grad
import torch
import spaces
from unsloth import FastLanguageModel

# Load your fine-tuned Phi-3 model from Hugging Face
MODEL_NAME = "jcrissa/phi3-new-t2i"
device = "cuda"


@spaces.GPU
def load_phi3_model():
    try:
        # Load the Phi-3 model and tokenizer from Hugging Face
        model, tokenizer = FastLanguageModel.from_pretrained(
            MODEL_NAME,
            max_seq_length=4096,  # Must match the sequence length used during fine-tuning
            dtype=torch.float16 if device == "cuda" else torch.float32,  # float16 on GPU, float32 on CPU
        )
        model.to(device)

        # Switch the model into Unsloth's optimized inference mode
        model = FastLanguageModel.for_inference(model)

        # Configure tokenizer settings for generation
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "left"

        return model, tokenizer
    except Exception as e:
        print(f"Error loading model: {e}")
        return None, None


# Load the model and tokenizer, with error handling
phi3_model, phi3_tokenizer = load_phi3_model()
if phi3_model is None or phi3_tokenizer is None:
    raise RuntimeError(
        "Model and tokenizer could not be loaded. "
        "Please check the Hugging Face model path or network connection."
    )


# Function to generate text using Phi-3
def generate(plain_text):
    try:
        # Tokenize the input text and move it to the target device
        input_ids = phi3_tokenizer(plain_text.strip(), return_tensors="pt").input_ids.to(device)
        eos_id = phi3_tokenizer.eos_token_id

        # Generate output with sampling rather than beam search
        outputs = phi3_model.generate(
            input_ids,
            do_sample=True,
            max_new_tokens=75,
            num_return_sequences=1,
            eos_token_id=eos_id,
            pad_token_id=eos_id,
        )

        # Decode and return the generated text
        output_text = phi3_tokenizer.decode(outputs[0], skip_special_tokens=True)
        return output_text.strip()
    except Exception as e:
        return f"Error during text generation: {e}"


# Set up the Gradio interface
txt = grad.Textbox(lines=1, label="Input Text", placeholder="Enter your prompt")
out = grad.Textbox(lines=1, label="Generated Text")

grad.Interface(
    fn=generate,
    inputs=txt,
    outputs=out,
    title="Fine-Tuned Phi-3 Model",
    description="This demo uses a fine-tuned Phi-3 model to optimize text prompts.",
    flagging_mode="never",  # `flagging_mode` replaces the deprecated `allow_flagging`
    cache_examples=False,
    theme="default",
).launch(share=True)