Spaces:
Runtime error
Runtime error
import spaces | |
import os | |
import gradio as gr | |
import torch | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
from huggingface_hub import login | |
# --- Authentication -------------------------------------------------------
# The access token is read from the HF_TOKEN environment variable.
huggingface_token = os.getenv("HF_TOKEN")
# Treat an empty string the same as a missing variable so that an unusable
# token is rejected here instead of being handed to login().
if not huggingface_token:
    raise ValueError("Hugging Face token not set. Please set the HF_TOKEN environment variable.")
# Login using the Hugging Face token
login(token=huggingface_token)

# --- Model and tokenizer --------------------------------------------------
model_name = "meta-llama/Meta-Llama-3.1-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# NOTE(review): no dtype is requested, so the weights load in the library's
# default precision; an 8B-parameter model may need a reduced precision to
# fit in memory -- confirm against the target hardware.
model = AutoModelForCausalLM.from_pretrained(model_name)

# Move the model to GPU if available, otherwise stay on CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
# NOTE(review): the original comment here read "Request GPU resources for 120
# seconds", but nothing in this file requests GPU time. `spaces` is imported
# and unused, so a `@spaces.GPU(duration=120)` decorator may have been
# intended on this function -- confirm before adding it.
def predict(input_text, temperature=0.2):
    """Generate a continuation of ``input_text`` with the loaded model.

    Args:
        input_text: Prompt text to continue.
        temperature: Sampling temperature. A value greater than 0 enables
            sampling at that temperature; 0 or ``None`` selects greedy
            decoding, because 0 is not a valid sampling temperature.

    Returns:
        The decoded output sequence (special tokens stripped), or the
        exception message as a string if anything fails, so the UI shows the
        error text instead of crashing.
    """
    try:
        # Calling the tokenizer (rather than .encode) also yields the
        # attention mask, which generate() uses alongside the input ids.
        encoded = tokenizer(input_text, return_tensors="pt").to(device)

        generation_kwargs = {"max_new_tokens": 50}
        if temperature is not None and temperature > 0:
            # Temperature only influences the output when sampling is on.
            generation_kwargs["do_sample"] = True
            generation_kwargs["temperature"] = float(temperature)
        else:
            generation_kwargs["do_sample"] = False

        # Inference only: skip gradient tracking.
        with torch.no_grad():
            outputs = model.generate(**encoded, **generation_kwargs)

        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # Deliberate best-effort at the UI boundary: report the error text.
        return str(e)
# Build the UI components first, then wire them into the Gradio interface.
prompt_box = gr.Textbox(lines=2, placeholder="Enter text here...", label="Input Text")
temperature_slider = gr.Slider(minimum=0, maximum=1, value=0.2, label="Temperature")
output_box = gr.Textbox(label="Output Text")

interface = gr.Interface(
    fn=predict,
    inputs=[prompt_box, temperature_slider],
    outputs=output_box,
    title="Transformer Model Prediction",
    description="Enter text and adjust the temperature to get predictions from the transformer model.",
)

# Serve the app on all network interfaces, port 7860.
interface.launch(server_name="0.0.0.0", server_port=7860)