import os

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Base model and LoRA adapter
model_name = "meta-llama/Meta-Llama-3-70B-Instruct"
lora_name = "Thermostatic/Llama-3-NeuralTranslate-Instructions-70b-v0.1-lora"

# Llama 3 is a decoder-only (causal) model, so AutoModelForCausalLM is the
# correct auto class here, not AutoModelForSeq2SeqLM. The token is needed
# because the base repo is gated; bfloat16 keeps the 70B weights tractable.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=os.environ["HUGGINGFACE_TOKEN"],
    torch_dtype=torch.bfloat16,
)
tokenizer = AutoTokenizer.from_pretrained(
    model_name, token=os.environ["HUGGINGFACE_TOKEN"]
)

# Load the LoRA adapter via the transformers PEFT integration; the adapter
# becomes active as soon as it is loaded, so generate() needs no extra
# adapter argument.
model.load_adapter(lora_name)
model.to("cuda")


@spaces.GPU
def translate(input_text):
    # Move the inputs to the model's device before generating.
    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(model.device)
    # max_new_tokens bounds only the generated continuation, not the prompt.
    response = model.generate(input_ids, max_new_tokens=1024)
    response_text = tokenizer.decode(response[0], skip_special_tokens=True)
    return f"Translated text: {response_text}"


with gr.Blocks() as demo:
    with gr.Row():
        input_text = gr.Textbox(label="Enter a message to translate:")
        submit = gr.Button("Translate")
        output = gr.Textbox(label="Translated text:")
    # Pass the component objects themselves, not their variable names as strings.
    submit.click(fn=translate, inputs=input_text, outputs=output)

demo.launch()
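
# --- Optional chat-template variant (a hedged sketch, not part of the app
# above). Llama 3 Instruct checkpoints are trained on a chat format, so
# wrapping the user text with tokenizer.apply_chat_template before calling
# generate() may behave better than encoding raw text. The system prompt
# below is an illustrative assumption, not taken from the adapter's docs.
@spaces.GPU
def translate_chat(input_text):
    messages = [
        # Assumed instruction; adjust to whatever the adapter was trained on.
        {"role": "system", "content": "Translate the user's message."},
        {"role": "user", "content": input_text},
    ]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    response = model.generate(input_ids, max_new_tokens=1024)
    # Decode only the newly generated tokens, skipping the echoed prompt.
    response_text = tokenizer.decode(
        response[0][input_ids.shape[-1]:], skip_special_tokens=True
    )
    return f"Translated text: {response_text}"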