Spaces:
Runtime error
Runtime error
File size: 1,958 Bytes
96cf708 a544069 96cf708 5495567 96cf708 5fe82b2 96cf708 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM
# Hub repository ids: the fine-tuned adapter and the base model it is applied to.
ADAPTER_ID = "AliEssa555/latest-podcast-model-ft"
BASE_MODEL_ID = "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ"

# Load the adapter configuration, the base model, and attach the adapter weights.
config = PeftConfig.from_pretrained(ADAPTER_ID)
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID)
model = PeftModel.from_pretrained(base_model, ADAPTER_ID)

# AutoTokenizer.from_pretrained expects a repository id or a local path, not a
# model object, so the tokenizer is loaded from the same id as the base model.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)

# Run on the GPU when one is available; otherwise the model stays where it was loaded.
if torch.cuda.is_available():
    model = model.to("cuda")
# Generate a response based on user input
def generate_response(user_input):
    """Generate the model's answer to a question typed by the user.

    Args:
        user_input: Question text entered in the UI.

    Returns:
        The decoded text of the newly generated tokens with surrounding
        whitespace removed, or a short hint when no question was entered.
    """
    # Nothing to answer: skip tokenization and generation entirely.
    if not user_input or not user_input.strip():
        return "Please enter a question."

    # Format the input as an instructional prompt
    prompt = f"[INST] User: {user_input} [/INST] Assistant:"

    # Tokenize the prompt and move the tensors to the device used at load time
    device = "cuda" if torch.cuda.is_available() else "cpu"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_length = inputs["input_ids"].shape[-1]

    # max_new_tokens bounds only the answer; max_length would also count the
    # prompt tokens, leaving long questions with little or no room for a reply.
    output_tokens = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=512,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
    )

    # Decode only what was generated after the prompt, so the answer is not
    # cut short when its own text happens to contain "Assistant:".
    completion_tokens = output_tokens[0][prompt_length:]
    return tokenizer.decode(completion_tokens, skip_special_tokens=True).strip()
# Build the Gradio UI: a heading, a question box, an answer box and a button.
# NOTE(review): the original nesting was lost with the indentation; this assumes
# the button sits below the two rows, inside the Blocks context — confirm.
demo = gr.Blocks()
with demo:
    gr.Markdown("## LLM Podcast Response Generator")

    with gr.Row():
        user_input = gr.Textbox(
            label="Enter your question related to the podcast:",
            placeholder="Type your question here...",
        )

    with gr.Row():
        response_output = gr.Textbox(label="Model's Response")

    submit_button = gr.Button("Generate Response")

    # Clicking the button sends the question to generate_response and shows
    # the returned text in the response box.
    submit_button.click(
        fn=generate_response,
        inputs=[user_input],
        outputs=[response_output],
    )

# Start serving the app
demo.launch()
|