Spaces:

ambrosfitz
/

t5-history-MC

Runtime error

File size: 2,909 Bytes

import inspect
import time

import gradio as gr
import torch
from transformers import AutoModelForSeq2SeqLM, T5Tokenizer

# Identifier handed to both from_pretrained() calls below.
model_name = "ambrosfitz/history-qa-t5-base"
try:
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    tokenizer = T5Tokenizer.from_pretrained(model_name, use_fast=False)
except Exception as load_error:
    # Surface the reason on stdout, then let the original exception abort startup.
    print(f"Error loading model or tokenizer: {load_error}")
    raise

# Use CUDA when torch reports it as available, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model.to(device)

def _parse_generated_qa(generated_text):
    """Split raw model output into its labelled sections.

    The text is expected to contain the markers "Question: ", "Options:",
    "Correct Answer:" and (optionally) "Explanation:" in that order.

    Returns:
        A ``(question, options, correct_answer, explanation)`` tuple of
        stripped strings, or ``None`` when any of the first three markers is
        missing. A missing "Explanation:" marker yields the placeholder
        "No explanation provided.".
    """
    parts = generated_text.split("Question: ")
    if len(parts) < 2:
        return None

    qa_parts = parts[1].split("Options:")
    if len(qa_parts) < 2:
        return None
    question = qa_parts[0].strip()

    options_and_answer = qa_parts[1].split("Correct Answer:")
    if len(options_and_answer) < 2:
        return None
    options = options_and_answer[0].strip()

    answer_and_explanation = options_and_answer[1].split("Explanation:")
    correct_answer = answer_and_explanation[0].strip()
    if len(answer_and_explanation) > 1:
        explanation = answer_and_explanation[1].strip()
    else:
        explanation = "No explanation provided."

    return question, options, correct_answer, explanation


def generate_qa(text, max_length=512):
    """Generate a multiple-choice question for *text* and format it for display.

    The input is prefixed with "Generate question: ", tokenized (truncated to
    ``max_length`` tokens), and passed to ``model.generate`` with sampling
    enabled (temperature 0.7). The decoded output is then split into its
    question / options / correct answer / explanation sections.

    Args:
        text: Source passage to generate a question about.
        max_length: Token cap used both for truncating the tokenized prompt
            and as ``max_length`` for generation.

    Returns:
        A formatted string with the four sections, or a fixed fallback message
        when the input is blank or the model output lacks a required section.
    """
    fallback = "Unable to generate a proper question and answer. Please try again with a different input."

    # Nothing useful can be generated from a blank prompt; skip the model call.
    if not text or not text.strip():
        return fallback

    input_text = f"Generate question: {text}"
    input_ids = tokenizer(
        input_text, return_tensors="pt", max_length=max_length, truncation=True
    ).input_ids.to(device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            max_length=max_length,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
        )

    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Sampling can produce output that omits a section; treat that as a failed
    # generation rather than letting an IndexError escape to the UI.
    parsed = _parse_generated_qa(generated_text)
    if parsed is None:
        return fallback

    question, options, correct_answer, explanation = parsed
    return f"Question: {question}\n\nOptions: {options}\n\nCorrect Answer: {correct_answer}\n\nExplanation: {explanation}"

def slow_qa(message, history):
    """Yield the generated answer one character at a time.

    Calls ``generate_qa(message)`` once, then yields successively longer
    prefixes of the result, pausing 0.01 s before each one. ``history`` is
    accepted but not used.
    """
    answer = generate_qa(message)
    for end in range(1, len(answer) + 1):
        time.sleep(0.01)  # Adjust this value to control the speed of the response
        yield answer[:end]

# Create and launch the Gradio interface.
_chat_options = {
    "chatbot": gr.Chatbot(height=500),
    "textbox": gr.Textbox(placeholder="Enter historical text here...", container=False, scale=7),
    "title": "History Q&A Generator",
    "description": "Enter a piece of historical text, and the model will generate a related question, answer options, correct answer, and explanation.",
    "theme": "soft",
    "examples": [
        "The American Revolution was a colonial revolt that took place between 1765 and 1783.",
        "World War II was a global conflict that lasted from 1939 to 1945, involving many of the world's nations.",
        "The Renaissance was a period of cultural, artistic, political, and economic revival following the Middle Ages.",
    ],
    "cache_examples": False,
}

# The button-label keywords are not accepted by every Gradio release (Gradio 5
# removed them from ChatInterface), and an unknown keyword raises TypeError at
# startup. Pass each one only if the installed ChatInterface declares it.
_legacy_button_labels = {
    "retry_btn": "Regenerate",
    "undo_btn": "Remove last",
    "clear_btn": "Clear",
}
_accepted_params = inspect.signature(gr.ChatInterface.__init__).parameters
_chat_options.update(
    (name, label)
    for name, label in _legacy_button_labels.items()
    if name in _accepted_params
)

gr.ChatInterface(slow_qa, **_chat_options).launch()