# Spaces: Sleeping
import streamlit as st | |
import wikipedia | |
from haystack.document_stores import InMemoryDocumentStore | |
from haystack.utils import clean_wiki_text, convert_files_to_docs | |
from haystack.nodes import TfidfRetriever, FARMReader | |
from haystack.pipelines import ExtractiveQAPipeline | |
from main import print_qa, QuestionGenerator | |
def main():
    """Render the Streamlit page that generates multiple-choice questions.

    The page lets the user either type a paragraph or pick one of two
    Wikipedia topics, choose how many questions to produce, and press a
    button.  On button press the cleaned text is handed to
    ``QuestionGenerator.generate`` with ``answer_style='multiple_choice'``
    and every returned question is written to the page together with its
    answer options.

    Returns:
        None.  All output is rendered through Streamlit calls.
    """
    # Set the Streamlit app title
    st.title("Question Generation using Haystack and Streamlit")

    # Select the input type
    inputs = ["Input Paragraph", "Wikipedia Examples"]
    input_type = st.selectbox("Select an input type:", inputs)

    # Stays empty when no usable text is available; checked before generating.
    wiki_text = ""

    if input_type == "Input Paragraph":
        # Allow user to input text paragraph
        wiki_text = st.text_area("Input paragraph:", height=200)
    elif input_type == "Wikipedia Examples":
        topics = ["Deep Learning", "Machine Learning"]
        selected_topic = st.selectbox("Select a topic:", topics)
        if selected_topic:
            try:
                wiki_text = wikipedia.page(selected_topic).content
            except wikipedia.exceptions.WikipediaException as err:
                # Missing or ambiguous pages should produce a message on the
                # page instead of an unhandled traceback.
                st.error(
                    f"Could not retrieve Wikipedia content for "
                    f"'{selected_topic}': {err}"
                )
            else:
                # Display the retrieved Wikipedia content (optional)
                st.text_area(
                    "Retrieved Wikipedia content:", wiki_text, height=200
                )

    # Preprocess the input text
    wiki_text = clean_wiki_text(wiki_text)

    # Allow user to specify the number of questions to generate
    num_questions = st.slider(
        "Number of questions to generate:", min_value=1, max_value=20, value=5
    )

    # Allow user to specify the model to use
    model_options = [
        "deepset/roberta-base-squad2",
        "deepset/roberta-base-squad2-distilled",
        "bert-large-uncased-whole-word-masking-squad2",
        "deepset/flan-t5-xl-squad2",
    ]
    # NOTE(review): the selection is kept so the page layout is unchanged, but
    # it is not consumed below.  Previously it was only used to build an
    # extractive-QA pipeline whose result was never read; wire it into
    # QuestionGenerator if that class supports choosing a model -- confirm.
    st.selectbox("Select model:", model_options)

    # Button to generate questions
    if st.button("Generate Questions"):
        # Nothing to generate from: tell the user instead of running the
        # generator on empty text.
        if not wiki_text.strip():
            st.warning("Please provide some text before generating questions.")
            return

        qg = QuestionGenerator()
        qa_list = qg.generate(
            wiki_text,
            num_questions=num_questions,
            answer_style='multiple_choice',
        )

        # Display the generated questions and answers
        st.header("Generated Questions and Answers:")
        for idx, qa in enumerate(qa_list, start=1):
            st.write(f"Question {idx}: {qa['question']}")
            # Each option is read as a mapping with 'answer' and 'correct'.
            if 'answer' in qa:
                for i, option in enumerate(qa['answer'], start=1):
                    correct_marker = "(correct)" if option["correct"] else ""
                    st.write(f"Option {i}: {option['answer']} {correct_marker}")
            # Add a separator after each question-answer pair
            st.write("-" * 40)
# Run the Streamlit app when this file is executed as a script.
if __name__ == "__main__":
    main()