Browse files
@@ -1,43 +1,58 @@
1 |
import streamlit as st
2 |
from transformers import T5ForConditionalGeneration, T5Tokenizer
3 |
import torch
4 |
import spacy
5 |
import nltk
6 |
from b import b
7 |'punkt')
8 |
from nltk.tokenize import sent_tokenize
9 |
10 |
# Load spaCy model
11 |
nlp = spacy.load("en_core_web_sm")
12 |
13 |
# Load T5 model and tokenizer
14 |
model_name = "DevBM/t5-large-squad"
15 |
model = T5ForConditionalGeneration.from_pretrained(model_name)
16 |
tokenizer = T5Tokenizer.from_pretrained(model_name)
17 |
18 |
# Function to extract keywords using spaCy
19 |
def extract_keywords(text):
20 |
doc = nlp(text)
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
# Function to map keywords to sentences
32 |
def map_keywords_to_sentences(text, keywords):
33 |
sentences = sent_tokenize(text)
34 |
keyword_sentence_mapping = {}
35 |
for keyword in keywords:
36 |
for i, sentence in enumerate(sentences):
37 |
if keyword in sentence:
38 |
# Combine current sentence with surrounding sentences for context
39 |
start = max(0, i-
40 |
end = min(len(sentences), i+
41 |
context = ' '.join(sentences[start:end])
42 |
if keyword not in keyword_sentence_mapping:
43 |
keyword_sentence_mapping[keyword] = context
@@ -45,28 +60,77 @@ def map_keywords_to_sentences(text, keywords):
45 |
keyword_sentence_mapping[keyword] += ' ' + context
46 |
return keyword_sentence_mapping
47 |
48 |
# Function to generate a question from a context and an answer using T5
49 |
50 |
input_text = f"<context> {context} <answer> {answer}"
51 |
input_ids = tokenizer.encode(input_text, return_tensors='pt')
52 |
outputs = model.generate(input_ids)
53 |
question = tokenizer.decode(outputs[0], skip_special_tokens=True)
54 |
return question
55 |
56 |
# Streamlit interface
57 |
st.title("Question Generator from Text")
58 |
text = st.text_area("Enter text here:")
59 |
if st.button("Generate Questions"):
60 |
if text:
61 |
keywords = extract_keywords(text)
62 |
keyword_sentence_mapping = map_keywords_to_sentences(text, keywords)
63 |
64 |
st.subheader("Generated Questions:")
65 |
66 |
67 |
st.write(f"**Context:** {context}")
68 |
st.write(f"**Answer:** {keyword}")
69 |
st.write(f"**Question:** {question}")
70 |
71 |
72 |
st.write("Please enter some text to generate questions.")
1 |
import streamlit as st
2 |
from transformers import T5ForConditionalGeneration, T5Tokenizer
3 |
import spacy
4 |
import nltk
5 |
from sklearn.feature_extraction.text import TfidfVectorizer
6 |
from rake_nltk import Rake
7 |
import pandas as pd
8 |
from fpdf import FPDF
9 |
import wikipediaapi
10 |
from b import b
11 |
# Download the Punkt sentence-tokenizer data that sent_tokenize relies on.
nltk.download('punkt')
# NOTE(review): newer NLTK releases appear to load the tokenizer from the
# 'punkt_tab' resource instead — confirm against the NLTK version in use.
nltk.download('punkt_tab')
13 |
from nltk.tokenize import sent_tokenize
14 |
15 |
# Load spaCy model
16 |
# Streamlit re-executes this script from the top on every widget interaction,
# so the heavyweight models are built inside a cached factory instead of being
# reloaded on each rerun. Falls back to a no-op decorator when the installed
# Streamlit has no `cache_resource`.
_cache_resource = getattr(st, "cache_resource", lambda func: func)

# Hugging Face identifier of the T5 question-generation checkpoint.
model_name = "DevBM/t5-large-squad"


@_cache_resource
def _load_resources(name):
    """Load the spaCy pipeline plus the T5 model and tokenizer for ``name``.

    Returns:
        A ``(nlp, model, tokenizer)`` tuple.
    """
    return (
        spacy.load("en_core_web_sm"),
        T5ForConditionalGeneration.from_pretrained(name),
        T5Tokenizer.from_pretrained(name),
    )


# wiki_wiki = wikipediaapi.Wikipedia('en')

# Load spaCy model, T5 model and tokenizer
nlp, model, tokenizer = _load_resources(model_name)
23 |
24 |
# Function to extract keywords using combined techniques
25 |
def extract_keywords(text):
    """Collect candidate answer keywords from ``text`` with RAKE, spaCy and TF-IDF.

    Args:
        text: Raw input text.

    Returns:
        A list of unique keyword strings in a stable order: RAKE phrases
        first (in the order RAKE returns them), then spaCy named entities,
        then spaCy tokens tagged NOUN/PROPN/VERB/ADJ, then the TF-IDF
        vocabulary. Empty or whitespace-only text yields an empty list.
    """
    if not text or not text.strip():
        return []

    # Use RAKE. The text must be fed to the extractor before the ranked
    # phrases can be read back.
    rake = Rake()
    rake.extract_keywords_from_text(text)
    rake_keywords = rake.get_ranked_phrases()

    # Use spaCy for NER and POS tagging
    doc = nlp(text)
    entity_keywords = [ent.text for ent in doc.ents]
    token_keywords = [
        token.text
        for token in doc
        if token.pos_ in ("NOUN", "PROPN", "VERB", "ADJ")
    ]

    # Use TF-IDF. Only the fitted vocabulary is used, so fit() is enough.
    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        vectorizer.fit([text])
    except ValueError:
        # Raised when no term survives tokenisation / stop-word removal.
        tfidf_keywords = []
    else:
        tfidf_keywords = list(vectorizer.get_feature_names_out())

    # Combine all keywords. dict.fromkeys de-duplicates while keeping the
    # first-seen order, so callers that only use the first N keywords get the
    # same ones on every run (set iteration order is not stable for strings).
    combined_keywords = dict.fromkeys(
        [*rake_keywords, *entity_keywords, *token_keywords, *tfidf_keywords]
    )
    return list(combined_keywords)
45 |
46 |
# Function to map keywords to sentences with customizable context window size
47 |
def map_keywords_to_sentences(text, keywords, context_window_size=1):
    """Map each keyword to the sentences mentioning it, with surrounding context.

    Args:
        text: Source text; it is split into sentences with ``sent_tokenize``.
        keywords: Keyword strings to look for.
        context_window_size: Number of sentences to include before and after
            each matching sentence. Defaults to 1.

    Returns:
        A dict mapping every keyword found in at least one sentence to one
        string: the context windows of all its matching sentences, joined by
        single spaces. Keywords that never match are omitted. Empty ``text``
        or ``keywords`` yields an empty dict.
    """
    if not text or not keywords:
        return {}

    sentences = sent_tokenize(text)
    contexts_by_keyword = {}
    for keyword in keywords:
        if not keyword:
            # '' is a substring of every sentence and would match everything.
            continue
        for i, sentence in enumerate(sentences):
            # Plain, case-sensitive substring match.
            if keyword in sentence:
                # Combine current sentence with surrounding sentences for context
                start = max(0, i - context_window_size)
                end = min(len(sentences), i + context_window_size + 1)
                context = ' '.join(sentences[start:end])
                contexts_by_keyword.setdefault(keyword, []).append(context)

    # Each window is stored once and joined at the end, so the first match is
    # not duplicated and repeated string concatenation is avoided.
    return {
        keyword: ' '.join(contexts)
        for keyword, contexts in contexts_by_keyword.items()
    }
62 |
63 |
# Function to perform entity linking using Wikipedia API
64 |
# def entity_linking(keyword):
65 |
# page = wiki_wiki.page(keyword)
66 |
# if page.exists():
67 |
# return page.fullurl
68 |
# return None
69 |
70 |
# Function to generate questions using beam search
71 |
def generate_question(context, answer, num_beams=5):
    """Generate a question for ``answer`` grounded in ``context`` with the T5 model.

    Args:
        context: Passage the question should be based on.
        answer: Answer text the question should target.
        num_beams: Beam width forwarded to ``model.generate``. Defaults to 5.

    Returns:
        The decoded question string with special tokens removed.
    """
    input_text = f"<context> {context} <answer> {answer}"
    input_ids = tokenizer.encode(input_text, return_tensors='pt')
    # early_stopping only applies to beam search; it is switched off for a
    # single beam, where it would have no effect and makes transformers warn.
    outputs = model.generate(
        input_ids,
        num_beams=num_beams,
        early_stopping=num_beams > 1,
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
77 |
78 |
# Function to export questions to CSV
79 |
def export_to_csv(data, path='questions.csv'):
    """Write generated questions to a CSV file.

    Args:
        data: Sequence of ``(context, answer, question)`` rows.
        path: Destination file. Defaults to ``questions.csv`` in the current
            working directory.
    """
    df = pd.DataFrame(data, columns=["Context", "Answer", "Question"])
    df.to_csv(path, index=False)
82 |
83 |
# Function to export questions to PDF
84 |
def export_to_pdf(data, path='questions.pdf'):
    """Write generated questions to a PDF file.

    Args:
        data: Iterable of ``(context, answer, question)`` rows.
        path: Destination file. Defaults to ``questions.pdf`` in the current
            working directory.
    """
    def _latin1(value):
        # NOTE(review): FPDF's built-in fonts such as Arial are understood to
        # cover Latin-1 only; other characters are replaced with '?' so the
        # export does not fail — confirm against the FPDF version in use.
        return str(value).encode('latin-1', 'replace').decode('latin-1')

    pdf = FPDF()
    # A page must be open before any cell can be written.
    pdf.add_page()
    pdf.set_font("Arial", size=12)

    for context, answer, question in data:
        pdf.multi_cell(0, 10, _latin1(f"Context: {context}"))
        pdf.multi_cell(0, 10, _latin1(f"Answer: {answer}"))
        pdf.multi_cell(0, 10, _latin1(f"Question: {question}"))

    # Nothing reaches disk until the document is explicitly written out.
    pdf.output(path)
93 |
94 |
95 |
96 |
97 |
# Streamlit interface
98 |
st.title("Question Generator from Text")
text = st.text_area("Enter text here:", value="Joe Biden, the current US president is on a weak wicket going in for his reelection later this November against former President Donald Trump.")

# Customization options
num_beams = st.slider("Select number of beams for question generation", min_value=1, max_value=10, value=5)
context_window_size = st.slider("Select context window size (number of sentences before and after)", min_value=1, max_value=5, value=1)
num_questions = st.slider("Select number of questions to generate", min_value=1, max_value=1000, value=5)
# NOTE(review): the selected complexity is not consumed anywhere in this
# script yet.
question_complexity = st.selectbox("Select question complexity", ["Simple", "Intermediate", "Complex"])

if st.button("Generate Questions"):
    if text:
        keywords = extract_keywords(text)
        keyword_sentence_mapping = map_keywords_to_sentences(text, keywords, context_window_size)

        data = []
        for i, (keyword, context) in enumerate(keyword_sentence_mapping.items()):
            # Stop once the requested number of questions has been produced.
            if i >= num_questions:
                break
            # linked_entity = entity_linking(keyword)
            question = generate_question(context, keyword, num_beams=num_beams)
            data.append((context, keyword, question))

        # A button is only True during the rerun triggered by its own click,
        # so the results are kept in session state to survive the rerun that
        # an export-button click causes.
        st.session_state["generated_questions"] = data
    else:
        if "generated_questions" in st.session_state:
            del st.session_state["generated_questions"]
        st.write("Please enter some text to generate questions.")

if "generated_questions" in st.session_state:
    data = st.session_state["generated_questions"]

    st.subheader("Generated Questions:")
    for context, keyword, question in data:
        st.write(f"**Context:** {context}")
        st.write(f"**Answer:** {keyword}")
        st.write(f"**Question:** {question}")
        # if linked_entity:
        #     st.write(f"**Entity Link:** {linked_entity}")

    # Export buttons. They sit outside the "Generate Questions" branch so
    # their clicks are still seen on the rerun they trigger.
    if st.button("Export to CSV"):
        export_to_csv(data)
        st.success("Questions exported to questions.csv")

    if st.button("Export to PDF"):
        export_to_pdf(data)
        st.success("Questions exported to questions.pdf")