Update app.py
app.py CHANGED
@@ -15,15 +15,18 @@ from nltk.tokenize import sent_tokenize
 nltk.download('wordnet')
 from nltk.corpus import wordnet
 import random
-
+from sense2vec import Sense2Vec
+import sense2vec
 # Load spaCy model
 nlp = spacy.load("en_core_web_sm")
+# s2v = Sense2Vec.from_disk(self=Sense2Vec,path='s2v_old')
 
+s2v = sense2vec.Sense2Vec().from_disk('s2v_old')
 # Initialize Wikipedia API with a user agent
 user_agent = 'QGen/1.0 ([email protected])'
 wiki_wiki = wikipediaapi.Wikipedia(user_agent= user_agent,language='en')
 
-@st.cache_resource
+@st.cache_resource
 def load_model():
     model_name = "DevBM/t5-large-squad"
     model = T5ForConditionalGeneration.from_pretrained(model_name)
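
Note on the new loading code: `s2v = sense2vec.Sense2Vec().from_disk('s2v_old')` runs at module level, so every Streamlit rerun that re-executes the script pays the full vector-load cost. A minimal sketch of deferring it behind the same `@st.cache_resource` pattern already used for the model (the `s2v_old` path, a pretrained sense2vec vector directory unpacked next to `app.py`, comes from the diff; the helper name is hypothetical):

    import streamlit as st
    import sense2vec

    @st.cache_resource
    def load_s2v(path='s2v_old'):
        # from_disk returns the populated Sense2Vec object, and
        # cache_resource keeps it so the vectors are read only once
        return sense2vec.Sense2Vec().from_disk(path)

    s2v = load_s2v()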
@@ -55,20 +58,19 @@ def extract_keywords(text):
 # Load spaCy model (medium-sized model with word vectors)
 nlp = spacy.load("en_core_web_md")
 
-def
-#
-
+def get_similar_words_sense2vec(word, n=3):
+    # Try to find the word with its most likely part-of-speech
+    word_with_pos = word + "|NOUN"
+    if word_with_pos in s2v:
+        similar_words = s2v.most_similar(word_with_pos, n=n)
+        return [word.split("|")[0] for word, _ in similar_words]
 
-#
-
-
-
-        similarity = nlp(w.text).similarity(nlp(word))
-        similar_words.append((w.text, similarity))
+    # If not found, try without POS
+    if word in s2v:
+        similar_words = s2v.most_similar(word, n=n)
+        return [word.split("|")[0] for word, _ in similar_words]
 
-
-    similar_words.sort(key=lambda x: x[1], reverse=True)
-    return [word for word, _ in similar_words[:n]]
+    return []
 
 def get_synonyms(word, n=3):
     synonyms = []
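
The new `get_similar_words_sense2vec` hardcodes the `|NOUN` sense and falls back to the bare word, but sense2vec keys have the form `phrase|POS` with underscores for spaces, so the bare-word check will rarely match and a multi-word answer like "machine learning" misses both branches. sense2vec also provides `get_best_sense`, which picks the most frequent sense instead of assuming a noun; a hedged alternative sketch (the underscore handling and the helper name are assumptions, not in the original):

    def get_similar_words_best_sense(word, n=3):
        # keys look like "natural_language_processing|NOUN"
        key = s2v.get_best_sense(word.replace(" ", "_"))
        if key is None:
            return []
        return [k.split("|")[0].replace("_", " ")
                for k, _ in s2v.most_similar(key, n=n)]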
@@ -83,8 +85,8 @@ def get_synonyms(word, n=3):
 def generate_options(answer, context, n=3):
     options = [answer]
 
-    # Try to get similar words based on
-    similar_words =
+    # Try to get similar words based on sense2vec
+    similar_words = get_similar_words_sense2vec(answer, n)
     options.extend(similar_words)
 
     # If we don't have enough options, try synonyms
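
A caveat with `options.extend(similar_words)`: `most_similar` often returns case or inflection variants of the answer itself ("Computer", "computers"), which make weak distractors. A small, hypothetical guard that could replace the plain extend:

    for w in similar_words:
        if w.lower() != answer.lower() and w.lower() not in (o.lower() for o in options):
            options.append(w)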
@@ -138,6 +140,7 @@ def entity_linking(keyword):
     return None
 
 # Function to generate questions using beam search
+@st.cache_data
 def generate_question(context, answer, num_beams):
     input_text = f"<context> {context} <answer> {answer}"
     input_ids = tokenizer.encode(input_text, return_tensors='pt')
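
`@st.cache_data` hashes the function arguments, so a rerun with an unchanged `(context, answer, num_beams)` returns the cached question instead of repeating beam search. The `model.generate` call itself falls outside this hunk; a sketch of the full decorated function under assumed generation parameters (`max_length` and `early_stopping` are guesses, not from the diff):

    @st.cache_data
    def generate_question(context, answer, num_beams):
        input_text = f"<context> {context} <answer> {answer}"
        input_ids = tokenizer.encode(input_text, return_tensors='pt')
        # beam search over the T5 decoder
        output = model.generate(input_ids, num_beams=num_beams,
                                early_stopping=True, max_length=64)
        return tokenizer.decode(output[0], skip_special_tokens=True)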
@@ -147,7 +150,7 @@ def generate_question(context, answer, num_beams):
 
 # Function to export questions to CSV
 def export_to_csv(data):
-    df = pd.DataFrame(data, columns=["Context", "Answer", "Question"])
+    df = pd.DataFrame(data, columns=["Context", "Answer", "Question", "Options"])
     csv = df.to_csv(index=False,encoding='utf-8')
     return csv
 
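
Each row's `Options` field is now a Python list, and `DataFrame.to_csv` will write its `repr` ("['a', 'b', 'c']"), brackets and quotes included. If plain text cells are wanted, a sketch that flattens the list first (the "; " separator is an arbitrary choice):

    def export_to_csv(data):
        rows = [(c, a, q, "; ".join(opts)) for c, a, q, opts in data]
        df = pd.DataFrame(rows, columns=["Context", "Answer", "Question", "Options"])
        return df.to_csv(index=False, encoding='utf-8')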
@@ -157,7 +160,7 @@ def export_to_pdf(data):
     pdf.add_page()
     pdf.set_font("Arial", size=12)
 
-    for context, answer, question in data:
+    for context, answer, question, options in data:
         pdf.multi_cell(0, 10, f"Context: {context}")
         pdf.multi_cell(0, 10, f"Answer: {answer}")
         pdf.multi_cell(0, 10, f"Question: {question}")
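
The loop header now unpacks four fields, but none of the cells visible in this hunk prints `options`; unless that happens below the hunk, the options are silently dropped from the PDF. A hypothetical extra cell, lettered to match the on-screen list:

    for j, option in enumerate(options):
        pdf.multi_cell(0, 10, f"{chr(65 + j)}. {option}")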
@@ -199,13 +202,13 @@ if st.button("Generate Questions"):
         st.write(f"**Answer:** {keyword}")
         st.write(f"**Question:** {question}")
         st.write(f"**Options:**")
-        for j, option in options:
+        for j, option in enumerate(options):
             st.write(f"{chr(65+j)}. {option}")
 
         if linked_entity:
             st.write(f"**Entity Link:** {linked_entity}")
         st.write("---")
-        data.append((context, keyword, question))
+        data.append((context, keyword, question, options))
 
         # Add the data to session state
         st.session_state.data = data
@@ -224,4 +227,4 @@ if st.button("Generate Questions"):
 
 
 else:
-    st.write("Please enter some text to generate questions.")
+    st.write("Please enter some text to generate questions.")