DevBM committed on
Commit
ac0aab4
·
verified ·
1 Parent(s): d90ce6c

Reverting to the updated app.py (only the enhanced NER function is not working; it is commented out)

Browse files
Files changed (1) hide show
  1. app.py +28 -615
app.py CHANGED
@@ -1,50 +1,9 @@
1
- import streamlit as st
2
- from transformers import T5ForConditionalGeneration, T5Tokenizer
3
- import spacy
4
  import nltk
5
- from sklearn.feature_extraction.text import TfidfVectorizer
6
- from rake_nltk import Rake
7
- import pandas as pd
8
- from fpdf import FPDF
9
- import wikipediaapi
10
- from functools import lru_cache
11
  nltk.download('punkt')
12
  nltk.download('stopwords')
13
  nltk.download('brown')
14
- from nltk.tokenize import sent_tokenize
15
  nltk.download('wordnet')
16
- from nltk.corpus import wordnet
17
- import random
18
- import sense2vec
19
- from wordcloud import WordCloud
20
- import matplotlib.pyplot as plt
21
- import json
22
- import os
23
- from sentence_transformers import SentenceTransformer, util
24
- import textstat
25
- from spellchecker import SpellChecker
26
- from transformers import pipeline
27
- import re
28
- import pymupdf
29
- import uuid
30
- import time
31
- import asyncio
32
- import aiohttp
33
- from datetime import datetime
34
- import base64
35
- from io import BytesIO
36
- # '-----------------'
37
- import smtplib
38
- from email.mime.multipart import MIMEMultipart
39
- from email.mime.text import MIMEText
40
- from email.mime.base import MIMEBase
41
- from email.mime.application import MIMEApplication
42
- from email import encoders
43
- # '------------------'
44
- from gliner import GLiNER
45
- # -------------------
46
-
47
- print("***************************************************************")
48
 
49
  st.set_page_config(
50
  page_icon='cyclone',
@@ -55,62 +14,19 @@ st.set_page_config(
55
  }
56
  )
57
 
58
- st.set_option('deprecation.showPyplotGlobalUse',False)
59
-
60
- class QuestionGenerationError(Exception):
61
- """Custom exception for question generation errors."""
62
- pass
63
-
64
-
65
- # Initialize Wikipedia API with a user agent
66
- user_agent = 'QGen/1.2'
67
- wiki_wiki = wikipediaapi.Wikipedia(user_agent= user_agent,language='en')
68
-
69
- def get_session_id():
70
- if 'session_id' not in st.session_state:
71
- st.session_state.session_id = str(uuid.uuid4())
72
- return st.session_state.session_id
73
-
74
- def initialize_state(session_id):
75
- if 'session_states' not in st.session_state:
76
- st.session_state.session_states = {}
77
-
78
- if session_id not in st.session_state.session_states:
79
- st.session_state.session_states[session_id] = {
80
- 'generated_questions': [],
81
- # add other state variables as needed
82
- }
83
- return st.session_state.session_states[session_id]
84
-
85
- def get_state(session_id):
86
- return st.session_state.session_states[session_id]
87
-
88
- def set_state(session_id, key, value):
89
- st.session_state.session_states[session_id][key] = value
90
 
 
 
 
 
 
 
 
 
 
 
91
 
92
- @st.cache_resource
93
- def load_model(modelname):
94
- model_name = modelname
95
- model = T5ForConditionalGeneration.from_pretrained(model_name)
96
- tokenizer = T5Tokenizer.from_pretrained(model_name)
97
- return model, tokenizer
98
-
99
- # Load Spacy Model
100
- @st.cache_resource
101
- def load_nlp_models():
102
- nlp = spacy.load("en_core_web_md")
103
- s2v = sense2vec.Sense2Vec().from_disk('s2v_old')
104
- return nlp, s2v
105
-
106
- # Load Quality Assurance Models
107
- @st.cache_resource
108
- def load_qa_models():
109
- # Initialize BERT model for sentence similarity
110
- similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
111
-
112
- spell = SpellChecker()
113
- return similarity_model, spell
114
 
115
  with st.sidebar:
116
  select_model = st.selectbox("Select Model", ("T5-large","T5-small"))
@@ -118,514 +34,8 @@ if select_model == "T5-large":
118
  modelname = "DevBM/t5-large-squad"
119
  elif select_model == "T5-small":
120
  modelname = "AneriThakkar/flan-t5-small-finetuned"
121
- nlp, s2v = load_nlp_models()
122
- similarity_model, spell = load_qa_models()
123
- context_model = similarity_model
124
- model, tokenizer = load_model(modelname)
125
-
126
-
127
- # Info Section
128
- def display_info():
129
- st.sidebar.title("Information")
130
- st.sidebar.markdown("""
131
- ### Question Generator System
132
- This system is designed to generate questions based on the provided context. It uses various NLP techniques and models to:
133
- - Extract keywords from the text
134
- - Map keywords to sentences
135
- - Generate questions
136
- - Provide multiple choice options
137
- - Assess the quality of generated questions
138
- #### Key Features:
139
- - **Keyword Extraction:** Combines RAKE, TF-IDF, and spaCy for comprehensive keyword extraction.
140
- - **Question Generation:** Utilizes a pre-trained T5 model for generating questions.
141
- - **Options Generation:** Creates contextually relevant multiple-choice options.
142
- - **Question Assessment:** Scores questions based on relevance, complexity, and spelling correctness.
143
- - **Feedback Collection:** Allows users to rate the generated questions and provides statistics on feedback.
144
- #### Customization Options:
145
- - Number of beams for question generation
146
- - Context window size for mapping keywords to sentences
147
- - Number of questions to generate
148
- - Additional display elements (context, answer, options, entity link, QA scores)
149
- #### Outputs:
150
- - Generated questions with multiple-choice options
151
- - Download options for CSV and PDF formats
152
- - Visualization of overall scores
153
- """)
154
-
155
- def get_pdf_text(pdf_file):
156
- doc = pymupdf.open(stream=pdf_file.read(), filetype="pdf")
157
- text = ""
158
- for page_num in range(doc.page_count):
159
- page = doc.load_page(page_num)
160
- text += page.get_text()
161
- return text
162
-
163
- def save_feedback_og(question, answer, rating, options, context):
164
- feedback_file = 'question_feedback.json'
165
- if os.path.exists(feedback_file):
166
- with open(feedback_file, 'r') as f:
167
- feedback_data = json.load(f)
168
- else:
169
- feedback_data = []
170
- tpl = {
171
- 'question' : question,
172
- 'answer' : answer,
173
- 'context' : context,
174
- 'options' : options,
175
- 'rating' : rating,
176
- }
177
- # feedback_data[question] = rating
178
- feedback_data.append(tpl)
179
- print(feedback_data)
180
- with open(feedback_file, 'w') as f:
181
- json.dump(feedback_data, f)
182
-
183
- return feedback_file
184
-
185
- # -----------------------------------------------------------------------------------------
186
- def send_email_with_attachment(email_subject, email_body, recipient_emails, sender_email, sender_password, attachment):
187
- smtp_server = "smtp.gmail.com" # Replace with your SMTP server
188
- smtp_port = 587 # Replace with your SMTP port
189
-
190
- # Create the email message
191
- message = MIMEMultipart()
192
- message['From'] = sender_email
193
- message['To'] = ", ".join(recipient_emails)
194
- message['Subject'] = email_subject
195
- message.attach(MIMEText(email_body, 'plain'))
196
-
197
- # Attach the feedback data if available
198
- if attachment:
199
- attachment_part = MIMEApplication(attachment.getvalue(), Name="feedback_data.json")
200
- attachment_part['Content-Disposition'] = f'attachment; filename="feedback_data.json"'
201
- message.attach(attachment_part)
202
-
203
- # Send the email
204
- try:
205
- with smtplib.SMTP(smtp_server, smtp_port) as server:
206
- server.starttls()
207
- print(sender_email)
208
- print(sender_password)
209
- server.login(sender_email, sender_password)
210
- text = message.as_string()
211
- server.sendmail(sender_email, recipient_emails, text)
212
- return True
213
- except Exception as e:
214
- st.error(f"Failed to send email: {str(e)}")
215
- return False
216
- # ----------------------------------------------------------------------------------
217
-
218
- def collect_feedback(i,question, answer, context, options):
219
- st.write("Please provide feedback for this question:")
220
- edited_question = st.text_input("Enter improved question",value=question,key=f'fdx1{i}')
221
- clarity = st.slider("Clarity", 1, 5, 3, help="1 = Very unclear, 5 = Very clear",key=f'fdx2{i}')
222
- difficulty = st.slider("Difficulty", 1, 5, 3, help="1 = Very easy, 5 = Very difficult",key=f'fdx3{i}')
223
- relevance = st.slider("Relevance", 1, 5, 3, help="1 = Not relevant, 5 = Highly relevant",key=f'fdx4{i}')
224
- option_quality = st.slider("Quality of Options", 1, 5, 3, help="1 = Poor options, 5 = Excellent options",key=f'fdx5{i}')
225
- overall_rating = st.slider("Overall Rating", 1, 5, 3, help="1 = Poor, 5 = Excellent",key=f'fdx6{i}')
226
- comments = st.text_input("Additional Comments", "",key=f'fdx7{i}')
227
-
228
- if st.button("Submit Feedback",key=f'fdx8{i}'):
229
- feedback = {
230
- "question": question,
231
- 'edited_question':edited_question,
232
- "answer": answer,
233
- "options": options,
234
- "clarity": clarity,
235
- "difficulty": difficulty,
236
- "relevance": relevance,
237
- "option_quality": option_quality,
238
- "overall_rating": overall_rating,
239
- "comments": comments
240
- }
241
- save_feedback(feedback)
242
- st.success("Thank you for your feedback!")
243
-
244
- def save_feedback(feedback):
245
- st.session_state.feedback_data.append(feedback)
246
-
247
- def analyze_feedback():
248
- if not st.session_state.feedback_data:
249
- st.warning("No feedback data available yet.")
250
- return
251
-
252
- df = pd.DataFrame(st.session_state.feedback_data)
253
-
254
- st.write("Feedback Analysis")
255
- st.write(f"Total feedback collected: {len(df)}")
256
-
257
- metrics = ['clarity', 'difficulty', 'relevance', 'option_quality', 'overall_rating']
258
-
259
- for metric in metrics:
260
- fig, ax = plt.subplots()
261
- df[metric].value_counts().sort_index().plot(kind='bar', ax=ax)
262
- plt.title(f"Distribution of {metric.capitalize()} Ratings")
263
- plt.xlabel("Rating")
264
- plt.ylabel("Count")
265
- st.pyplot(fig)
266
-
267
- st.write("Average Ratings:")
268
- st.write(df[metrics].mean())
269
-
270
- # Word cloud of comments
271
- comments = " ".join(df['comments'])
272
- if len(comments) > 1:
273
- wordcloud = WordCloud(width=800, height=400, background_color='white').generate(comments)
274
- fig, ax = plt.subplots()
275
- plt.imshow(wordcloud, interpolation='bilinear')
276
- plt.axis("off")
277
- st.pyplot(fig)
278
-
279
-
280
- def export_feedback_data():
281
- if not st.session_state.feedback_data:
282
- st.warning("No feedback data available.")
283
- return None
284
-
285
- # Convert feedback data to JSON
286
- json_data = json.dumps(st.session_state.feedback_data, indent=2)
287
-
288
- # Create a BytesIO object
289
- buffer = BytesIO()
290
- buffer.write(json_data.encode())
291
- buffer.seek(0)
292
-
293
- return buffer
294
-
295
- # Function to clean text
296
- def clean_text(text):
297
- text = re.sub(r"[^\x00-\x7F]", " ", text)
298
- text = re.sub(f"[\n]"," ", text)
299
- return text
300
-
301
- # Function to create text chunks
302
- def segment_text(text, max_segment_length=700, batch_size=7):
303
- sentences = sent_tokenize(text)
304
- segments = []
305
- current_segment = ""
306
-
307
- for sentence in sentences:
308
- if len(current_segment) + len(sentence) <= max_segment_length:
309
- current_segment += sentence + " "
310
- else:
311
- segments.append(current_segment.strip())
312
- current_segment = sentence + " "
313
-
314
- if current_segment:
315
- segments.append(current_segment.strip())
316
-
317
- # Create batches
318
- batches = [segments[i:i + batch_size] for i in range(0, len(segments), batch_size)]
319
- return batches
320
-
321
-
322
- # Function to extract keywords using combined techniques
323
- def extract_keywords(text, extract_all):
324
- try:
325
- gliner_model = GLiNER.from_pretrained("knowledgator/gliner-multitask-large-v0.5")
326
- labels = ["person", "organization", "email", "Award", "Date", "Competitions", "Teams", "location", "percentage", "money"]
327
- entities = gliner_model.predict_entities(text, labels, threshold=0.7)
328
-
329
- gliner_keywords = list(set([ent["text"] for ent in entities]))
330
- print(f"Gliner keywords:{gliner_keywords}")
331
- # Use Only Gliner Entities
332
- if extract_all is False:
333
- return list(gliner_keywords)
334
-
335
- doc = nlp(text)
336
- spacy_keywords = set([ent.text for ent in doc.ents])
337
- spacy_entities = spacy_keywords
338
- print(f"\n\nSpacy Entities: {spacy_entities} \n\n")
339
-
340
- #
341
- # if extract_all is False:
342
- # return list(spacy_entities)
343
-
344
- # Use RAKE
345
- rake = Rake()
346
- rake.extract_keywords_from_text(text)
347
- rake_keywords = set(rake.get_ranked_phrases())
348
- print(f"\n\nRake Keywords: {rake_keywords} \n\n")
349
- # Use spaCy for NER and POS tagging
350
- spacy_keywords.update([token.text for token in doc if token.pos_ in ["NOUN", "PROPN", "VERB", "ADJ"]])
351
- print(f"\n\nSpacy Keywords: {spacy_keywords} \n\n")
352
- # Use TF-IDF
353
- vectorizer = TfidfVectorizer(stop_words='english')
354
- X = vectorizer.fit_transform([text])
355
- tfidf_keywords = set(vectorizer.get_feature_names_out())
356
- print(f"\n\nTFIDF Entities: {tfidf_keywords} \n\n")
357
-
358
- # Combine all keywords
359
- combined_keywords = rake_keywords.union(spacy_keywords).union(tfidf_keywords).union(gliner_keywords)
360
-
361
- return list(combined_keywords)
362
- except Exception as e:
363
- raise QuestionGenerationError(f"Error in keyword extraction: {str(e)}")
364
-
365
- def get_similar_words_sense2vec(word, n=3):
366
- # Try to find the word with its most likely part-of-speech
367
- word_with_pos = word + "|NOUN"
368
- if word_with_pos in s2v:
369
- similar_words = s2v.most_similar(word_with_pos, n=n)
370
- return [word.split("|")[0] for word, _ in similar_words]
371
-
372
- # If not found, try without POS
373
- if word in s2v:
374
- similar_words = s2v.most_similar(word, n=n)
375
- return [word.split("|")[0] for word, _ in similar_words]
376
-
377
- return []
378
-
379
- def get_synonyms(word, n=3):
380
- synonyms = []
381
- for syn in wordnet.synsets(word):
382
- for lemma in syn.lemmas():
383
- if lemma.name() != word and lemma.name() not in synonyms:
384
- synonyms.append(lemma.name())
385
- if len(synonyms) == n:
386
- return synonyms
387
- return synonyms
388
-
389
- def generate_options(answer, context, n=3):
390
- options = [answer]
391
-
392
- # Add contextually relevant words using a pre-trained model
393
- context_embedding = context_model.encode(context)
394
- answer_embedding = context_model.encode(answer)
395
- context_words = [token.text for token in nlp(context) if token.is_alpha and token.text.lower() != answer.lower()]
396
-
397
- # Compute similarity scores and sort context words
398
- similarity_scores = [util.pytorch_cos_sim(context_model.encode(word), answer_embedding).item() for word in context_words]
399
- sorted_context_words = [word for _, word in sorted(zip(similarity_scores, context_words), reverse=True)]
400
- options.extend(sorted_context_words[:n])
401
-
402
- # Try to get similar words based on sense2vec
403
- similar_words = get_similar_words_sense2vec(answer, n)
404
- options.extend(similar_words)
405
-
406
- # If we don't have enough options, try synonyms
407
- if len(options) < n + 1:
408
- synonyms = get_synonyms(answer, n - len(options) + 1)
409
- options.extend(synonyms)
410
-
411
- # If we still don't have enough options, extract other entities from the context
412
- if len(options) < n + 1:
413
- doc = nlp(context)
414
- entities = [ent.text for ent in doc.ents if ent.text.lower() != answer.lower()]
415
- options.extend(entities[:n - len(options) + 1])
416
-
417
- # If we still need more options, add some random words from the context
418
- if len(options) < n + 1:
419
- context_words = [token.text for token in nlp(context) if token.is_alpha and token.text.lower() != answer.lower()]
420
- options.extend(random.sample(context_words, min(n - len(options) + 1, len(context_words))))
421
- print(f"\n\nAll Possible Options: {options}\n\n")
422
- # Ensure we have the correct number of unique options
423
- options = list(dict.fromkeys(options))[:n+1]
424
-
425
- # Shuffle the options
426
- random.shuffle(options)
427
-
428
- return options
429
-
430
- # Function to map keywords to sentences with customizable context window size
431
- def map_keywords_to_sentences(text, keywords, context_window_size):
432
- sentences = sent_tokenize(text)
433
- keyword_sentence_mapping = {}
434
- print(f"\n\nSentences: {sentences}\n\n")
435
- for keyword in keywords:
436
- for i, sentence in enumerate(sentences):
437
- if keyword in sentence:
438
- # Combine current sentence with surrounding sentences for context
439
- # start = max(0, i - context_window_size)
440
- # end = min(len(sentences), i + context_window_size + 1)
441
- start = max(0,i - context_window_size)
442
- context_sentenses = sentences[start:i+1]
443
- context = ' '.join(context_sentenses)
444
- # context = ' '.join(sentences[start:end])
445
- if keyword not in keyword_sentence_mapping:
446
- keyword_sentence_mapping[keyword] = context
447
- else:
448
- keyword_sentence_mapping[keyword] += ' ' + context
449
- return keyword_sentence_mapping
450
-
451
-
452
- # Function to perform entity linking using Wikipedia API
453
- @lru_cache(maxsize=128)
454
- def entity_linking(keyword):
455
- page = wiki_wiki.page(keyword)
456
- if page.exists():
457
- return page.fullurl
458
- return None
459
-
460
- async def generate_question_async(context, answer, num_beams):
461
- try:
462
- input_text = f"<context> {context} <answer> {answer}"
463
- print(f"\n{input_text}\n")
464
- input_ids = tokenizer.encode(input_text, return_tensors='pt')
465
- outputs = await asyncio.to_thread(model.generate, input_ids, num_beams=num_beams, early_stopping=True, max_length=250)
466
- question = tokenizer.decode(outputs[0], skip_special_tokens=True)
467
- print(f"\n{question}\n")
468
- return question
469
- except Exception as e:
470
- raise QuestionGenerationError(f"Error in question generation: {str(e)}")
471
-
472
- async def generate_options_async(answer, context, n=3):
473
- try:
474
- options = [answer]
475
-
476
- # Add contextually relevant words using a pre-trained model
477
- context_embedding = await asyncio.to_thread(context_model.encode, context)
478
- answer_embedding = await asyncio.to_thread(context_model.encode, answer)
479
- context_words = [token.text for token in nlp(context) if token.is_alpha and token.text.lower() != answer.lower()]
480
-
481
- # Compute similarity scores and sort context words
482
- similarity_scores = [util.pytorch_cos_sim(await asyncio.to_thread(context_model.encode, word), answer_embedding).item() for word in context_words]
483
- sorted_context_words = [word for _, word in sorted(zip(similarity_scores, context_words), reverse=True)]
484
- options.extend(sorted_context_words[:n])
485
-
486
- # Try to get similar words based on sense2vec
487
- similar_words = await asyncio.to_thread(get_similar_words_sense2vec, answer, n)
488
- options.extend(similar_words)
489
-
490
- # If we don't have enough options, try synonyms
491
- if len(options) < n + 1:
492
- synonyms = await asyncio.to_thread(get_synonyms, answer, n - len(options) + 1)
493
- options.extend(synonyms)
494
-
495
- # Ensure we have the correct number of unique options
496
- options = list(dict.fromkeys(options))[:n+1]
497
-
498
- # Shuffle the options
499
- random.shuffle(options)
500
-
501
- return options
502
- except Exception as e:
503
- raise QuestionGenerationError(f"Error in generating options: {str(e)}")
504
-
505
-
506
- # Function to generate questions using beam search
507
- async def generate_questions_async(text, num_questions, context_window_size, num_beams, extract_all_keywords):
508
- try:
509
- batches = segment_text(text)
510
- keywords = extract_keywords(text, extract_all_keywords)
511
- all_questions = []
512
-
513
- progress_bar = st.progress(0)
514
- status_text = st.empty()
515
-
516
- for i, batch in enumerate(batches):
517
- status_text.text(f"Processing batch {i+1} of {len(batches)}...")
518
- batch_questions = await process_batch(batch, keywords, context_window_size, num_beams)
519
- all_questions.extend(batch_questions)
520
- progress_bar.progress((i + 1) / len(batches))
521
-
522
- if len(all_questions) >= num_questions:
523
- break
524
-
525
- progress_bar.empty()
526
- status_text.empty()
527
-
528
- return all_questions[:num_questions]
529
- except QuestionGenerationError as e:
530
- st.error(f"An error occurred during question generation: {str(e)}")
531
- return []
532
- except Exception as e:
533
- st.error(f"An unexpected error occurred: {str(e)}")
534
- return []
535
-
536
- async def generate_fill_in_the_blank_questions(context,answer):
537
- answerSize = len(answer)
538
- replacedBlanks = ""
539
- for i in range(answerSize):
540
- replacedBlanks += "_"
541
- blank_q = context.replace(answer,replacedBlanks)
542
- return blank_q
543
-
544
- async def process_batch(batch, keywords, context_window_size, num_beams):
545
- questions = []
546
- for text in batch:
547
- keyword_sentence_mapping = map_keywords_to_sentences(text, keywords, context_window_size)
548
- for keyword, context in keyword_sentence_mapping.items():
549
- question = await generate_question_async(context, keyword, num_beams)
550
- options = await generate_options_async(keyword, context)
551
- blank_question = await generate_fill_in_the_blank_questions(context,keyword)
552
- overall_score, relevance_score, complexity_score, spelling_correctness = assess_question_quality(context, question, keyword)
553
- if overall_score >= 0.5:
554
- questions.append({
555
- "question": question,
556
- "context": context,
557
- "answer": keyword,
558
- "options": options,
559
- "overall_score": overall_score,
560
- "relevance_score": relevance_score,
561
- "complexity_score": complexity_score,
562
- "spelling_correctness": spelling_correctness,
563
- "blank_question": blank_question,
564
- })
565
- return questions
566
-
567
- # Function to export questions to CSV
568
- def export_to_csv(data):
569
- # df = pd.DataFrame(data, columns=["Context", "Answer", "Question", "Options"])
570
- df = pd.DataFrame(data)
571
- # csv = df.to_csv(index=False,encoding='utf-8')
572
- csv = df.to_csv(index=False)
573
- return csv
574
-
575
- # Function to export questions to PDF
576
- def export_to_pdf(data):
577
- pdf = FPDF()
578
- pdf.add_page()
579
- pdf.set_font("Arial", size=12)
580
-
581
- for item in data:
582
- pdf.multi_cell(0, 10, f"Context: {item['context']}")
583
- pdf.multi_cell(0, 10, f"Question: {item['question']}")
584
- pdf.multi_cell(0, 10, f"Answer: {item['answer']}")
585
- pdf.multi_cell(0, 10, f"Options: {', '.join(item['options'])}")
586
- pdf.multi_cell(0, 10, f"Overall Score: {item['overall_score']:.2f}")
587
- pdf.ln(10)
588
-
589
- return pdf.output(dest='S').encode('latin-1')
590
-
591
- def display_word_cloud(generated_questions):
592
- word_frequency = {}
593
- for question in generated_questions:
594
- words = question.split()
595
- for word in words:
596
- word_frequency[word] = word_frequency.get(word, 0) + 1
597
-
598
- wordcloud = WordCloud(width=800, height=400, background_color='white').generate_from_frequencies(word_frequency)
599
- plt.figure(figsize=(10, 5))
600
- plt.imshow(wordcloud, interpolation='bilinear')
601
- plt.axis('off')
602
- st.pyplot()
603
-
604
-
605
- def assess_question_quality(context, question, answer):
606
- # Assess relevance using cosine similarity
607
- context_doc = nlp(context)
608
- question_doc = nlp(question)
609
- relevance_score = context_doc.similarity(question_doc)
610
-
611
- # Assess complexity using token length (as a simple metric)
612
- complexity_score = min(len(question_doc) / 20, 1) # Normalize to 0-1
613
-
614
- # Assess Spelling correctness
615
- misspelled = spell.unknown(question.split())
616
- spelling_correctness = 1 - (len(misspelled) / len(question.split())) # Normalize to 0-1
617
-
618
- # Calculate overall score (you can adjust weights as needed)
619
- overall_score = (
620
- 0.4 * relevance_score +
621
- 0.4 * complexity_score +
622
- 0.2 * spelling_correctness
623
- )
624
-
625
- return overall_score, relevance_score, complexity_score, spelling_correctness
626
 
627
  def main():
628
- # Streamlit interface
629
  st.title(":blue[Question Generator System]")
630
  session_id = get_session_id()
631
  state = initialize_state(session_id)
@@ -633,18 +43,18 @@ def main():
633
  st.session_state.feedback_data = []
634
 
635
  with st.sidebar:
636
- show_info = st.toggle('Show Info',True)
637
  if show_info:
638
  display_info()
639
  st.subheader("Customization Options")
640
  # Customization options
641
  input_type = st.radio("Select Input Preference", ("Text Input","Upload PDF"))
642
  with st.expander("Choose the Additional Elements to show"):
643
- show_context = st.checkbox("Context",True)
644
  show_answer = st.checkbox("Answer",True)
645
- show_options = st.checkbox("Options",False)
646
  show_entity_link = st.checkbox("Entity Link For Wikipedia",True)
647
- show_qa_scores = st.checkbox("QA Score",False)
648
  show_blank_question = st.checkbox("Fill in the Blank Questions",True)
649
  num_beams = st.slider("Select number of beams for question generation", min_value=2, max_value=10, value=2)
650
  context_window_size = st.slider("Select context window size (number of sentences before and after)", min_value=1, max_value=5, value=1)
@@ -670,15 +80,15 @@ def main():
670
  text = clean_text(text)
671
  with st.expander("Show text"):
672
  st.write(text)
 
673
  generate_questions_button = st.button("Generate Questions",help="This is the generate questions button")
674
  # st.markdown('<span aria-label="Generate questions button">Above is the generate questions button</span>', unsafe_allow_html=True)
675
 
676
- # if generate_questions_button:
677
  if generate_questions_button and text:
678
  start_time = time.time()
679
  with st.spinner("Generating questions..."):
680
  try:
681
- state['generated_questions'] = asyncio.run(generate_questions_async(text, num_questions, context_window_size, num_beams, extract_all_keywords))
682
  if not state['generated_questions']:
683
  st.warning("No questions were generated. The text might be too short or lack suitable content.")
684
  else:
@@ -739,12 +149,16 @@ def main():
739
  # Export buttons
740
  # if st.session_state.generated_questions:
741
  if state['generated_questions']:
742
- with st.sidebar:
743
- csv_data = export_to_csv(state['generated_questions'])
744
- st.download_button(label="Download CSV", data=csv_data, file_name='questions.csv', mime='text/csv')
745
-
746
- pdf_data = export_to_pdf(state['generated_questions'])
747
- st.download_button(label="Download PDF", data=pdf_data, file_name='questions.pdf', mime='application/pdf')
 
 
 
 
748
 
749
  with st.expander("View Visualizations"):
750
  questions = [tpl['question'] for tpl in state['generated_questions']]
@@ -755,7 +169,6 @@ def main():
755
  overall_scores = pd.DataFrame(overall_scores,columns=['Overall Scores'])
756
  st.line_chart(overall_scores)
757
 
758
-
759
  # View Feedback Statistics
760
  with st.expander("View Feedback Statistics"):
761
  analyze_feedback()
 
 
 
 
1
  import nltk
 
 
 
 
 
 
2
  nltk.download('punkt')
3
  nltk.download('stopwords')
4
  nltk.download('brown')
 
5
  nltk.download('wordnet')
6
+ import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  st.set_page_config(
9
  page_icon='cyclone',
 
14
  }
15
  )
16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
+ from text_processing import clean_text, get_pdf_text
19
+ from question_generation import generate_questions_async
20
+ from visualization import display_word_cloud
21
+ from data_export import export_to_csv, export_to_pdf
22
+ from feedback import collect_feedback, analyze_feedback, export_feedback_data
23
+ from utils import get_session_id, initialize_state, get_state, set_state, display_info, QuestionGenerationError, entity_linking
24
+ import asyncio
25
+ import time
26
+ import pandas as pd
27
+ from data_export import send_email_with_attachment
28
 
29
+ st.set_option('deprecation.showPyplotGlobalUse',False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
  with st.sidebar:
32
  select_model = st.selectbox("Select Model", ("T5-large","T5-small"))
 
34
  modelname = "DevBM/t5-large-squad"
35
  elif select_model == "T5-small":
36
  modelname = "AneriThakkar/flan-t5-small-finetuned"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  def main():
 
39
  st.title(":blue[Question Generator System]")
40
  session_id = get_session_id()
41
  state = initialize_state(session_id)
 
43
  st.session_state.feedback_data = []
44
 
45
  with st.sidebar:
46
+ show_info = st.toggle('Show Info',False)
47
  if show_info:
48
  display_info()
49
  st.subheader("Customization Options")
50
  # Customization options
51
  input_type = st.radio("Select Input Preference", ("Text Input","Upload PDF"))
52
  with st.expander("Choose the Additional Elements to show"):
53
+ show_context = st.checkbox("Context",False)
54
  show_answer = st.checkbox("Answer",True)
55
+ show_options = st.checkbox("Options",True)
56
  show_entity_link = st.checkbox("Entity Link For Wikipedia",True)
57
+ show_qa_scores = st.checkbox("QA Score",True)
58
  show_blank_question = st.checkbox("Fill in the Blank Questions",True)
59
  num_beams = st.slider("Select number of beams for question generation", min_value=2, max_value=10, value=2)
60
  context_window_size = st.slider("Select context window size (number of sentences before and after)", min_value=1, max_value=5, value=1)
 
80
  text = clean_text(text)
81
  with st.expander("Show text"):
82
  st.write(text)
83
+ # st.text(text)
84
  generate_questions_button = st.button("Generate Questions",help="This is the generate questions button")
85
  # st.markdown('<span aria-label="Generate questions button">Above is the generate questions button</span>', unsafe_allow_html=True)
86
 
 
87
  if generate_questions_button and text:
88
  start_time = time.time()
89
  with st.spinner("Generating questions..."):
90
  try:
91
+ state['generated_questions'] = asyncio.run(generate_questions_async(text, num_questions, context_window_size, num_beams, extract_all_keywords,modelname))
92
  if not state['generated_questions']:
93
  st.warning("No questions were generated. The text might be too short or lack suitable content.")
94
  else:
 
149
  # Export buttons
150
  # if st.session_state.generated_questions:
151
  if state['generated_questions']:
152
+ with st.sidebar:
153
+ # Adding error handling while exporting the files
154
+ # ---------------------------------------------------------------------
155
+ try:
156
+ csv_data = export_to_csv(state['generated_questions'])
157
+ st.download_button(label="Download CSV", data=csv_data, file_name='questions.csv', mime='text/csv')
158
+ pdf_data = export_to_pdf(state['generated_questions'])
159
+ st.download_button(label="Download PDF", data=pdf_data, file_name='questions.pdf', mime='application/pdf')
160
+ except Exception as e:
161
+ st.error(f"Error exporting CSV: {e}")
162
 
163
  with st.expander("View Visualizations"):
164
  questions = [tpl['question'] for tpl in state['generated_questions']]
 
169
  overall_scores = pd.DataFrame(overall_scores,columns=['Overall Scores'])
170
  st.line_chart(overall_scores)
171
 
 
172
  # View Feedback Statistics
173
  with st.expander("View Feedback Statistics"):
174
  analyze_feedback()