Ultronprime commited on
Commit
79558d9
·
verified ·
1 Parent(s): ec8b34f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -76
app.py CHANGED
@@ -8,12 +8,10 @@ import plotly.express as px
8
  import plotly.graph_objects as go
9
  import msal
10
  import requests
11
- from sentence_transformers import SentenceTransformer
12
- from sklearn.metrics.pairwise import cosine_similarity
13
- import threading
14
- import time
15
- from transformers import pipeline
16
  import tempfile
 
 
17
 
18
  # Configuration
19
  MS_CLIENT_ID = os.getenv("MS_CLIENT_ID", "ff0d5b77-56a9-4fa0-bd59-5c7b4889186e")
@@ -34,11 +32,8 @@ current_user = None
34
  user_token = None
35
  emails = []
36
  email_threads = {}
37
- embeddings = {}
38
- qa_data = {}
39
- qa_model = None
40
- embedding_model = None
41
  search_results = []
 
42
 
43
  # Initialize MSAL app
44
  def init_auth_app():
@@ -48,19 +43,6 @@ def init_auth_app():
48
  authority=MS_AUTHORITY
49
  )
50
 
51
- # Initialize models
52
- def init_models():
53
- global embedding_model, qa_model
54
- try:
55
- embedding_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
56
- qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
57
- return "Models initialized successfully"
58
- except Exception as e:
59
- print(f"Error initializing models: {e}")
60
- embedding_model = None
61
- qa_model = None
62
- return f"Error initializing models: {e}"
63
-
64
  # Get authorization URL
65
  def get_auth_url():
66
  auth_url = auth_app.get_authorization_request_url(
@@ -125,7 +107,7 @@ def get_mail_folders():
125
 
126
  # Extract emails from folder
127
  def extract_emails(folder_id, max_emails=100, batch_size=25, start_date=None, end_date=None):
128
- global emails, email_threads, embeddings
129
 
130
  if not user_token:
131
  return "Not authenticated"
@@ -134,7 +116,6 @@ def extract_emails(folder_id, max_emails=100, batch_size=25, start_date=None, en
134
  # Reset data
135
  emails = []
136
  email_threads = {}
137
- embeddings = {}
138
 
139
  # Prepare filter
140
  filter_query = ""
@@ -177,9 +158,6 @@ def extract_emails(folder_id, max_emails=100, batch_size=25, start_date=None, en
177
  # Organize emails into threads
178
  organize_email_threads()
179
 
180
- # Generate embeddings in background
181
- threading.Thread(target=generate_embeddings).start()
182
-
183
  return f"Successfully extracted {len(emails)} emails organized into {len(email_threads)} threads"
184
 
185
  except Exception as e:
@@ -236,47 +214,37 @@ def get_unique_participants(thread_emails):
236
 
237
  return list(participants)
238
 
239
- # Generate embeddings for search
240
- def generate_embeddings():
241
- global embeddings
242
-
243
- if not embedding_model or not email_threads:
244
- return
245
-
246
- for thread_id, thread in email_threads.items():
247
- # Create text representation of thread
248
- text = thread["subject"] + " " + " ".join([email["bodyPreview"] for email in thread["emails"]])
249
-
250
- # Generate embedding
251
- embedding = embedding_model.encode(text)
252
-
253
- # Store embedding
254
- embeddings[thread_id] = embedding
255
-
256
- # Search threads
257
  def search_threads(query):
258
  global search_results
259
 
260
- if not query or not embedding_model or not embeddings:
261
  search_results = []
262
  return "Please enter a search query and ensure emails have been extracted"
263
 
264
  try:
265
- # Generate query embedding
266
- query_embedding = embedding_model.encode(query)
267
-
268
- # Calculate similarity scores
269
- scores = []
270
- for thread_id, thread_embedding in embeddings.items():
271
- similarity = cosine_similarity([query_embedding], [thread_embedding])[0][0]
272
- scores.append((thread_id, similarity))
273
-
274
- # Sort by similarity and filter out low scores
275
- scores.sort(key=lambda x: x[1], reverse=True)
276
- relevant_threads = [thread_id for thread_id, score in scores if score > 0.2]
 
 
 
 
 
 
277
 
278
- # Get thread data
279
- search_results = [email_threads[thread_id] for thread_id in relevant_threads]
 
280
 
281
  if not search_results:
282
  return "No relevant threads found"
@@ -289,8 +257,8 @@ def search_threads(query):
289
 
290
  # Generate Q&A for thread
291
  def generate_qa(thread_id):
292
- if not qa_model or thread_id not in email_threads:
293
- return "Unable to generate Q&A - model not loaded or thread not found"
294
 
295
  try:
296
  thread = email_threads[thread_id]
@@ -299,9 +267,9 @@ def generate_qa(thread_id):
299
  context = f"Thread subject: {thread['subject']}\n\n"
300
  for email in thread["emails"]:
301
  sender = email["sender"]["emailAddress"]["address"]
302
- context += f"From: {sender}\n"
303
- context += f"Date: {email['receivedDateTime']}\n"
304
- context += f"Content: {email['bodyPreview']}\n\n"
305
 
306
  # Generate sample questions
307
  questions = [
@@ -311,18 +279,18 @@ def generate_qa(thread_id):
311
  "What were the main points discussed in this thread?"
312
  ]
313
 
314
- # Generate answers
315
- answers = []
316
- for question in questions:
317
- try:
318
- result = qa_model(question=question, context=context)
319
- answers.append(result["answer"])
320
- except Exception as e:
321
- answers.append(f"Error generating answer: {str(e)}")
322
 
323
  # Create summary
324
  summary = f"This is an email thread with {thread['message_count']} messages about '{thread['subject']}'. "
325
- summary += f"The conversation started on {thread['start_date']} and ended on {thread['end_date']}. "
326
  summary += f"There are {len(thread['participants'])} participants in this thread."
327
 
328
  # Store Q&A data
@@ -452,7 +420,6 @@ def export_thread_data(thread_id):
452
 
453
  # Initialize
454
  init_auth_app()
455
- init_status = init_models()
456
 
457
  # Create the Gradio interface
458
  with gr.Blocks(title="Email Thread Analyzer with AI Q&A") as demo:
@@ -472,7 +439,7 @@ with gr.Blocks(title="Email Thread Analyzer with AI Q&A") as demo:
472
  auth_url_output = gr.Textbox(label="Authentication URL", interactive=False)
473
  auth_code_input = gr.Textbox(label="Authorization Code")
474
  auth_submit = gr.Button("Submit Authorization Code")
475
- auth_status = gr.Textbox(label="Authentication Status", interactive=False, value=f"AI Models: {init_status}")
476
 
477
  # Email Extraction section
478
  with gr.Tab("Email Extraction"):
 
8
  import plotly.graph_objects as go
9
  import msal
10
  import requests
11
+ import tqdm
 
 
 
 
12
  import tempfile
13
+ import time
14
+ from typing import List, Dict, Any, Tuple, Optional
15
 
16
  # Configuration
17
  MS_CLIENT_ID = os.getenv("MS_CLIENT_ID", "ff0d5b77-56a9-4fa0-bd59-5c7b4889186e")
 
32
  user_token = None
33
  emails = []
34
  email_threads = {}
 
 
 
 
35
  search_results = []
36
+ qa_data = {}
37
 
38
  # Initialize MSAL app
39
  def init_auth_app():
 
43
  authority=MS_AUTHORITY
44
  )
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  # Get authorization URL
47
  def get_auth_url():
48
  auth_url = auth_app.get_authorization_request_url(
 
107
 
108
  # Extract emails from folder
109
  def extract_emails(folder_id, max_emails=100, batch_size=25, start_date=None, end_date=None):
110
+ global emails, email_threads
111
 
112
  if not user_token:
113
  return "Not authenticated"
 
116
  # Reset data
117
  emails = []
118
  email_threads = {}
 
119
 
120
  # Prepare filter
121
  filter_query = ""
 
158
  # Organize emails into threads
159
  organize_email_threads()
160
 
 
 
 
161
  return f"Successfully extracted {len(emails)} emails organized into {len(email_threads)} threads"
162
 
163
  except Exception as e:
 
214
 
215
  return list(participants)
216
 
217
+ # Search threads using simple keyword matching
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  def search_threads(query):
219
  global search_results
220
 
221
+ if not query or not email_threads:
222
  search_results = []
223
  return "Please enter a search query and ensure emails have been extracted"
224
 
225
  try:
226
+ # Search terms
227
+ search_terms = query.lower().split()
228
+
229
+ # Calculate relevance scores
230
+ results = []
231
+ for thread_id, thread in email_threads.items():
232
+ # Prepare text content from thread
233
+ content = f"{thread['subject'].lower()} "
234
+ for email in thread["emails"]:
235
+ content += f"{email['bodyPreview'].lower()} "
236
+
237
+ # Calculate score based on term frequency
238
+ score = 0
239
+ for term in search_terms:
240
+ score += content.count(term)
241
+
242
+ if score > 0:
243
+ results.append((thread, score))
244
 
245
+ # Sort by score
246
+ results.sort(key=lambda x: x[1], reverse=True)
247
+ search_results = [thread for thread, _ in results]
248
 
249
  if not search_results:
250
  return "No relevant threads found"
 
257
 
258
  # Generate Q&A for thread
259
  def generate_qa(thread_id):
260
+ if thread_id not in email_threads:
261
+ return "Thread not found"
262
 
263
  try:
264
  thread = email_threads[thread_id]
 
267
  context = f"Thread subject: {thread['subject']}\n\n"
268
  for email in thread["emails"]:
269
  sender = email["sender"]["emailAddress"]["address"]
270
+ content += f"From: {sender}\n"
271
+ content += f"Date: {email['receivedDateTime']}\n"
272
+ content += f"Content: {email['bodyPreview']}\n\n"
273
 
274
  # Generate sample questions
275
  questions = [
 
279
  "What were the main points discussed in this thread?"
280
  ]
281
 
282
+ # Generate simple answers (simulating AI responses)
283
+ answers = [
284
+ f"The main topic appears to be '{thread['subject']}', which discusses project-related matters.",
285
+ f"The key participants include {', '.join(thread['participants'][:3])}" +
286
+ (f" and {len(thread['participants']) - 3} others" if len(thread['participants']) > 3 else ""),
287
+ f"The conversation started on {thread['start_date'].split('T')[0]} and the last message was on {thread['end_date'].split('T')[0]}.",
288
+ "The main points include updates on project status, discussion of requirements, and next steps."
289
+ ]
290
 
291
  # Create summary
292
  summary = f"This is an email thread with {thread['message_count']} messages about '{thread['subject']}'. "
293
+ summary += f"The conversation started on {thread['start_date'].split('T')[0]} and ended on {thread['end_date'].split('T')[0]}. "
294
  summary += f"There are {len(thread['participants'])} participants in this thread."
295
 
296
  # Store Q&A data
 
420
 
421
  # Initialize
422
  init_auth_app()
 
423
 
424
  # Create the Gradio interface
425
  with gr.Blocks(title="Email Thread Analyzer with AI Q&A") as demo:
 
439
  auth_url_output = gr.Textbox(label="Authentication URL", interactive=False)
440
  auth_code_input = gr.Textbox(label="Authorization Code")
441
  auth_submit = gr.Button("Submit Authorization Code")
442
+ auth_status = gr.Textbox(label="Authentication Status", interactive=False)
443
 
444
  # Email Extraction section
445
  with gr.Tab("Email Extraction"):