Spaces:

Ultronprime
/

cloud-rag-webhook

Runtime error

App Files Files Community

Ultronprime committed on Mar 9, 2025

Commit

79558d9

verified ·

1 Parent(s): ec8b34f

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -76

app.py CHANGED Viewed

@@ -8,12 +8,10 @@ import plotly.express as px
 import plotly.graph_objects as go
 import msal
 import requests
-from sentence_transformers import SentenceTransformer
-from sklearn.metrics.pairwise import cosine_similarity
-import threading
-import time
-from transformers import pipeline
 import tempfile
 # Configuration
 MS_CLIENT_ID = os.getenv("MS_CLIENT_ID", "ff0d5b77-56a9-4fa0-bd59-5c7b4889186e")
@@ -34,11 +32,8 @@ current_user = None
 user_token = None
 emails = []
 email_threads = {}
-embeddings = {}
-qa_data = {}
-qa_model = None
-embedding_model = None
 search_results = []
 # Initialize MSAL app
 def init_auth_app():
@@ -48,19 +43,6 @@ def init_auth_app():
         authority=MS_AUTHORITY
     )
-# Initialize models
-def init_models():
-    global embedding_model, qa_model
-    try:
-        embedding_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
-        qa_model = pipeline("question-answering", model="deepset/roberta-base-squad2")
-        return "Models initialized successfully"
-    except Exception as e:
-        print(f"Error initializing models: {e}")
-        embedding_model = None
-        qa_model = None
-        return f"Error initializing models: {e}"
 # Get authorization URL
 def get_auth_url():
     auth_url = auth_app.get_authorization_request_url(
@@ -125,7 +107,7 @@ def get_mail_folders():
 # Extract emails from folder
 def extract_emails(folder_id, max_emails=100, batch_size=25, start_date=None, end_date=None):
-    global emails, email_threads, embeddings
     if not user_token:
         return "Not authenticated"
@@ -134,7 +116,6 @@ def extract_emails(folder_id, max_emails=100, batch_size=25, start_date=None, en
         # Reset data
         emails = []
         email_threads = {}
-        embeddings = {}
         # Prepare filter
         filter_query = ""
@@ -177,9 +158,6 @@ def extract_emails(folder_id, max_emails=100, batch_size=25, start_date=None, en
         # Organize emails into threads
         organize_email_threads()
-        # Generate embeddings in background
-        threading.Thread(target=generate_embeddings).start()
         return f"Successfully extracted {len(emails)} emails organized into {len(email_threads)} threads"
     except Exception as e:
@@ -236,47 +214,37 @@ def get_unique_participants(thread_emails):
     return list(participants)
-# Generate embeddings for search
-def generate_embeddings():
-    global embeddings
-    if not embedding_model or not email_threads:
-        return
-    for thread_id, thread in email_threads.items():
-        # Create text representation of thread
-        text = thread["subject"] + " " + " ".join([email["bodyPreview"] for email in thread["emails"]])
-        # Generate embedding
-        embedding = embedding_model.encode(text)
-        # Store embedding
-        embeddings[thread_id] = embedding
-# Search threads
 def search_threads(query):
     global search_results
-    if not query or not embedding_model or not embeddings:
         search_results = []
         return "Please enter a search query and ensure emails have been extracted"
     try:
-        # Generate query embedding
-        query_embedding = embedding_model.encode(query)
-        # Calculate similarity scores
-        scores = []
-        for thread_id, thread_embedding in embeddings.items():
-            similarity = cosine_similarity([query_embedding], [thread_embedding])[0][0]
-            scores.append((thread_id, similarity))
-        # Sort by similarity and filter out low scores
-        scores.sort(key=lambda x: x[1], reverse=True)
-        relevant_threads = [thread_id for thread_id, score in scores if score > 0.2]
-        # Get thread data
-        search_results = [email_threads[thread_id] for thread_id in relevant_threads]
         if not search_results:
             return "No relevant threads found"
@@ -289,8 +257,8 @@ def search_threads(query):
 # Generate Q&A for thread
 def generate_qa(thread_id):
-    if not qa_model or thread_id not in email_threads:
-        return "Unable to generate Q&A - model not loaded or thread not found"
     try:
         thread = email_threads[thread_id]
@@ -299,9 +267,9 @@ def generate_qa(thread_id):
         context = f"Thread subject: {thread['subject']}\n\n"
         for email in thread["emails"]:
             sender = email["sender"]["emailAddress"]["address"]
-            context += f"From: {sender}\n"
-            context += f"Date: {email['receivedDateTime']}\n"
-            context += f"Content: {email['bodyPreview']}\n\n"
         # Generate sample questions
         questions = [
@@ -311,18 +279,18 @@ def generate_qa(thread_id):
             "What were the main points discussed in this thread?"
         ]
-        # Generate answers
-        answers = []
-        for question in questions:
-            try:
-                result = qa_model(question=question, context=context)
-                answers.append(result["answer"])
-            except Exception as e:
-                answers.append(f"Error generating answer: {str(e)}")
         # Create summary
         summary = f"This is an email thread with {thread['message_count']} messages about '{thread['subject']}'. "
-        summary += f"The conversation started on {thread['start_date']} and ended on {thread['end_date']}. "
         summary += f"There are {len(thread['participants'])} participants in this thread."
         # Store Q&A data
@@ -452,7 +420,6 @@ def export_thread_data(thread_id):
 # Initialize
 init_auth_app()
-init_status = init_models()
 # Create the Gradio interface
 with gr.Blocks(title="Email Thread Analyzer with AI Q&A") as demo:
@@ -472,7 +439,7 @@ with gr.Blocks(title="Email Thread Analyzer with AI Q&A") as demo:
                 auth_url_output = gr.Textbox(label="Authentication URL", interactive=False)
                 auth_code_input = gr.Textbox(label="Authorization Code")
                 auth_submit = gr.Button("Submit Authorization Code")
-                auth_status = gr.Textbox(label="Authentication Status", interactive=False, value=f"AI Models: {init_status}")
     # Email Extraction section
     with gr.Tab("Email Extraction"):

 import plotly.graph_objects as go
 import msal
 import requests
+import tqdm
 import tempfile
+import time
+from typing import List, Dict, Any, Tuple, Optional
 # Configuration
 MS_CLIENT_ID = os.getenv("MS_CLIENT_ID", "ff0d5b77-56a9-4fa0-bd59-5c7b4889186e")
 user_token = None
 emails = []
 email_threads = {}
 search_results = []
+qa_data = {}
 # Initialize MSAL app
 def init_auth_app():
         authority=MS_AUTHORITY
     )
 # Get authorization URL
 def get_auth_url():
     auth_url = auth_app.get_authorization_request_url(
 # Extract emails from folder
 def extract_emails(folder_id, max_emails=100, batch_size=25, start_date=None, end_date=None):
+    global emails, email_threads
     if not user_token:
         return "Not authenticated"
         # Reset data
         emails = []
         email_threads = {}
         # Prepare filter
         filter_query = ""
         # Organize emails into threads
         organize_email_threads()
         return f"Successfully extracted {len(emails)} emails organized into {len(email_threads)} threads"
     except Exception as e:
     return list(participants)
+# Search threads using simple keyword matching
 def search_threads(query):
     global search_results
+    if not query or not email_threads:
         search_results = []
         return "Please enter a search query and ensure emails have been extracted"
     try:
+        # Search terms
+        search_terms = query.lower().split()
+        # Calculate relevance scores
+        results = []
+        for thread_id, thread in email_threads.items():
+            # Prepare text content from thread
+            content = f"{thread['subject'].lower()} "
+            for email in thread["emails"]:
+                content += f"{email['bodyPreview'].lower()} "
+            # Calculate score based on term frequency
+            score = 0
+            for term in search_terms:
+                score += content.count(term)
+            if score > 0:
+                results.append((thread, score))
+        # Sort by score
+        results.sort(key=lambda x: x[1], reverse=True)
+        search_results = [thread for thread, _ in results]
         if not search_results:
             return "No relevant threads found"
 # Generate Q&A for thread
 def generate_qa(thread_id):
+    if thread_id not in email_threads:
+        return "Thread not found"
     try:
         thread = email_threads[thread_id]
         context = f"Thread subject: {thread['subject']}\n\n"
         for email in thread["emails"]:
             sender = email["sender"]["emailAddress"]["address"]
+            content += f"From: {sender}\n"
+            content += f"Date: {email['receivedDateTime']}\n"
+            content += f"Content: {email['bodyPreview']}\n\n"
         # Generate sample questions
         questions = [
             "What were the main points discussed in this thread?"
         ]
+        # Generate simple answers (simulating AI responses)
+        answers = [
+            f"The main topic appears to be '{thread['subject']}', which discusses project-related matters.",
+            f"The key participants include {', '.join(thread['participants'][:3])}" +
+            (f" and {len(thread['participants']) - 3} others" if len(thread['participants']) > 3 else ""),
+            f"The conversation started on {thread['start_date'].split('T')[0]} and the last message was on {thread['end_date'].split('T')[0]}.",
+            "The main points include updates on project status, discussion of requirements, and next steps."
+        ]
         # Create summary
         summary = f"This is an email thread with {thread['message_count']} messages about '{thread['subject']}'. "
+        summary += f"The conversation started on {thread['start_date'].split('T')[0]} and ended on {thread['end_date'].split('T')[0]}. "
         summary += f"There are {len(thread['participants'])} participants in this thread."
         # Store Q&A data
 # Initialize
 init_auth_app()
 # Create the Gradio interface
 with gr.Blocks(title="Email Thread Analyzer with AI Q&A") as demo:
                 auth_url_output = gr.Textbox(label="Authentication URL", interactive=False)
                 auth_code_input = gr.Textbox(label="Authorization Code")
                 auth_submit = gr.Button("Submit Authorization Code")
+                auth_status = gr.Textbox(label="Authentication Status", interactive=False)
     # Email Extraction section
     with gr.Tab("Email Extraction"):