"""Gradio chatbot front-end for querying an indexed e-mail corpus.

At import time the module loads a persisted llama_index index from
``./storage`` and the e-mail metadata table from ``metadata.csv``; it then
builds a Gradio ``Blocks`` UI whose chat handlers answer questions through
the index and cite the e-mails the answer was drawn from.
"""
import os
import time

import gradio as gr
import openai
import pandas as pd
from llama_index import StorageContext, load_index_from_storage
from openai.embeddings_utils import cosine_similarity, get_embedding

openai.api_key = os.getenv("OPENAI_API_KEY")
passwd = os.getenv("PASSWD_SECRET")

title = "Confidential forensics tool with ChatGPT"
examples = [
    "Who is Phillip Allen?",
    "What the project in Austin is about?",
    "Give me more details about the real estate project",
]


def file_metadata(x):
    """Return the metadata dict attached to a document given its filename."""
    return {"filename": x}


# Load the persisted vector index and expose it as a query engine that
# retrieves the 3 most similar nodes per question.
storage_context = StorageContext.from_defaults(persist_dir="./storage")
index = load_index_from_storage(storage_context)
query_engine = index.as_query_engine(
    similarity_top_k=3,
)

default_mail_name = "no title"

# E-mail metadata table. The code below reads the columns ``subject`` and
# ``email_name`` (and ``embedding``, ``file``, ``body`` in search_emails).
df = pd.read_csv("metadata.csv", delimiter=";")
df["subject"] = df["subject"].replace([None], default_mail_name)

# Small preview table (first 10 subjects) displayed at the top of the UI.
df2 = df["subject"].head(10).to_frame()
df2 = df2.rename({'subject': 'Emails titles'}, axis=1)


def get_email_subject(response):
    """Return the metadata rows of the e-mails cited by a query response.

    Each source node carries the originating file path in
    ``extra_info["filename"]``. The e-mail name is the file's base name up to
    its first dot. Both ``\\`` and ``/`` are accepted as path separators so
    that an index built on Windows or on POSIX resolves the same way.

    Args:
        response: query-engine response exposing ``source_nodes``.

    Returns:
        The rows of ``df`` whose ``email_name`` matches one of the sources.
    """
    emails = []
    for node in response.source_nodes:
        filename = node.node.extra_info["filename"]
        # Normalise separators first, then keep only the last path component.
        basename = filename.replace("\\", "/").rsplit("/", 1)[-1]
        emails.append(basename.split(".")[0])
    return df.loc[df.email_name.isin(emails)]


def search_emails(opt, message, n=3):
    """Output the top ``n`` e-mails whose embedding best matches ``message``.

    Args:
        opt: current content of the output textbox (unused; present because
            the component is wired as an input).
        message: free-text query typed by the user.
        n: number of best-matching e-mails to report.

    Returns:
        A ``(text, "")`` tuple: the text to show in the output box and an
        empty string that clears the query textbox.
    """
    if not message.strip():
        message = "Oops, it looks like your query was not valid. Please make sure you typed something in your text box and then try again."
        return message, ""

    try:
        embedding = get_embedding(message)
        # NOTE(review): assumes df.embedding already holds numeric vectors;
        # a column read straight from CSV would contain strings - confirm.
        scores = df.embedding.apply(lambda x: cosine_similarity(x, embedding))
        # Rank on a copy so the shared module-level table is not mutated on
        # every request.
        best = df.assign(similarities=scores).sort_values(
            'similarities', ascending=False
        ).head(n)
        message = "".join(
            f"Mail ID: {row.file}\nContent: {row.body.strip()}\nSimilarity score: {row.similarities}\n\n"
            for _, row in best.iterrows()
        )
    except Exception as e:
        # Top-level UI boundary: report a generic error to the user and keep
        # the details on the server console.
        message = "An error occured when handling your query, please try again."
        print(e)
    return message, ""


def respond_upload(btn_upload, message, chat_history):
    """Acknowledge a file upload in the chat.

    The handler only waits two seconds and appends a confirmation exchange to
    the chat history; it does not itself add the file to the index.

    Returns:
        ``(btn_upload, "", chat_history)`` - the upload value unchanged, an
        empty string for the textbox, and the updated history.
    """
    time.sleep(2)
    message = "***File uploaded***"
    bot_message = "Your document has been uploaded and will be accounted for your queries."
    chat_history.append((message, bot_message))
    return btn_upload, "", chat_history


def respond2(message, chat_history, box, btn=None):
    """Handle a click on an example button.

    The click is wired with three inputs (button label, chat history, textbox
    content), so ``btn`` must be optional: as a required fourth positional
    parameter it made every click fail with ``TypeError``.

    Returns:
        ``(message, chat_history, box)`` - the button label, the updated
        history, and the textbox content left untouched.
    """
    message, chat_history = respond_common(message, chat_history, box, btn)
    return message, chat_history, box


def respond(message, chat_history):
    """Handle a query typed in the textbox; clears the textbox afterwards."""
    message, chat_history = respond_common(message, chat_history)
    return "", chat_history


def respond_common(message, chat_history, box=None, btn=None):
    """Answer ``message`` through the query engine and record the exchange.

    An empty or whitespace-only query is recorded as ``***Empty***`` with a
    hint asking the user to type something. Otherwise the answer is followed
    by a "Source(s)" section listing the id and subject of each cited e-mail.
    Any failure while answering is replaced by a generic error message.

    Args:
        message: the user's query.
        chat_history: list of ``(user, bot)`` tuples; appended to in place.
        box: unused, accepted for the callers' convenience.
        btn: unused, accepted for the callers' convenience.

    Returns:
        ``(message, chat_history)`` with the new exchange appended.
    """
    if not message.strip():
        message = "***Empty***"
        bot_message = "Oops, it looks like your query was not valid. Please make sure you typed something in your text box and then try again."
    else:
        try:
            resp = query_engine.query(message)
            bot_message = str(resp).strip()
            bot_message += "\n\n\n\nSource(s):\n\n"
            # Look the sources up once (the lookup scans the metadata table).
            for _, row in get_email_subject(resp).iterrows():
                bot_message += f"Email ID: **{row.email_name}**\n**Subject: {row.subject}**\n"
        except Exception as e:
            # Top-level UI boundary: show a generic error, log the cause.
            bot_message = "An error occured when handling your query, please try again."
            print(e)
    chat_history.append((message, bot_message))
    return message, chat_history


with gr.Blocks(title=title) as demo:
    gr.Markdown(
        """
        # """ + title + """
        """)
    dat = gr.Dataframe(
        value=df2,
        max_cols=1,
        max_rows=4,
        overflow_row_behaviour="paginate",
    )
    btn_upload = gr.UploadButton("Upload a new document...", file_types=["text"])
    gr.Markdown(
        """
        ## Chatbot
        """)
    chatbot = gr.Chatbot().style(height=400)
    with gr.Row():
        with gr.Column(scale=0.85):
            msg = gr.Textbox(
                show_label=False,
                placeholder="Enter text and press enter, or click on Send.",
            ).style(container=False)
        with gr.Column(scale=0.15, min_width=0):
            btn_send = gr.Button("Send your query")
    with gr.Row():
        gr.Markdown(
            """
            Example of queries
            """)
        # One button per example; the button's label is sent as the query.
        for ex in examples:
            btn = gr.Button(ex)
            btn.click(respond2, [btn, chatbot, msg], [btn, chatbot, msg])

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    btn_send.click(respond, [msg, chatbot], [msg, chatbot])
    btn_upload.upload(respond_upload, [btn_upload, msg, chatbot], [btn_upload, msg, chatbot])

    # Disabled "search the matching document" panel (uses search_emails).
    # gr.Markdown(
    #     """
    #     ## Search the matching document
    #     """)
    # opt = gr.Textbox(
    #     show_label=False,
    #     placeholder="The document matching with your query will be shown here.",
    #     interactive=False,
    #     lines=8
    # )
    # with gr.Row():
    #     with gr.Column(scale=0.85):
    #         msg2 = gr.Textbox(
    #             show_label=False,
    #             placeholder="Enter text and press enter, or click on Send.",
    #         ).style(container=False)
    #     with gr.Column(scale=0.15, min_width=0):
    #         btn_send2 = gr.Button("Send your query")
    # btn_send2.click(search_emails, [opt, msg2], [opt, msg2])

if __name__ == "__main__":
    # Without a password the login form can never succeed; fail fast with a
    # clear message instead of starting an unusable server.
    if not passwd:
        raise SystemExit("PASSWD_SECRET must be set to launch the app with authentication.")
    demo.launch(auth=("mithril", passwd))