File size: 4,910 Bytes
b73e690
c03941e
 
b73e690
c03941e
20cd498
c03941e
 
 
 
794fcfd
 
b73e690
adabb02
 
b73e690
9e5cc64
 
e20b45b
c03941e
 
 
 
 
 
 
 
 
da5b0b4
c03941e
 
 
 
e20b45b
c03941e
 
 
 
b73e690
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
adabb02
b73e690
 
 
e20b45b
c03941e
 
e20b45b
c03941e
b73e690
c03941e
b73e690
 
 
 
 
 
c03941e
b73e690
 
 
 
 
 
 
 
 
 
 
 
adabb02
b73e690
20cd498
 
b73e690
 
adabb02
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20cd498
c03941e
adabb02
20cd498
9e5cc64
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import time
import openai
import os
import pandas as pd
import numpy as np
import gradio as gr
from llama_index import GPTSimpleVectorIndex
from openai.embeddings_utils import get_embedding, cosine_similarity

# Credentials are taken from the environment; os.getenv returns None when a
# variable is not set.
openai.api_key = os.getenv("OPENAI_API_KEY")
# Password for the optional login. In this file it is only referenced by the
# commented-out `demo.launch(auth=...)` call in the __main__ guard.
passwd = os.getenv("PASSWD_SECRET")

# Page title (also rendered as the top-level heading) and the canned queries
# that are turned into one example button each.
title = "Confidential forensics tool with ChatGPT"
examples = ["Who is Phillip Allen?", "What the project in Austin is about?", "Give me more details about the real estate project"]

# Vector index loaded from disk once at import time; the chat handlers call
# `index.query(...)` on it.
index = GPTSimpleVectorIndex.load_from_disk('email.json')

# Email table used by search_emails, which reads its `embedding`, `file` and
# `body` columns.
df = pd.read_feather("emails.feather")
# One-column frame with the first ten subjects (None shown as "<no title>"),
# displayed in the Dataframe component of the UI.
df2 = df["subject"].replace([None], "<no title>").head(10).to_frame()

def search_emails(opt, message, n=3):
    """Return the ``n`` emails whose embeddings are most similar to a query.

    Args:
        opt: Current content of the results textbox. Unused; it is accepted
            only because the event wiring passes it as the first input.
        message: The user's free-text query.
        n: Number of best-matching emails to include in the output.

    Returns:
        A ``(results_text, "")`` tuple: the text for the results box and an
        empty string that clears the query textbox. On an empty query or on
        any failure, ``results_text`` is a user-facing error message instead.
    """
    if not message.strip():
        return (
            "Oops, it looks like your query was not valid. Please make sure you typed something in your text box and then try again.",
            "",
        )

    try:
        query_embedding = get_embedding(message)
        # Keep the scores local instead of writing a 'similarities' column
        # into the module-level ``df``: that frame is shared by every request,
        # so two overlapping searches could overwrite each other's scores
        # between the assignment and the sort.
        scores = df.embedding.apply(
            lambda emb: cosine_similarity(emb, query_embedding)
        )
        top = (
            df[["file", "body"]]
            .assign(similarities=scores)
            .sort_values("similarities", ascending=False)
            .head(n)
        )
        result = "".join(
            f"Mail ID: {row.file}\nContent: {row.body.strip()}\nSimilarity score: {row.similarities}\n\n"
            for row in top.itertuples(index=False)
        )
    except Exception as e:
        # Top-level UI boundary: report a generic message to the user and
        # leave the details on stdout for whoever operates the app.
        result = "An error occured when handling your query, please try again."
        print(e)
    return result, ""

def respond_upload(btn_upload, message, chat_history, delay=2):
    """Acknowledge a file upload in the chat window.

    NOTE(review): nothing in this function reads or indexes the uploaded
    file; it only pauses and posts a fixed confirmation. Confirm whether real
    ingestion is supposed to happen elsewhere.

    Args:
        btn_upload: Value of the upload button; returned unchanged.
        message: Current content of the chat textbox. Ignored; the textbox is
            cleared by the returned empty string.
        chat_history: List of ``(user_message, bot_message)`` pairs. The
            confirmation pair is appended to it in place.
        delay: Seconds to wait before confirming. Defaults to 2, the value
            that used to be hard-coded; zero or a negative value skips the
            wait.

    Returns:
        ``(btn_upload, "", chat_history)``.
    """
    if delay > 0:
        time.sleep(delay)
    chat_history.append(
        (
            "***File uploaded***",
            "Your document has been uploaded and will be accounted for your queries.",
        )
    )
    return btn_upload, "", chat_history

def respond2(message, chat_history, box, btn=None):
    """Answer an example query without touching what the user has typed.

    Args:
        message: The query text (the label of the clicked example button).
        chat_history: List of ``(user_message, bot_message)`` pairs. The new
            exchange is appended to it in place.
        box: Current content of the chat textbox; returned unchanged.
        btn: Unused. It defaults to ``None`` because the click handler in
            this file supplies only three inputs; as a required argument it
            made every call from that handler fail with ``TypeError``.

    Returns:
        ``(message, chat_history, box)``, where ``message`` is replaced by
        ``"***Empty***"`` if the query was blank.
    """
    if not message.strip():
        message = "***Empty***"
        bot_message = "Oops, it looks like your query was not valid. Please make sure you typed something in your text box and then try again."
    else:
        try:
            bot_message = str(index.query(message)).strip()
        except Exception as e:
            # Top-level UI boundary: show a generic message, but print the
            # cause instead of discarding it so failures can be diagnosed.
            bot_message = "An error occured when handling your query, please try again."
            print(e)
    chat_history.append((message, bot_message))
    return message, chat_history, box

def respond(message, chat_history):
    """Answer a query typed in the chat textbox.

    Args:
        message: The text the user submitted.
        chat_history: List of ``(user_message, bot_message)`` pairs. The new
            exchange is appended to it in place.

    Returns:
        ``("", chat_history)``; the empty string clears the textbox.
    """
    if not message.strip():
        message = "***Empty***"
        bot_message = "Oops, it looks like your query was not valid. Please make sure you typed something in your text box and then try again."
    else:
        try:
            bot_message = str(index.query(message)).strip()
        except Exception as e:
            # Top-level UI boundary: show a generic message, but print the
            # cause instead of discarding it so failures can be diagnosed.
            # ``Exception`` (not a bare ``except``) lets SystemExit and
            # KeyboardInterrupt propagate.
            bot_message = "An error occured when handling your query, please try again."
            print(e)
    chat_history.append((message, bot_message))
    return "", chat_history

# UI definition: a table of loaded documents, a chatbot section with example
# queries, and a similarity-search section.
with gr.Blocks(title=title) as demo:
    # Page heading.
    gr.Markdown(
    """

    # """ + title + """
    """)
    # Table of the loaded documents' subjects (see df2).
    dat = gr.Dataframe(
            value=df2,
            max_cols=1,
            max_rows=4,
            title="Documents loaded",
            overflow_row_behaviour="paginate",
    )
    btn_upload = gr.UploadButton("Upload a new document...", file_types=["text"])

    # --- Chatbot section ---
    gr.Markdown(
    """
    ## Chatbot
    """)
    chatbot = gr.Chatbot().style(height=400)
    with gr.Row():
        with gr.Column(scale=0.85):
            msg = gr.Textbox(
                show_label=False,
                placeholder="Enter text and press enter, or click on Send.",
            ).style(container=False)
        with gr.Column(scale=0.15, min_width=0):
            btn_send = gr.Button("Send your query")
    with gr.Row():
        gr.Markdown(
        """
        Example of queries
        """)
        # One button per canned query; the button itself is the first input,
        # so its label is what gets sent as the query.
        for ex in examples:
            btn = gr.Button(ex)
            btn.click(respond2, [btn, chatbot, msg], [btn, chatbot, msg])

    # Pressing Enter in the textbox and clicking Send do the same thing.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    btn_send.click(respond, [msg, chatbot], [msg, chatbot])
    btn_upload.upload(respond_upload, [btn_upload, msg, chatbot], [btn_upload, msg, chatbot])

    # --- Document search section ---
    gr.Markdown(
    """

    ## Search the matching document
    """)
    # Read-only box that receives the search results.
    opt = gr.Textbox(
        show_label=False,
        placeholder="The document matching with your query will be shown here.",
        interactive=False,
        lines=8
    )
    with gr.Row():
        with gr.Column(scale=0.85):
            msg2 = gr.Textbox(
                show_label=False,
                placeholder="Enter text and press enter, or click on Send.",
            ).style(container=False)
        with gr.Column(scale=0.15, min_width=0):
            btn_send2 = gr.Button("Send your query")

    # The placeholder promises that Enter submits the query, so register the
    # submit event as well as the button click (as done for the chat textbox).
    msg2.submit(search_emails, [opt, msg2], [opt, msg2])
    btn_send2.click(search_emails, [opt, msg2], [opt, msg2])
    
if __name__ == "__main__":
    # Password-protected launch, currently disabled: it would require logging
    # in as user "mithril" with the password read from PASSWD_SECRET.
    # demo.launch(auth=("mithril", passwd))
    # NOTE(review): the app is started without authentication even though its
    # title calls it confidential - confirm this is intended.
    demo.launch()