phase 2 initial
Browse files- .gitignore +2 -0
- app.py +132 -7
- requirements.txt +4 -1
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
__pycache__
|
2 |
+
examples
|
app.py
CHANGED
@@ -1,20 +1,31 @@
|
|
1 |
import gradio as gr
|
2 |
import mysql.connector
|
3 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
# Use a pipeline as a high-level helper
|
6 |
from transformers import pipeline
|
7 |
|
|
|
|
|
8 |
classifier_model = pipeline(
|
9 |
"zero-shot-classification", model="MoritzLaurer/deberta-v3-large-zeroshot-v1"
|
10 |
)
|
11 |
|
|
|
|
|
12 |
# get db info from env vars
|
13 |
db_host = os.environ.get("DB_HOST")
|
14 |
db_user = os.environ.get("DB_USER")
|
15 |
db_pass = os.environ.get("DB_PASS")
|
16 |
db_name = os.environ.get("DB_NAME")
|
17 |
|
|
|
18 |
|
19 |
db_connection = mysql.connector.connect(
|
20 |
host=db_host,
|
@@ -29,6 +40,14 @@ ORG_ID = 731
|
|
29 |
|
30 |
potential_labels = []
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
def get_potential_labels():
|
34 |
# get potential labels from db
|
@@ -48,7 +67,7 @@ potential_labels = get_potential_labels()
|
|
48 |
|
49 |
|
50 |
# Function to handle the classification
|
51 |
-
def
|
52 |
potential_labels = get_potential_labels()
|
53 |
print("classifying email")
|
54 |
model_out = classifier_model(constituent_email, potential_labels, multi_label=True)
|
@@ -62,9 +81,43 @@ def classify_email(constituent_email):
|
|
62 |
# Find the index of the highest score
|
63 |
max_score_index = model_out["scores"].index(max(model_out["scores"]))
|
64 |
# Return the label with the highest score
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
68 |
|
69 |
|
70 |
def remove_spaces_after_comma(s):
|
@@ -73,6 +126,78 @@ def remove_spaces_after_comma(s):
|
|
73 |
return ",".join(parts)
|
74 |
|
75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
76 |
# Function to handle saving data
|
77 |
def save_data(orig_user_email, constituent_email, labels, user_response, current_user):
|
78 |
# save the data to the database
|
@@ -224,9 +349,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
|
|
224 |
|
225 |
# Define button actions
|
226 |
classify_button.click(
|
227 |
-
fn=
|
228 |
-
inputs=constituent_response_input,
|
229 |
-
outputs=classification_output,
|
230 |
)
|
231 |
|
232 |
save_button.click(
|
@@ -242,4 +367,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as app:
|
|
242 |
)
|
243 |
|
244 |
# Launch the app
|
245 |
-
app.launch(
|
|
|
1 |
import gradio as gr
|
2 |
import mysql.connector
|
3 |
import os
|
4 |
+
from langchain_openai import ChatOpenAI
|
5 |
+
from langchain_core.prompts import (
|
6 |
+
ChatPromptTemplate,
|
7 |
+
PromptTemplate,
|
8 |
+
FewShotPromptTemplate,
|
9 |
+
)
|
10 |
|
11 |
# Use a pipeline as a high-level helper
|
12 |
from transformers import pipeline
|
13 |
|
14 |
+
from sentence_transformers import SentenceTransformer, util
|
15 |
+
|
16 |
classifier_model = pipeline(
|
17 |
"zero-shot-classification", model="MoritzLaurer/deberta-v3-large-zeroshot-v1"
|
18 |
)
|
19 |
|
20 |
+
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
|
21 |
+
|
22 |
# get db info from env vars
|
23 |
db_host = os.environ.get("DB_HOST")
|
24 |
db_user = os.environ.get("DB_USER")
|
25 |
db_pass = os.environ.get("DB_PASS")
|
26 |
db_name = os.environ.get("DB_NAME")
|
27 |
|
28 |
+
openai_api_key = os.environ.get("OPENAI_API_KEY")
|
29 |
|
30 |
db_connection = mysql.connector.connect(
|
31 |
host=db_host,
|
|
|
40 |
|
41 |
potential_labels = []
|
42 |
|
43 |
+
llm = ChatOpenAI(openai_api_key=openai_api_key, model="gpt-4")
|
44 |
+
|
45 |
+
system_prompt = "You are a representative for a local government. A constituent has reached out to you with a question about a local policy. Base your response using the examples below. Be sure to address all points and concerns raised by the constituent. If you do not have enough information to be able to answer the question (you do not see an example that answers the question from the constituent), please make note and another representative will fill in the missing information.\n\n"
|
46 |
+
|
47 |
+
examples_prompt = PromptTemplate(
|
48 |
+
input_variables=["example"], template="Example:\n\n {example}"
|
49 |
+
)
|
50 |
+
|
51 |
|
52 |
def get_potential_labels():
|
53 |
# get potential labels from db
|
|
|
67 |
|
68 |
|
69 |
# Function to handle the classification
|
70 |
+
def classify_email_and_generate_response(representative_email, constituent_email):
|
71 |
potential_labels = get_potential_labels()
|
72 |
print("classifying email")
|
73 |
model_out = classifier_model(constituent_email, potential_labels, multi_label=True)
|
|
|
81 |
# Find the index of the highest score
|
82 |
max_score_index = model_out["scores"].index(max(model_out["scores"]))
|
83 |
# Return the label with the highest score
|
84 |
+
top_labels = [model_out["labels"][max_score_index]]
|
85 |
+
|
86 |
+
labels_with_enough_examples = ["Enforcement", "Financial", "Rules"]
|
87 |
+
# see if any of the labels are in labels_with_enough_examples, if so get the messages for that category, else return
|
88 |
+
|
89 |
+
examples = get_similar_messages(constituent_email)
|
90 |
+
|
91 |
+
if representative_email != "":
|
92 |
+
current_thread = (
|
93 |
+
"Representative message: \n\n"
|
94 |
+
+ representative_email
|
95 |
+
+ "\n\nConstituent message: \n\n"
|
96 |
+
+ constituent_email
|
97 |
+
)
|
98 |
+
else:
|
99 |
+
current_thread = "Constituent message: \n\n" + constituent_email
|
100 |
+
|
101 |
+
prompt = FewShotPromptTemplate(
|
102 |
+
examples=examples,
|
103 |
+
example_prompt=examples_prompt,
|
104 |
+
prefix=system_prompt,
|
105 |
+
suffix="Current thread:\n\n {current_thread}\n\nYour response:\n\n",
|
106 |
+
input_variables=["current_thread"],
|
107 |
+
)
|
108 |
+
|
109 |
+
formatted_prompt = prompt.format(current_thread=current_thread)
|
110 |
+
|
111 |
+
print(formatted_prompt)
|
112 |
|
113 |
+
print("Generating GPT4 response")
|
114 |
+
import time
|
115 |
+
|
116 |
+
start = time.time()
|
117 |
+
out = "GPT4:\n\n" + llm.invoke(formatted_prompt).content
|
118 |
+
print("GPT4 response generated in", time.time() - start, "seconds")
|
119 |
+
|
120 |
+
return ", ".join(top_labels), out
|
121 |
|
122 |
|
123 |
def remove_spaces_after_comma(s):
|
|
|
126 |
return ",".join(parts)
|
127 |
|
128 |
|
129 |
+
def get_similar_messages(constituent_email):
    """Return up to 3 stored message threads most similar to *constituent_email*.

    Reconstructs each stored thread (alternating Representative/Constituent
    turns, chained via ``previous_message_id``), embeds it with the
    module-level ``embedding_model``, ranks threads by cosine similarity to
    the incoming email, and returns the top 3 as ``[{"example": <thread>}]``
    dicts, the shape expected by the FewShotPromptTemplate examples list.

    Uses the module-level DB credentials (db_host/db_user/db_pass/db_name).
    Threads with similarity > 0.98 are skipped (presumably to exclude the
    query message itself — TODO confirm).
    """
    db_connection = mysql.connector.connect(
        host=db_host,
        user=db_user,
        password=db_pass,
        database=db_name,
    )
    db_cursor = db_connection.cursor()
    # Ensure the connection/cursor are released even if a query fails —
    # the previous version leaked them on every call.
    try:
        # cursor.execute() returns None in mysql-connector; only fetchall()
        # yields rows, so don't assign the execute() result.
        db_cursor.execute(
            "SELECT id, person_id, body FROM radmap_frog12.messages WHERE id IN (SELECT message_id FROM radmap_frog12.message_category_associations)"
        )
        messages_for_category = db_cursor.fetchall()

        all_message_chains = []

        for message in messages_for_category:
            # TODO: refactor for when integrated with RADMAP
            # if person_id is set
            if message[1] != 0:
                message_chain = "Representative message: \n\n" + message[2] + "\n\n"
                is_representative_turn = False
            else:
                message_chain = "Constituent message: \n\n" + message[2] + "\n\n"
                is_representative_turn = True
            embedding = embedding_model.encode([message[2]])[0]

            next_message_id = message[0]

            # Walk the reply chain: each iteration fetches the message whose
            # previous_message_id points at the current one.
            while next_message_id:
                db_cursor.execute(
                    "SELECT id, body FROM radmap_frog12.messages WHERE previous_message_id = %s",
                    (next_message_id,),
                )
                next_message = db_cursor.fetchall()
                if not next_message:
                    break
                if is_representative_turn:
                    message_chain += (
                        "Representative message: \n\n" + next_message[0][1] + "\n\n"
                    )
                    is_representative_turn = False
                else:
                    message_chain += (
                        "Constituent message: \n\n" + next_message[0][1] + "\n\n"
                    )
                    is_representative_turn = True

                # NOTE(review): only the *last* turn's embedding survives for
                # the whole chain (each iteration overwrites `embedding`) —
                # confirm this is intended rather than embedding the full chain.
                embedding = embedding_model.encode([next_message[0][1]])[0]

                next_message_id = next_message[0][0]

            all_message_chains.append((message_chain, embedding))

        target_embedding = embedding_model.encode([constituent_email])[0]

        # Compute cosine similarities and keep the top 3 most similar threads.
        top_messages = []
        for message_chain, embedding in all_message_chains:
            cosine_score = util.pytorch_cos_sim(embedding, target_embedding)
            if cosine_score > 0.98:
                # Near-identical match — likely the query email itself; skip.
                continue
            top_messages.append((message_chain, cosine_score))

        top_messages.sort(key=lambda pair: pair[1], reverse=True)

        return [{"example": chain} for chain, _score in top_messages[:3]]
    finally:
        db_cursor.close()
        db_connection.close()
|
199 |
+
|
200 |
+
|
201 |
# Function to handle saving data
|
202 |
def save_data(orig_user_email, constituent_email, labels, user_response, current_user):
|
203 |
# save the data to the database
|
|
|
349 |
|
350 |
# Define button actions
|
351 |
classify_button.click(
|
352 |
+
fn=classify_email_and_generate_response,
|
353 |
+
inputs=[original_email_input, constituent_response_input],
|
354 |
+
outputs=[classification_output, user_response_input],
|
355 |
)
|
356 |
|
357 |
save_button.click(
|
|
|
367 |
)
|
368 |
|
369 |
# Launch the app
|
370 |
+
app.launch(debug=True)
|
requirements.txt
CHANGED
@@ -1,3 +1,6 @@
|
|
1 |
mysql-connector-python
|
2 |
torch
|
3 |
-
transformers
|
|
|
|
|
|
|
|
1 |
mysql-connector-python
|
2 |
torch
|
3 |
+
transformers
|
4 |
+
langchain-openai
|
5 |
+
langchain-core
|
6 |
+
sentence_transformers
|