Fix evaluation score
- __pycache__/classes.cpython-311.pyc +0 -0
- __pycache__/utils_evaluate.cpython-311.pyc +0 -0
- __pycache__/utils_evaluate_objections.cpython-311.pyc +0 -0
- __pycache__/utils_opportunity_review.cpython-311.pyc +0 -0
- __pycache__/utils_output.cpython-311.pyc +0 -0
- __pycache__/utils_prep.cpython-311.pyc +0 -0
- __pycache__/utils_prompt.cpython-311.pyc +0 -0
- classes.py +1 -1
- utils_evaluate.py +7 -4
- utils_evaluate_objections.py +18 -15
- utils_output.py +7 -5
- utils_prompt.py +3 -0
__pycache__/classes.cpython-311.pyc CHANGED
Binary files a/__pycache__/classes.cpython-311.pyc and b/__pycache__/classes.cpython-311.pyc differ

__pycache__/utils_evaluate.cpython-311.pyc CHANGED
Binary files a/__pycache__/utils_evaluate.cpython-311.pyc and b/__pycache__/utils_evaluate.cpython-311.pyc differ

__pycache__/utils_evaluate_objections.cpython-311.pyc CHANGED
Binary files a/__pycache__/utils_evaluate_objections.cpython-311.pyc and b/__pycache__/utils_evaluate_objections.cpython-311.pyc differ

__pycache__/utils_opportunity_review.cpython-311.pyc CHANGED
Binary files a/__pycache__/utils_opportunity_review.cpython-311.pyc and b/__pycache__/utils_opportunity_review.cpython-311.pyc differ

__pycache__/utils_output.cpython-311.pyc CHANGED
Binary files a/__pycache__/utils_output.cpython-311.pyc and b/__pycache__/utils_output.cpython-311.pyc differ

__pycache__/utils_prep.cpython-311.pyc CHANGED
Binary files a/__pycache__/utils_prep.cpython-311.pyc and b/__pycache__/utils_prep.cpython-311.pyc differ

__pycache__/utils_prompt.cpython-311.pyc CHANGED
Binary files a/__pycache__/utils_prompt.cpython-311.pyc and b/__pycache__/utils_prompt.cpython-311.pyc differ
classes.py CHANGED
@@ -41,7 +41,7 @@ class SessionState:
         self.do_opportunity_analysis = True
         self.do_customer_research = True
         self.do_objections = False
-        self.add_objections_to_analysis = False
+        self.add_objections_to_analysis = True
         self.ask_objections = True
         self.use_objection_cache = True
         self.do_ragas_evaluation = False
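For reference, the flag block in this hunk reads as follows after the commit, wrapped in a minimal constructor so the snippet runs on its own (placing these assignments in `__init__`, and omitting the rest of `SessionState`, are assumptions; the diff only shows the flag lines):

```python
class SessionState:
    def __init__(self):
        # Feature flags from the hunk above, post-commit values
        self.do_opportunity_analysis = True
        self.do_customer_research = True
        self.do_objections = False
        self.add_objections_to_analysis = True  # flipped on by this commit
        self.ask_objections = True
        self.use_objection_cache = True
        self.do_ragas_evaluation = False

state = SessionState()
print(state.add_objections_to_analysis)  # True -> the objection scoring path is taken
```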
utils_evaluate.py CHANGED
@@ -10,10 +10,11 @@ from ragas.metrics import (
 from rouge_score import rouge_scorer
 from sentence_transformers import SentenceTransformer, util
 
-from utils_evaluate_objections import generate_objection_scores
+from utils_evaluate_objections import generate_objection_score
 
 
-def evaluate_objections(session):
+async def evaluate_objections(session):
+    print("evaluate_objections()")
 
     for response in session.responses:
         question = response.get("question", "")
@@ -24,8 +25,10 @@ def evaluate_objections(session):
         q_and_a = {
             "objection": question,
             "answer": answer
-        }
-
+        }
+        print(q_and_a)
+        score = await generate_objection_score(q_and_a)
+        print(score)
         response["evaluation_score"] = score
 
 
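Reading the two hunks together, the post-commit function looks roughly like the sketch below. The lines between the hunks are not shown in the diff, so the `answer` lookup here is an assumption:

```python
from utils_evaluate_objections import generate_objection_score

async def evaluate_objections(session):
    print("evaluate_objections()")

    for response in session.responses:
        question = response.get("question", "")
        answer = response.get("response", "")  # assumed: this lookup falls between the hunks

        q_and_a = {
            "objection": question,
            "answer": answer
        }
        print(q_and_a)
        # Each Q&A pair gets one LLM-backed satisfaction score
        score = await generate_objection_score(q_and_a)
        print(score)
        response["evaluation_score"] = score
```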
utils_evaluate_objections.py CHANGED
@@ -1,18 +1,22 @@
-
-
-
+import asyncio
+import dotenv
+import os
 import pandas as pd
-from typing import List, Tuple
-from datetime import datetime
 import sys
+import typing as t
+
 from dataclasses import dataclass, field
+from datetime import datetime
+from langchain_openai import ChatOpenAI
+from pydantic import BaseModel, Field
+from ragas import SingleTurnSample
+from ragas.llms.base import LangchainLLMWrapper
 from ragas.metrics.base import MetricType
-from ragas.
-from ragas import
-
-
-
-import os
+from ragas.metrics.base import MetricWithLLM, SingleTurnMetric
+from ragas.prompt.pydantic_prompt import PydanticPrompt
+from typing import List, Tuple
+
+
 # Load environment variables from .env file
 dotenv.load_dotenv()
 
@@ -92,10 +96,8 @@ class SatisfyRate(MetricWithLLM, SingleTurnMetric):
         )
         return int(prompt_response.satisfy)
 
-async def generate_objection_scores(question_answer):
-
-    from ragas.llms.base import LangchainLLMWrapper
-    import pandas as pd
+async def generate_objection_score(question_answer):
+    print("generate_objection_scores()")
     # user_response= pd.read_csv(file_path)
     openai_model = LangchainLLMWrapper(ChatOpenAI(model_name="gpt-4o", api_key=OPENAI_API_KEY))
     scorer = SatisfyRate(llm=openai_model)
@@ -104,6 +106,7 @@ async def generate_objection_scores(question_answer):
 
     #(user_response['objection'][num], user_response['response'][num])
     satisfy_0_1 = await scorer.single_turn_ascore(sample)
+    print(satisfy_0_1)
 
     print (question_answer['objection'], question_answer['answer'], satisfy_0_1)
    # Implement your logic to generate a response based on the user's input
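The helper leans on ragas's single-turn metric protocol: wrap a LangChain chat model, instantiate the custom `SatisfyRate` metric defined earlier in this file, build a `SingleTurnSample`, and await `single_turn_ascore`. The diff does not show how `sample` is constructed or what the function returns, so the sketch below fills those in as assumptions, using the standard `SingleTurnSample` fields (`user_input`, `response`):

```python
import os

from langchain_openai import ChatOpenAI
from ragas import SingleTurnSample
from ragas.llms.base import LangchainLLMWrapper

# Assumes SatisfyRate (the MetricWithLLM/SingleTurnMetric subclass defined
# earlier in utils_evaluate_objections.py) is importable from this module.
from utils_evaluate_objections import SatisfyRate

async def generate_objection_score(question_answer):
    # Wrap the chat model so ragas can drive it, mirroring the diff
    openai_model = LangchainLLMWrapper(
        ChatOpenAI(model_name="gpt-4o", api_key=os.environ["OPENAI_API_KEY"])
    )
    scorer = SatisfyRate(llm=openai_model)

    # Assumed construction; the diff omits this line
    sample = SingleTurnSample(
        user_input=question_answer["objection"],
        response=question_answer["answer"],
    )
    satisfy_0_1 = await scorer.single_turn_ascore(sample)  # 0 or 1 per the metric
    return satisfy_0_1
```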
utils_output.py CHANGED
@@ -60,11 +60,13 @@ def format_datetime(dt):
 async def display_evaluation_results(cl, session_state):
     out_text = "*Preparing evaluation results ...*"
     await cl.Message(content=out_text).send()
-
+    print("Checking evaluation and objection flags")
+    print(session_state.do_evaluation)
+    print(session_state.add_objections_to_analysis)
     if session_state.do_evaluation:
         evaluate_answers(session_state)
     elif session_state.add_objections_to_analysis:
-        evaluate_objections(session_state)
+        await evaluate_objections(session_state)
     await asyncio.sleep(1)
 
     output = f"**Session Summary**"
@@ -82,9 +84,9 @@ async def display_evaluation_results(cl, session_state):
     averages = results_df[columns_to_average].mean()
 
     await cl.Message(content="**Overall Summary (By SalesBuddy)**").send()
-    output = f"**SalesBuddy Score:** {session_state.responses[-1]['overall_score']} \n"
+    output = f"**SalesBuddy Score (1-10):** {session_state.responses[-1]['overall_score']} \n"
     output = output + f"**SalesBuddy Evaluation:** {session_state.responses[-1]['overall_evaluation']} \n"
-    output = output + f"**SalesBuddy Final Mood Score:** {session_state.responses[-1]['mood_score']} \n"
+    output = output + f"**SalesBuddy Final Mood Score (1-10):** {session_state.responses[-1]['mood_score']} \n"
     await cl.Message(content=output).send()
 
     if session_state.do_ragas_evaluation:
@@ -101,7 +103,7 @@ async def display_evaluation_results(cl, session_state):
     **Question:** {resp.get('question', 'N/A')}
     **Answer:** {resp.get('response', 'N/A')}
     **SalesBuddy Evaluation:** {resp.get('response_evaluation', 'N/A')}
-    **Evaluation Score:** {resp.get('
+    **Evaluation Score:** {resp.get('evaluation_score', 'N/A')}
     """
     if session_state.do_ragas_evaluation:
         scores = session_state.scores[index]
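The `await` added at the call site is the substantive fix here: now that `evaluate_objections()` is a coroutine, calling it bare only creates a coroutine object, so the scoring loop would never execute (Python also warns that the coroutine was never awaited). A minimal, self-contained illustration of the difference:

```python
import asyncio

async def evaluate_objections(session):
    # Stand-in for the real scoring loop
    session["evaluation_score"] = 1

async def main():
    session = {}
    evaluate_objections(session)        # coroutine object created, body never runs
    print(session)                      # {}
    await evaluate_objections(session)  # body actually executes
    print(session)                      # {'evaluation_score': 1}

asyncio.run(main())
```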
utils_prompt.py CHANGED
@@ -103,6 +103,7 @@ def get_system_template_openai_short():
     You are playing a role in a conversation with a sales representative.
     Your name is in the 'Name:' section.
     They can use your first name, full name or address you with a title and last name.
+    If they get your name wrong, you can correct them once.
     Your name does not need to match exactly what they say.
     Be chatty and conversational and friendly.
     Your compnay information is in the 'Company:' section.
@@ -116,10 +117,12 @@ def get_system_template_openai_short():
     You can make conversation but you must follow the command.
     If a previous question and answer are provided, you must evaluate the rep's answer.
     You will perform evaluation based on how well and thoroughly the rep answered the previous question.
+    If the reps answer does not make sense or is not clear, set the score to a 1.
     If asked to provide a conclusion, you must consider all of the rep's answers to your questions.
     These are provided in the 'All questions and answers:' section.
     You will ALWAYS provide your response in valid JSON format
     Remember all string values must be enclosed in double quotes.
+    Remember do not include a question in your response.
     You will include with the following fields in JSON format:
     - Continue: Yes or No depending on if you want to continue the conversation based on the reps answer to your question.
     - Ask Follow Up: Yes or No depending on if you want to ask a follow up question.