import json
import os
import re
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List

from langchain.document_loaders import CSVLoader, Docx2txtLoader, PyPDFLoader
from langchain.prompts import PromptTemplate
from langchain.schema import AIMessage, Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langgraph.graph import END, StateGraph
from pydantic import BaseModel, Field
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, PointStruct, VectorParams

llm = ChatOpenAI(model="gpt-4o")
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
qdrant = QdrantClient(":memory:")  # In-memory Qdrant instance

# Create collection
qdrant.create_collection(
    collection_name="opportunities",
    vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
)
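# Note: ChatOpenAI and OpenAIEmbeddings read the OPENAI_API_KEY environment
# variable, so it must be set before this module runs. The ":memory:" Qdrant
# client is ephemeral -- all stored vectors are lost when the process exits.
# VectorParams(size=1536) matches the output dimension of text-embedding-3-small.
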
class State(BaseModel):
    file_path: str
    document_processed: str = ""
    opportunity_evaluation: Dict[str, Any] = Field(default_factory=dict)
    next_action: Dict[str, Any] = Field(default_factory=dict)

    def dict_representation(self) -> Dict[str, Any]:
        return {
            "file_path": self.file_path,
            "document_processed": self.document_processed,
            "opportunity_evaluation": self.opportunity_evaluation,
            "next_action": self.next_action,
        }

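# A single State instance is threaded through the three-agent LangGraph
# pipeline defined below; each node returns an updated copy rather than
# mutating the incoming state.
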
async def prep_opportunity_review(session_state):
    file_path = prep_document()
    structured_results = run_analysis(file_path)
    opportunity_review_report = create_opportunity_review_report(structured_results)
    session_state.opportunity_review_results = structured_results
    session_state.opportunity_review_report = opportunity_review_report

def prep_document():
    file_path = "data/HSBC Opportunity Information.docx"
    path = Path(file_path)

    if path.exists():
        if path.is_file():
            print(f"File found: {path}")
            print(f"File size: {path.stat().st_size / 1024:.2f} KB")
            print(f"Last modified: {datetime.fromtimestamp(path.stat().st_mtime)}")
            print("File is ready for processing.")
            if os.access(path, os.R_OK):
                print("File is readable.")
            else:
                print("Warning: File exists but may not be readable. Check permissions.")
        else:
            print(f"Error: {path} exists but is not a file. It might be a directory.")
    else:
        print(f"Error: File not found at {path}")
        print("Please check the following:")
        print("1. Ensure the file path is correct.")
        print("2. Verify that the file exists in the specified location.")
        print("3. Check if you have the necessary permissions to access the file.")
        parent = path.parent
        if not parent.exists():
            print(f"Note: The directory {parent} does not exist.")
        elif not parent.is_dir():
            print(f"Note: {parent} exists but is not a directory.")

    file_path_for_processing = str(path)
    return file_path_for_processing

def load_and_chunk_document(file_path: str) -> List[Document]:
    """Load and chunk the document based on file type."""
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    _, file_extension = os.path.splitext(file_path.lower())
    if file_extension == '.csv':
        loader = CSVLoader(file_path)
    elif file_extension == '.pdf':
        loader = PyPDFLoader(file_path)
    elif file_extension == '.docx':
        loader = Docx2txtLoader(file_path)
    else:
        raise ValueError(f"Unsupported file type: {file_extension}")

    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    return text_splitter.split_documents(documents)

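# Usage sketch (assumes a sample document exists at this hypothetical path):
#   chunks = load_and_chunk_document("data/sample.docx")
#   print(f"{len(chunks)} chunks; first chunk starts: {chunks[0].page_content[:80]}")
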
def agent_1(file_path: str) -> str:
    """Agent 1: Load, chunk, embed, and store document in Qdrant."""
    try:
        chunks = load_and_chunk_document(file_path)
        # Embed all chunks in a single batched API call instead of one call per chunk
        vectors = embeddings.embed_documents([chunk.page_content for chunk in chunks])
        points = [
            PointStruct(id=i, vector=vector, payload={"text": chunk.page_content})
            for i, (chunk, vector) in enumerate(zip(chunks, vectors))
        ]
        qdrant.upsert(
            collection_name="opportunities",
            points=points
        )
        return f"Document processed and stored in Qdrant. {len(chunks)} chunks created."
    except Exception as e:
        print(f"Error in agent_1: {str(e)}")
        return f"Error processing document: {str(e)}"

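# Note: point ids are the chunk indices, so re-running agent_1 upserts over the
# previous points rather than appending duplicates. With the :memory: client,
# nothing persists across process restarts anyway.
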
def agent_2() -> Dict[str, Any]:
    """Agent 2: Evaluate opportunity based on MEDDIC criteria."""
    try:
        results = qdrant.scroll(collection_name="opportunities", limit=100)
        if not results or len(results[0]) == 0:
            raise ValueError("No documents found in Qdrant")
        full_text = " ".join([point.payload.get("text", "") for point in results[0]])

        meddic_template = """
        Analyze the following opportunity information using the MEDDIC sales methodology:

        {opportunity_info}

        Assign an overall opportunity score (1-100), where 100 means the opportunity is a sure win.
        Provide a Summary of the opportunity. There must always be a summary. Ensure the summary is on the
        same line as the Summary: title.
        Evaluate the opportunity based on each MEDDIC criterion and assign a score for each criterion:
        1. Metrics
        2. Economic Buyer
        3. Decision Criteria
        4. Decision Process
        5. Identify Pain
        6. Champion

        Format your response as follows:
        Summary: [Opportunity Summary]
        Score: [Overall Opportunity Score between 1 to 100 based on MEDDIC criteria]
        MEDDIC Evaluation:
        - Metrics: [Score on Metrics, Evaluation on Metrics criterion]
        - Economic Buyer: [Score on Economic Buyer, Evaluation on Economic Buyer criterion]
        - Decision Criteria: [Score on Decision Criteria, Evaluation on Decision Criteria criterion]
        - Decision Process: [Score on Decision Process, Evaluation on Decision Process criterion]
        - Identify Pain: [Score on Identify Pain, Evaluation on Identify Pain criterion]
        - Champion: [Score on Champion, Evaluation on Champion criterion]
        """
        meddic_prompt = PromptTemplate(template=meddic_template, input_variables=["opportunity_info"])
        meddic_chain = meddic_prompt | llm
        response = meddic_chain.invoke({"opportunity_info": full_text})

        if isinstance(response, AIMessage):
            response_content = response.content
        elif isinstance(response, str):
            response_content = response
        else:
            raise ValueError(f"Unexpected response type: {type(response)}")
        print(response_content)

        # Parse the plain-text response line by line
        lines = response_content.split('\n')
        summary = next((line.split('Summary:')[1].strip() for line in lines if line.strip().startswith('Summary:')), 'N/A')
        score = next((int(line.split('Score:')[1].strip()) for line in lines if line.strip().startswith('Score:')), 0)

        meddic_eval = {}
        current_criterion = None
        for line in lines:
            if line.strip().startswith('-'):
                parts = line.split(':', 1)
                if len(parts) == 2:
                    current_criterion = parts[0].strip('- ')
                    meddic_eval[current_criterion] = parts[1].strip()
            elif current_criterion and line.strip():
                # Continuation line of the previous criterion's evaluation
                meddic_eval[current_criterion] += ' ' + line.strip()

        return {
            'summary': summary,
            'score': score,
            'meddic_evaluation': meddic_eval
        }
    except Exception as e:
        print(f"Error in agent_2: {str(e)}")
        return {
            'summary': "Error occurred during evaluation",
            'score': 0,
            'meddic_evaluation': str(e)
        }

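# Note: agent_2 parses a free-text response and is kept for reference; the graph
# below wires in agent_2_json, whose JSON output is more robust to formatting
# drift in the model's reply.
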
def clean_and_parse_json(json_string):
    # Remove a Markdown code fence (with optional "json" label) if present
    json_string = re.sub(r'^\s*```(?:json)?\s*', '', json_string)
    json_string = re.sub(r'\s*```\s*$', '', json_string)
    # Remove any leading/trailing whitespace
    json_string = json_string.strip()
    # Parse the cleaned JSON string
    try:
        return json.loads(json_string)
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        return None

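# Example: clean_and_parse_json('```json\n{"Score": 72}\n```') returns
# {"Score": 72}; on malformed input it prints the decode error and returns None.
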
def agent_2_json() -> Dict[str, Any]:
    """Agent 2: Evaluate opportunity based on MEDDIC criteria, returning JSON."""
    try:
        results = qdrant.scroll(collection_name="opportunities", limit=100)
        if not results or len(results[0]) == 0:
            raise ValueError("No documents found in Qdrant")
        full_text = " ".join([point.payload.get("text", "") for point in results[0]])

        meddic_template = """
        Analyze the following opportunity information using the MEDDIC sales methodology:

        {opportunity_info}

        Assign an overall opportunity score (1-100), where 100 means the opportunity is a sure win.
        Provide a Summary of the opportunity. There must always be a summary.
        Evaluate the opportunity based on each MEDDIC criterion and assign a score for each criterion:
        1. Metrics
        2. Economic Buyer
        3. Decision Criteria
        4. Decision Process
        5. Identify Pain
        6. Champion

        Your response must be in JSON format.
        Use the following keys in the JSON:
        Summary: Opportunity Summary
        Score: Overall Opportunity Score between 1 to 100 based on MEDDIC criteria
        Metrics Score: MEDDIC score on Metrics
        Metrics Evaluation: Evaluation on MEDDIC Metrics criterion
        Economic Buyer Score: MEDDIC Score on Economic Buyer
        Economic Buyer Evaluation: Evaluation on MEDDIC Economic Buyer criterion
        Decision Criteria Score: MEDDIC Score on Decision Criteria
        Decision Criteria Evaluation: Evaluation on MEDDIC Decision Criteria criterion
        Decision Process Score: MEDDIC Score on Decision Process
        Decision Process Evaluation: Evaluation on MEDDIC Decision Process criterion
        Identify Pain Score: MEDDIC Score on Identify Pain
        Identify Pain Evaluation: Evaluation on MEDDIC Identify Pain criterion
        Champion Score: MEDDIC Score on Champion
        Champion Evaluation: Evaluation on MEDDIC Champion criterion
        """
        meddic_prompt = PromptTemplate(template=meddic_template, input_variables=["opportunity_info"])
        meddic_chain = meddic_prompt | llm
        response = meddic_chain.invoke({"opportunity_info": full_text})

        if isinstance(response, AIMessage):
            response_content = response.content
        elif isinstance(response, str):
            response_content = response
        else:
            raise ValueError(f"Unexpected response type: {type(response)}")
        print(response_content)

        meddic_data = clean_and_parse_json(response_content)
        if meddic_data is None:
            raise ValueError("Model response could not be parsed as JSON")

        # Create the output structure
        output = {
            'summary': meddic_data.get('Summary', 'N/A'),
            'score': meddic_data.get('Score', 0),
            'meddic_evaluation': {
                'Metrics': f"{meddic_data.get('Metrics Score', 'N/A')} - {meddic_data.get('Metrics Evaluation', 'N/A')}",
                'Economic Buyer': f"{meddic_data.get('Economic Buyer Score', 'N/A')} - {meddic_data.get('Economic Buyer Evaluation', 'N/A')}",
                'Decision Criteria': f"{meddic_data.get('Decision Criteria Score', 'N/A')} - {meddic_data.get('Decision Criteria Evaluation', 'N/A')}",
                'Decision Process': f"{meddic_data.get('Decision Process Score', 'N/A')} - {meddic_data.get('Decision Process Evaluation', 'N/A')}",
                'Identify Pain': f"{meddic_data.get('Identify Pain Score', 'N/A')} - {meddic_data.get('Identify Pain Evaluation', 'N/A')}",
                'Champion': f"{meddic_data.get('Champion Score', 'N/A')} - {meddic_data.get('Champion Evaluation', 'N/A')}"
            }
        }
        return output
    except Exception as e:
        print(f"Error in agent_2_json: {str(e)}")
        return {
            'summary': "Error occurred during evaluation",
            'score': 0,
            'meddic_evaluation': str(e)
        }

def agent_3(meddic_evaluation: Dict[str, Any]) -> Dict[str, Any]:
    """Agent 3: Suggest next best action and talking points."""
    try:
        next_action_template = """
        Based on the following MEDDIC evaluation of an opportunity:

        {meddic_evaluation}

        Suggest the next best action for the upcoming customer meeting and provide the top 3 talking points.

        Format your response as follows:
        Next Action: [Your suggested action]
        Talking Points:
        1. [First talking point]
        2. [Second talking point]
        3. [Third talking point]
        """
        next_action_prompt = PromptTemplate(template=next_action_template, input_variables=["meddic_evaluation"])
        next_action_chain = next_action_prompt | llm
        response = next_action_chain.invoke({"meddic_evaluation": json.dumps(meddic_evaluation)})

        if isinstance(response, AIMessage):
            response_content = response.content
        elif isinstance(response, str):
            response_content = response
        else:
            raise ValueError(f"Unexpected response type: {type(response)}")

        # Parse the response content
        lines = response_content.split('\n')
        next_action = next((line.split('Next Action:')[1].strip() for line in lines if line.strip().startswith('Next Action:')), 'N/A')
        # Split on the first '.' only, so talking points that contain periods survive intact
        talking_points = [line.split('.', 1)[1].strip() for line in lines if line.strip().startswith(('1.', '2.', '3.'))]

        return {
            'next_action': next_action,
            'talking_points': talking_points
        }
    except Exception as e:
        print(f"Error in agent_3: {str(e)}")
        return {
            'next_action': "Error occurred while suggesting next action",
            'talking_points': [str(e)]
        }

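# Shape sketch (hypothetical values): agent_3 returns a dict like
#   {"next_action": "Schedule a follow-up...", "talking_points": ["...", "...", "..."]}
# or an error dict with the same keys if the LLM call or parsing fails.
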
def process_document(state: State) -> State:
    print("Agent 1: Processing document...")
    file_path = state.file_path
    result = agent_1(file_path)
    return State(file_path=state.file_path, document_processed=result)

def evaluate_opportunity(state: State) -> State:
    print("Agent 2: Evaluating opportunity...")
    result = agent_2_json()
    return State(file_path=state.file_path, document_processed=state.document_processed, opportunity_evaluation=result)

def suggest_next_action(state: State) -> State:
    print("Agent 3: Suggesting next actions...")
    result = agent_3(state.opportunity_evaluation)
    return State(file_path=state.file_path, document_processed=state.document_processed, opportunity_evaluation=state.opportunity_evaluation, next_action=result)

def define_graph() -> StateGraph:
    workflow = StateGraph(State)
    workflow.add_node("process_document", process_document)
    workflow.add_node("evaluate_opportunity", evaluate_opportunity)
    workflow.add_node("suggest_next_action", suggest_next_action)
    workflow.set_entry_point("process_document")
    workflow.add_edge("process_document", "evaluate_opportunity")
    workflow.add_edge("evaluate_opportunity", "suggest_next_action")
    workflow.add_edge("suggest_next_action", END)  # terminate after the last agent
    return workflow

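# Usage sketch: compile once, then invoke with an initial State
# (run_analysis below wraps exactly this flow):
#   app = define_graph().compile()
#   final_state = app.invoke(State(file_path="data/example.docx"))  # hypothetical path
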
def run_analysis(file_path: str) -> Dict[str, Any]:
    if not os.path.exists(file_path):
        return {"error": f"File not found: {file_path}"}

    graph = define_graph()
    initial_state = State(file_path=file_path)
    try:
        app = graph.compile()
        final_state = app.invoke(initial_state)
        # Convert the final state to a dictionary manually
        structured_results = {
            "file_path": final_state["file_path"],
            "document_processed": final_state["document_processed"],
            "opportunity_evaluation": final_state["opportunity_evaluation"],
            "next_action": final_state["next_action"]
        }

        # Print a summary of the results
        print("\n--- Analysis Results ---")
        print(f"Document Processing: {'Successful' if 'Error' not in structured_results['document_processed'] else 'Failed'}")
        print(f"Details: {structured_results['document_processed']}")

        if isinstance(structured_results['opportunity_evaluation'], dict):
            print("\nOpportunity Evaluation:")
            print(f"Summary: {structured_results['opportunity_evaluation'].get('summary', 'N/A')}")
            print(f"Score: {structured_results['opportunity_evaluation'].get('score', 'N/A')}")
            print("MEDDIC Evaluation:")
            for criterion, evaluation in structured_results['opportunity_evaluation'].get('meddic_evaluation', {}).items():
                print(f"{criterion}: {evaluation}")
        else:
            print("\nOpportunity Evaluation:")
            print(f"Error: {structured_results['opportunity_evaluation']}")

        if isinstance(structured_results['next_action'], dict):
            print("\nNext Action:")
            print(f"Action: {structured_results['next_action'].get('next_action', 'N/A')}")
            print("Talking Points:")
            for i, point in enumerate(structured_results['next_action'].get('talking_points', []), 1):
                print(f"  {i}. {point}")
        else:
            print("\nNext Action:")
            print(f"Error: {structured_results['next_action']}")

        return structured_results
    except Exception as e:
        print(f"An error occurred during analysis: {str(e)}")
        return {"error": str(e)}

def create_opportunity_review_report(structured_results):
    opportunity_review_report = "**Analysis Results**\n\n"

    # Guard against run_analysis returning an {"error": ...} dict
    if "error" in structured_results or 'Error' in structured_results.get('document_processed', ''):
        opportunity_review_report += "Opportunity Analysis Failed\n"
    else:
        if isinstance(structured_results['opportunity_evaluation'], dict):
            opportunity_review_report += f"**Summary:** {structured_results['opportunity_evaluation'].get('summary', 'N/A')}\n\n"
            opportunity_review_report += f"**Score:** {structured_results['opportunity_evaluation'].get('score', 'N/A')}\n\n"
            opportunity_review_report += "**MEDDIC Evaluation:**\n\n"
            for criterion, evaluation in structured_results['opportunity_evaluation'].get('meddic_evaluation', {}).items():
                opportunity_review_report += f"**{criterion}:** {evaluation}\n"
        if isinstance(structured_results['next_action'], dict):
            opportunity_review_report += "\n\n**Next Steps**\n\n"
            opportunity_review_report += f"{structured_results['next_action'].get('next_action', 'N/A')}\n\n"
            opportunity_review_report += "**Talking Points:**\n\n"
            for i, point in enumerate(structured_results['next_action'].get('talking_points', []), 1):
                opportunity_review_report += f"  {i}. {point}\n"

    file_path = "reports/HSBC Opportunity Review Report.md"
    save_md_file(file_path, opportunity_review_report)
    return opportunity_review_report

def save_md_file(file_path, file_content):
    try:
        # Ensure the target directory exists; 'w' mode already truncates any existing file,
        # so there is no need to delete it first
        parent_dir = os.path.dirname(file_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        with open(file_path, 'w', encoding='utf-8') as md_file:
            md_file.write(file_content)
        print(f"File saved successfully: {file_path}")
    except PermissionError:
        print(f"Permission denied when trying to save file: {file_path}")
        return None