# SalesAI / sentiment_analysis.py
import time

from speech_recognition import Recognizer, Microphone, UnknownValueError, RequestError, WaitTimeoutError
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from huggingface_hub import login
from sentence_transformers import SentenceTransformer
from dotenv import load_dotenv

from product_recommender import ProductRecommender
from objection_handler import ObjectionHandler, load_objections
from env_setup import config
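# product_recommender, objection_handler, and env_setup are local modules
# that ship with this Space, not PyPI packages.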
# Load environment variables
load_dotenv()
# Hugging Face API setup
huggingface_api_key = config["huggingface_api_key"]
login(token=huggingface_api_key)
# Sentiment Analysis Model
model_name = "tabularisai/multilingual-sentiment-analysis"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
sentiment_analyzer = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
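# The tabularisai checkpoint emits five labels (Very Negative, Negative,
# Neutral, Positive, Very Positive); analyze_sentiment() below collapses
# them into NEGATIVE / NEUTRAL / POSITIVE.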
# Speech Recognition Setup
recognizer = Recognizer()
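# recognize_google() (used below) sends audio to Google's free Web Speech
# API, so transcription requires an active internet connection.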
# Text preprocessing helper
def preprocess_text(text):
    """Preprocess text for better sentiment analysis."""
    return text.strip().lower()
def analyze_sentiment(text):
    """Analyze sentiment of the text using the Hugging Face model."""
    try:
        if not text.strip():
            return "NEUTRAL", 0.0

        processed_text = preprocess_text(text)
        result = sentiment_analyzer(processed_text)[0]
        print(f"Sentiment Analysis Result: {result}")

        # Map the model's five raw labels onto three sentiment buckets
        sentiment_map = {
            'Very Negative': "NEGATIVE",
            'Negative': "NEGATIVE",
            'Neutral': "NEUTRAL",
            'Positive': "POSITIVE",
            'Very Positive': "POSITIVE"
        }
        sentiment = sentiment_map.get(result['label'], "NEUTRAL")
        return sentiment, result['score']
    except Exception as e:
        print(f"Error in sentiment analysis: {e}")
        return "NEUTRAL", 0.5
def transcribe_with_chunks(objections_dict):
    """Continuously transcribe speech, chunk it, and attach sentiment,
    objection responses, and product recommendations to each chunk.

    Note: objections_dict is currently unused here; objection matching is
    delegated to ObjectionHandler's semantic index below.
    """
    print("Note: If microphone access fails, please use alternative input.")
    chunks = []
    current_chunk = []
    chunk_start_time = time.time()
    is_listening = False

    try:
        # Try to list available microphones
        available_mics = Microphone.list_microphone_names()
        print(f"Available microphones: {available_mics}")
    except Exception as e:
        print(f"Could not detect microphones: {e}")

    # Resolve data paths from config, falling back to relative defaults
    objection_file_path = config.get("OBJECTION_DATA_PATH", "objections.csv")
    product_file_path = config.get("PRODUCT_DATA_PATH", "recommendations.csv")

    # Initialize handlers with semantic search capabilities
    objection_handler = ObjectionHandler(objection_file_path)
    product_recommender = ProductRecommender(product_file_path)

    # Load the embeddings model once
    model = SentenceTransformer('all-MiniLM-L6-v2')
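    # Note: objection_handler.index and product_recommender.index are assumed
    # to be FAISS-style L2 indices built inside the handler classes, so smaller
    # distances mean closer matches; the "< 1.5" checks below rely on that.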
    try:
        # Probe device indices until one opens successfully
        mic = None
        for device_index in range(10):  # Try first 10 device indices
            try:
                mic = Microphone(device_index=device_index)
                print(f"Using microphone at device index {device_index}")
                break
            except Exception:
                continue

        if mic is None:
            print("No microphone available. Please provide text input.")
            return []

        with mic as source:
            recognizer.adjust_for_ambient_noise(source)
            print("Microphone calibrated. Please speak.")

            while True:
                print("Listening for speech...")
                try:
                    audio_data = recognizer.listen(source, timeout=5)
                    text = recognizer.recognize_google(audio_data)

                    # Voice commands toggle transcription on and off
                    if "start listening" in text.lower():
                        is_listening = True
                        chunk_start_time = time.time()  # Restart the chunk timer
                        print("Listening started. Speak into the microphone.")
                        continue
                    elif "stop listening" in text.lower():
                        is_listening = False
                        print("Listening stopped.")
                        if current_chunk:
                            chunk_text = " ".join(current_chunk)
                            sentiment, score = analyze_sentiment(chunk_text)
                            chunks.append((chunk_text, sentiment, score))
                            current_chunk = []
                        continue

                    if is_listening and text.strip():
                        print(f"Transcription: {text}")
                        current_chunk.append(text)

                        # Flush the current chunk after roughly 3 seconds
                        if time.time() - chunk_start_time > 3:
                            if current_chunk:
                                chunk_text = " ".join(current_chunk)

                                # Always process sentiment
                                sentiment, score = analyze_sentiment(chunk_text)
                                chunks.append((chunk_text, sentiment, score))

                                # Get objection responses and check similarity score
                                query_embedding = model.encode([chunk_text])
                                distances, indices = objection_handler.index.search(query_embedding, 1)

                                # If similarity is high enough, show objection response
                                if distances[0][0] < 1.5:  # Threshold for similarity
                                    responses = objection_handler.handle_objection(chunk_text)
                                    if responses:
                                        print("\nSuggested Response:")
                                        for response in responses:
                                            print(f"→ {response}")

                                # Get product recommendations and check similarity score
                                distances, indices = product_recommender.index.search(query_embedding, 1)

                                # If similarity is high enough, show recommendations
                                if distances[0][0] < 1.5:  # Threshold for similarity
                                    recommendations = product_recommender.get_recommendations(chunk_text)
                                    if recommendations:
                                        print("\nRecommendations for this response:")
                                        for idx, rec in enumerate(recommendations, 1):
                                            print(f"{idx}. {rec}")

                                print("\n")
                            current_chunk = []
                            chunk_start_time = time.time()
                except WaitTimeoutError:
                    # No speech started within the 5-second timeout; keep listening
                    continue
                except UnknownValueError:
                    print("Could not understand the audio.")
                except RequestError as e:
                    print(f"Could not request results from Google Speech Recognition service; {e}")
    except KeyboardInterrupt:
        print("\nExiting...")
        return chunks
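# transcribe_with_chunks runs until interrupted; the accumulated chunks are
# returned when the user presses Ctrl+C (KeyboardInterrupt).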
if __name__ == "__main__":
    objections_file_path = config.get("OBJECTION_DATA_PATH", "objections.csv")
    objections_dict = load_objections(objections_file_path)
    transcribed_chunks = transcribe_with_chunks(objections_dict)
    print("Final transcriptions and sentiments:", transcribed_chunks)