|
import time

from dotenv import load_dotenv
from huggingface_hub import login
from sentence_transformers import SentenceTransformer
from speech_recognition import Microphone, Recognizer, RequestError, UnknownValueError, WaitTimeoutError
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

from env_setup import config
from objection_handler import ObjectionHandler, load_objections
from product_recommender import ProductRecommender

# Load environment variables from a local .env file before reading config.
load_dotenv()
|
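# The Hugging Face login below assumes a `huggingface_api_key` entry exists in
# `config` (typically populated from the .env file loaded above).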
# Authenticate with the Hugging Face Hub.
huggingface_api_key = config["huggingface_api_key"]
login(token=huggingface_api_key)

# Load the multilingual sentiment model and wrap it in a pipeline.
model_name = "tabularisai/multilingual-sentiment-analysis"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
sentiment_analyzer = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
|
recognizer = Recognizer()
|
def preprocess_text(text):
    """Preprocess text for better sentiment analysis."""
    return text.strip().lower()

|
def analyze_sentiment(text):
    """Analyze sentiment of the text using the Hugging Face model."""
    try:
        if not text.strip():
            return "NEUTRAL", 0.0

        processed_text = preprocess_text(text)
        result = sentiment_analyzer(processed_text)[0]
        print(f"Sentiment Analysis Result: {result}")

        # Collapse the model's five labels into three coarse classes.
        sentiment_map = {
            'Very Negative': "NEGATIVE",
            'Negative': "NEGATIVE",
            'Neutral': "NEUTRAL",
            'Positive': "POSITIVE",
            'Very Positive': "POSITIVE"
        }

        sentiment = sentiment_map.get(result['label'], "NEUTRAL")
        return sentiment, result['score']

    except Exception as e:
        print(f"Error in sentiment analysis: {e}")
        return "NEUTRAL", 0.5

|
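# transcribe_with_chunks drives the pipeline end to end: it transcribes speech
# via Google's recognizer, groups utterances into ~3-second chunks, scores each
# chunk's sentiment, and queries the objection and product FAISS indices for
# near matches. The voice commands "start listening" / "stop listening" toggle
# transcription.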
def transcribe_with_chunks(objections_dict):
    """Capture speech and return a list of (chunk_text, sentiment, score) tuples.

    Note: `objections_dict` is currently unused here; the ObjectionHandler
    below loads its own objection data.
    """
    print("Note: If microphone access fails, please use alternative input.")
    chunks = []
    current_chunk = []
    chunk_start_time = time.time()
    is_listening = False

    try:
        available_mics = Microphone.list_microphone_names()
        print(f"Available microphones: {available_mics}")
    except Exception as e:
        print(f"Could not detect microphones: {e}")

    # Data file locations are configurable, with CSV fallbacks next to the script.
    objection_file_path = config.get("OBJECTION_DATA_PATH", "objections.csv")
    product_file_path = config.get("PRODUCT_DATA_PATH", "recommendations.csv")

    objection_handler = ObjectionHandler(objection_file_path)
    product_recommender = ProductRecommender(product_file_path)

    # Sentence embedder for querying the FAISS indices; named `embedding_model`
    # so it does not shadow the global sentiment `model`.
    embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
|
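    # Probe the first ten device indices rather than relying on the system
    # default microphone, which may be missing or busy.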
    try:
        mic = None
        for device_index in range(10):
            try:
                mic = Microphone(device_index=device_index)
                print(f"Using microphone at device index {device_index}")
                break
            except Exception:
                continue

        if mic is None:
            print("No microphone available. Please provide text input.")
            return []

        with mic as source:
            recognizer.adjust_for_ambient_noise(source)
            print("Microphone calibrated. Please speak.")
|
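            # Capture loop: each listen() call returns one utterance; utterances
            # accumulate in current_chunk until the 3-second window elapses.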
            while True:
                print("Listening for speech...")
                try:
                    audio_data = recognizer.listen(source, timeout=5)
                    text = recognizer.recognize_google(audio_data)

                    # Voice commands toggle transcription on and off.
                    if "start listening" in text.lower():
                        is_listening = True
                        print("Listening started. Speak into the microphone.")
                        continue
                    elif "stop listening" in text.lower():
                        is_listening = False
                        print("Listening stopped.")
                        # Flush any partial chunk before pausing.
                        if current_chunk:
                            chunk_text = " ".join(current_chunk)
                            sentiment, score = analyze_sentiment(chunk_text)
                            chunks.append((chunk_text, sentiment, score))
                            current_chunk = []
                        continue

                    if is_listening and text.strip():
                        print(f"Transcription: {text}")
                        current_chunk.append(text)

                    # Close out a chunk once the 3-second window has elapsed.
                    if time.time() - chunk_start_time > 3:
                        if current_chunk:
                            chunk_text = " ".join(current_chunk)
                            sentiment, score = analyze_sentiment(chunk_text)
                            chunks.append((chunk_text, sentiment, score))

                            # Embed the chunk once and reuse the vector for both index lookups.
                            query_embedding = embedding_model.encode([chunk_text])

                            # Surface an objection response only when the nearest
                            # neighbour is close enough.
                            distances, indices = objection_handler.index.search(query_embedding, 1)
                            if distances[0][0] < 1.5:
                                responses = objection_handler.handle_objection(chunk_text)
                                if responses:
                                    print("\nSuggested Response:")
                                    for response in responses:
                                        print(f"→ {response}")

                            # Same distance gate for product recommendations.
                            distances, indices = product_recommender.index.search(query_embedding, 1)
                            if distances[0][0] < 1.5:
                                recommendations = product_recommender.get_recommendations(chunk_text)
                                if recommendations:
                                    print("\nRecommendations for this response:")
                                    for idx, rec in enumerate(recommendations, 1):
                                        print(f"{idx}. {rec}")

                            print("\n")
                        current_chunk = []
                        chunk_start_time = time.time()
                except WaitTimeoutError:
                    # No speech started within the timeout; keep listening.
                    continue
                except UnknownValueError:
                    print("Could not understand the audio.")
                except RequestError as e:
                    print(f"Could not request results from Google Speech Recognition service; {e}")
|
    except KeyboardInterrupt:
        print("\nExiting...")

    return chunks

|
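# Entry point: load the objection data and run the capture loop; the collected
# (chunk, sentiment, score) tuples are printed after the user exits with Ctrl+C.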
if __name__ == "__main__":
    objections_file_path = config.get("OBJECTION_DATA_PATH", "objections.csv")
    objections_dict = load_objections(objections_file_path)
    transcribed_chunks = transcribe_with_chunks(objections_dict)
    print("Final transcriptions and sentiments:", transcribed_chunks)