"""Flask service that classifies dealership calls.

An uploaded audio file is transcribed with AssemblyAI, the transcript is sent
to a Gemini model together with ``GEMINI_INSTRUCTIONS``, and the option number
named in the model's reply is returned to the client.
"""

import os
import json

from flask import Flask, jsonify, request, send_file, send_from_directory
from langchain_core.messages import HumanMessage
from langchain_google_genai import ChatGoogleGenerativeAI
import assemblyai as aai
from dotenv import load_dotenv

# Load environment variables from the .env file
load_dotenv()

# Initialize the Flask app
app = Flask(__name__)


def _require_env(name):
    """Return the stripped value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is unset or blank, so a missing key is
            reported by name instead of as an AttributeError on ``None``.
    """
    value = os.getenv(name)
    if value is None or not value.strip():
        raise RuntimeError(f"Missing required environment variable: {name}")
    return value.strip()


# Get API keys from environment variables
ASSEMBLYAI_API_KEY = _require_env("ASSEMBLYAI_API_KEY")
GOOGLE_API_KEY = _require_env("GOOGLE_API_KEY")

# Set AssemblyAI API key
aai.settings.api_key = ASSEMBLYAI_API_KEY

# Set Google API key for Gemini model
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

# Define a directory to save uploaded audio files
UPLOAD_FOLDER = 'uploads'
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# Refined Instructions for Gemini
GEMINI_INSTRUCTIONS = """
The purpose of this classification is to determine whether a caller has agreed to visit the dealership at a specific time or within a certain time range.

### Option 1: (Yes, at a specific time or range of time within 1 hour):
A call falls into this category if the caller explicitly agrees to visit the dealership at a specific time or within a one-hour range.

#### Criteria:
- Caller explicitly mentions a specific time (e.g., "I'll be there at 4:00").
- Caller gives an estimated arrival time within a one-hour range (e.g., "I'll be there in 30 minutes").
- Caller provides a loose time range, but the agent confirms a firm time (e.g., Caller: "Between 6:00 and 8:00," Agent: "I'll put you down for 7:00").
- Caller is already on the dealership lot (walk-in).

#### Examples (Accepted for this Option):
- "I'm going to come in and take a look at 4:00."
- "I'll stop by around 3:00 on Saturday."
- "I'll try to go test drive it at noon."
- "I'm on my way right now, and I'll be there in just a few minutes."
- "I'll come down in 30 minutes if it's still on the lot."
- "I'll be there in about 45-60 minutes."

#### Not Considered Specific (Excluded from this Option):
- "I'll come after 4:00" (since the arrival time is unknown, it could be anytime after 4 PM).
- "I'll be at the dealership till 4 PM" (caller could arrive anytime before 4 PM).
- "I'll be there between 7:00 and 9:00" (exceeds a one-hour range).

---

### Option 2: (Yes, at a loose time or range of time exceeding 1 hour):
A call falls into this Option if the caller expresses an intention to visit but does not provide a specific or one-hour time range.

#### Criteria:
- Caller agrees to visit but gives a broad time frame (longer than one hour).
- Caller uses vague terms such as "sometime today" or "this evening."
- Caller mentions visiting based on a conditional factor (e.g., "If I get off work early, I'll stop in").

#### Examples (Accepted for this category):
- "I'll stop by sometime today."
- "My wife will go there Saturday."
- "I might be there to test drive it this evening."
- "I get off work at 5:00, and I'll come by after that."
- "I'll be there tonight between 7:00 and 9:00."
- "Maybe I'll come take a look at it later."
- "If I get off work early, I'll stop in."

---

### Option 3:
If the call did not fall in Option 1 or Option 2, then select Option 3.

### Classification Logic Summary:
1. If the caller specifies an exact time or an estimated arrival time within one hour → **Option 1 (Specific time or within 1 hour).**
2. If the caller provides a time range longer than one hour or speaks vaguely about their visit → **Option 2 (Loose time or exceeding 1 hour).**
3. Otherwise → **Option 3.**

Based on this classification, return the appropriate option from:
- **Option 1**: Specific time or within 1 hour.
- **Option 2**: Loose time or exceeding 1 hour.
- **Option 3**: Neither of the above.
"""

# Phrases searched for in the model's reply, in priority order, mapped to the
# option number reported to the client.
_OPTION_MARKERS = {'option 1': 1, 'option 2': 2, 'option 3': 3, 'option 4': 4}


def _safe_upload_name(filename):
    """Reduce a client-supplied file name to a bare, non-hidden base name.

    Directory components (with either slash style) are dropped so the upload
    cannot be written outside the upload folder. Returns ``None`` when nothing
    usable remains (empty name, ``.``/``..`` or a dot-file).
    """
    base = os.path.basename((filename or '').replace('\\', '/'))
    if not base or base.startswith('.'):
        return None
    return base


def _select_option(result_text):
    """Return the number of the first known option mentioned in *result_text*.

    *result_text* is expected to be lower-cased already. Markers are tried in
    the order 1, 2, 3, 4; ``None`` is returned when none of them occurs.
    """
    for marker, number in _OPTION_MARKERS.items():
        if marker in result_text:
            return number
    return None


# Home route to serve the index.html file from the root directory
@app.route('/')
def home():
    """Serve the single-page front end."""
    return send_file('index.html')


# API route to handle file upload, transcription, and model interaction
@app.route("/api/upload", methods=["POST"])
def generate_api():
    """Transcribe the uploaded audio and classify the call.

    Expects a multipart form field named ``audio_file``.

    Returns:
        JSON ``{"transcription": str, "selected_option": int | None}`` with
        status 200 on success; ``{"error": str}`` with status 400 for a
        missing/invalid upload, 502 when AssemblyAI reports a transcription
        error, and 500 for any other failure.
    """
    # Check if an audio file was uploaded
    if 'audio_file' not in request.files:
        return jsonify({"error": "No audio file provided"}), 400

    audio_file = request.files['audio_file']
    if audio_file.filename == '':
        return jsonify({"error": "No selected file"}), 400

    # Never trust the client-supplied name: it may contain path separators.
    safe_name = _safe_upload_name(audio_file.filename)
    if safe_name is None:
        return jsonify({"error": "Invalid file name"}), 400

    try:
        # Save the uploaded file to the server
        file_path = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
        audio_file.save(file_path)

        # Transcribe the audio using AssemblyAI
        transcriber = aai.Transcriber()
        transcript = transcriber.transcribe(file_path)
        if transcript.status == aai.TranscriptStatus.error:
            # transcript.text is None in this case; report the upstream error.
            return jsonify({"error": transcript.error}), 502

        # Send transcription and instructions to Gemini model
        model = ChatGoogleGenerativeAI(model="gemini-1.5-flash-002")
        message = HumanMessage(
            content=f"{GEMINI_INSTRUCTIONS}\n\nCall Transcription: {transcript.text}"
        )
        response = model.stream([message])

        # Interpret the model's response to select the correct option
        result_text = ''.join(chunk.content for chunk in response).lower()
        selected_option = _select_option(result_text)

        # Return the transcription and selected option
        return jsonify({
            "transcription": transcript.text,
            "selected_option": selected_option,
        }), 200
    except Exception as e:
        # Top-level boundary: log the traceback and signal failure with a 5xx
        # status instead of an implicit 200.
        app.logger.exception("Failed to process uploaded audio")
        return jsonify({"error": str(e)}), 500


# Route to serve static files
@app.route('/<path:path>')
def serve_static(path):
    """Serve a file from the application's working directory.

    Dot-files and dot-directories (notably ``.env``, which holds the API keys)
    are never served.
    NOTE(review): this still exposes every other file in the working
    directory, including source code and uploads; consider a dedicated
    static folder.
    """
    if any(part.startswith('.') for part in path.split('/')):
        return jsonify({"error": "Not found"}), 404
    return send_from_directory('.', path)


# Run the Flask application
if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive debugger; do not use
    # it on a publicly reachable host.
    app.run(debug=True)