Zulelee committed on
Commit
1851f66
·
verified ·
1 Parent(s): fb05c05

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, File, UploadFile, HTTPException
2
+ import torch
3
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
4
+ import requests
5
+ import json
6
+ import tempfile
7
+ import os
8
+
9
app = FastAPI()

# Whisper configuration: use the first CUDA device with half precision when
# CUDA is available, otherwise fall back to the CPU with single precision.
if torch.cuda.is_available():
    device = "cuda:0"
    torch_dtype = torch.float16
else:
    device = "cpu"
    torch_dtype = torch.float32

model_id = "openai/whisper-large-v3-turbo"

# Load the speech-to-text model and move it onto the selected device.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    low_cpu_mem_usage=True,
    use_safetensors=True,
)
model.to(device)

# The processor supplies the tokenizer and feature extractor for the pipeline.
processor = AutoProcessor.from_pretrained(model_id)

pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)

# OpenRouter chat-completions endpoint; the key is read from the environment
# and defaults to an empty string when the variable is not set.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
35
+
36
@app.post("/transcribe-analyze/")
async def transcribe_analyze(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and classify the call with an LLM.

    The upload is written to a temporary ``.mp3`` file, transcribed with the
    module-level Whisper ``pipe``, and the transcript is sent to the
    OpenRouter chat-completions endpoint for classification.

    Args:
        file: The uploaded audio file.

    Returns:
        A dict with ``"transcription"`` (the transcript text) and
        ``"ai_response"`` (the model's answer, or ``"No response from AI."``
        when the reply carries no message content).

    Raises:
        HTTPException: With status 500 and the error text as detail when any
            step fails.
    """
    # NOTE(review): pipe(...) and requests.post(...) are synchronous calls
    # inside an async handler; consider offloading them to a worker thread.
    temp_audio_path = None
    try:
        # Save the uploaded file temporarily. delete=False keeps the file on
        # disk after the handle is closed so the pipeline can open it by path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
            # Record the path first so cleanup still runs if the write fails.
            temp_audio_path = temp_audio.name
            temp_audio.write(await file.read())

        # Transcribe audio
        transcription_result = pipe(temp_audio_path, return_timestamps=True)
        transcription = transcription_result["text"]

        # Send transcription to AI for classification
        response = requests.post(
            url=OPENROUTER_URL,
            headers={
                "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                "Content-Type": "application/json",
            },
            data=json.dumps({
                "model": "meta-llama/llama-3.1-70b-instruct:free",
                "messages": [
                    {
                        "role": "user",
                        "content": f"You are an AI Assistant that is given the transcript between a call agent and a lead, and you must classify if the lead happily agreed to the booking. The response should have 4 parts: 1. Appointment Booked: Yes/No, 2. Short reason for your answer, 3. Short summary of the call, 4. Lead's overall emotion. \n Here is the transcription: {transcription}",
                    }
                ],
            }),
            # Without a timeout a stalled upstream would hang this request forever.
            timeout=60,
        )

        # Tolerate a missing or empty "choices" list and a null "message".
        choices = response.json().get("choices") or [{}]
        message = choices[0].get("message") or {}
        ai_response = message.get("content", "No response from AI.")

        return {"transcription": transcription, "ai_response": ai_response}

    except HTTPException:
        raise
    except Exception as e:
        # Raise (not return) so FastAPI actually responds with HTTP 500.
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Remove temporary file on both the success and the failure path.
        if temp_audio_path is not None and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)