geethareddy committed
Commit 56becc0 · verified · 1 Parent(s): 679d24a

Update app.py

Files changed (1): app.py (+7 -8)
app.py CHANGED

@@ -3,18 +3,17 @@ from flask import Flask, render_template, request, jsonify
 import os
 import re
 import ffmpeg
-from transformers import pipeline
+from transformers import pipeline  # ✅ Using correct Whisper ASR pipeline
 from gtts import gTTS
 from pydub import AudioSegment
 from pydub.silence import detect_nonsilent
 from waitress import serve
-import whisper  # Corrected whisper import
 
 app = Flask(__name__)
 
-# Load Whisper Model for Highly Accurate Speech-to-Text
+# Load Whisper ASR Model correctly
 device = "cuda" if torch.cuda.is_available() else "cpu"
-asr_model = whisper.load_model("large-v3")
+asr_model = pipeline("automatic-speech-recognition", model="openai/whisper-large-v3", device=0 if device == "cuda" else -1)
 
 # Function to generate audio prompts
 def generate_audio_prompt(text, filename):
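Note on this hunk: the commit swaps openai-whisper's whisper.load_model("large-v3") for the transformers ASR pipeline. The hunk uses torch for device selection, but no import torch is visible in the shown context, so it presumably lives earlier in app.py (or still needs adding). A minimal standalone sketch of the new setup (sample.wav is a placeholder file):

import torch
from transformers import pipeline

# Select a CUDA device if available; pipeline() takes an integer GPU index,
# or -1 for CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
asr_model = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-large-v3",
    device=0 if device == "cuda" else -1,
)

# The pipeline accepts a path to an audio file (decoded via ffmpeg) and
# returns a dict with a "text" key.
print(asr_model("sample.wav", generate_kwargs={"language": "en"})["text"])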
@@ -32,7 +31,7 @@ prompts = {
 for key, text in prompts.items():
     generate_audio_prompt(text, f"{key}.mp3")
 
-# Symbol mapping for better recognition
+# Symbol mapping for proper recognition
 SYMBOL_MAPPING = {
     "at the rate": "@",
     "at": "@",
@@ -69,7 +68,7 @@ def clean_transcription(text):
 def is_silent_audio(audio_path):
     audio = AudioSegment.from_wav(audio_path)
     nonsilent_parts = detect_nonsilent(audio, min_silence_len=500, silence_thresh=audio.dBFS-16)
-    return len(nonsilent_parts) == 0
+    return len(nonsilent_parts) == 0  # Returns True if silence detected
 
 @app.route("/")
 def index():
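Note on this hunk: pydub's detect_nonsilent returns [start_ms, end_ms] pairs for every non-silent chunk, treating any stretch of at least 500 ms quieter than 16 dB below the clip's average loudness (audio.dBFS - 16) as silence, so an empty list means the clip is silent. A small sketch for local experimentation (clip.wav is a placeholder):

from pydub import AudioSegment
from pydub.silence import detect_nonsilent

audio = AudioSegment.from_wav("clip.wav")

# [start_ms, end_ms] pairs for each non-silent chunk, using the same
# parameters as app.py.
ranges = detect_nonsilent(audio, min_silence_len=500, silence_thresh=audio.dBFS - 16)
print(ranges)            # e.g. [[130, 2410]] when speech is present
print(len(ranges) == 0)  # True only for an effectively silent clip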
@@ -93,8 +92,8 @@ def transcribe():
     if is_silent_audio(output_audio_path):
         return jsonify({"error": "No speech detected. Please try again."}), 400
 
-    # Transcribe using Whisper
-    result = asr_model.transcribe(output_audio_path)
+    # Use Whisper ASR model for transcription
+    result = asr_model(output_audio_path, generate_kwargs={"language": "en"})
     transcribed_text = clean_transcription(result["text"])
 
     return jsonify({"text": transcribed_text})
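Note on this hunk: the call site moves from openai-whisper's transcribe() method to invoking the pipeline object directly, and generate_kwargs={"language": "en"} pins Whisper to English instead of relying on language detection. Both APIs return a dict with a "text" key, so clean_transcription(result["text"]) keeps working unchanged. A minimal sketch, assuming asr_model from the first hunk (clip.wav is a placeholder):

# Old API: result = asr_model.transcribe(output_audio_path)
# New API: call the pipeline itself; forcing the language avoids
# misdetection on short or noisy clips.
result = asr_model("clip.wav", generate_kwargs={"language": "en"})
print(result["text"])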
 
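For an end-to-end check of the changed route, a hypothetical smoke test with Flask's built-in test client; the /transcribe path and the "audio" form field are assumptions, since the request-handling lines sit outside this diff:

with app.test_client() as client:
    with open("clip.wav", "rb") as f:  # placeholder recording
        resp = client.post("/transcribe", data={"audio": (f, "clip.wav")})
    print(resp.status_code, resp.get_json())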