DereAbdulhameed committed on
Commit
1073b8a
·
verified ·
1 Parent(s): 0281777

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -19
app.py CHANGED
@@ -21,29 +21,52 @@ def load_models(model_size):
21
  model = whisper.load_model(model_size)
22
  return None, model
23
 
 
 
 
 
 
24
  def process_audio(audio_file, num_speakers, model_size):
25
  transcriber, whisper_model = load_models(model_size)
26
 
27
- with tempfile.NamedTemporaryFile(delete=True, suffix=".wav") as tmp:
 
 
 
 
28
  audio_file.seek(0) # Reset the file pointer
29
- tmp.write(audio_file.read())
30
- tmp.flush() # Ensure all data is written
31
- tmp_path = tmp.name
32
-
33
- try:
34
- if transcriber:
35
- result = transcriber(tmp_path)
36
- transcription_text = result['text']
37
- elif whisper_model:
38
- result = whisper_model.transcribe(tmp_path)
39
- transcription_text = result['text']
40
-
41
- diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", use_auth_token=os.getenv('HF_TOKEN'))
42
- diarization = diarization_pipeline(tmp_path, min_speakers=num_speakers, max_speakers=5)
43
- return transcription_text, diarization.get_timeline().json()
44
- except Exception as e:
45
- print("Error processing audio file or diarization:", e)
46
- return None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
  def gradio_interface(audio_file, num_speakers, model_size):
49
  transcription, diarization = process_audio(audio_file, num_speakers, model_size)
 
21
  model = whisper.load_model(model_size)
22
  return None, model
23
 
24
+ from flask import jsonify
25
+ import tempfile
26
+ import os
27
+ import io
28
+
29
  def process_audio(audio_file, num_speakers, model_size):
30
  transcriber, whisper_model = load_models(model_size)
31
 
32
+ # Ensure audio file is provided
33
+ if audio_file is None:
34
+ return jsonify({"error": "Audio file is required"}), 400
35
+
36
+ try:
37
  audio_file.seek(0) # Reset the file pointer
38
+ with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
39
+ tmp.write(audio_file.read())
40
+ tmp_path = tmp.name
41
+
42
+ # Initialize transcription_text
43
+ transcription_text = None
44
+
45
+ if transcriber:
46
+ result = transcriber(tmp_path)
47
+ transcription_text = result['text']
48
+ elif whisper_model:
49
+ result = whisper_model.transcribe(tmp_path)
50
+ transcription_text = result['text']
51
+
52
+ if transcription_text is None:
53
+ raise ValueError("No transcription results")
54
+
55
+ # Diarization process
56
+ diarization_pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1", use_auth_token=HF_TOKEN)
57
+ diarization = diarization_pipeline(tmp_path, min_speakers=num_speakers, max_speakers=5)
58
+
59
+ os.remove(tmp_path) # Cleanup the temporary file
60
+
61
+ return jsonify({
62
+ "transcription": transcription_text,
63
+ "diarization": diarization.get_timeline().json()
64
+ })
65
+
66
+ except Exception as e:
67
+ os.remove(tmp_path) # Ensure to cleanup on error
68
+ return jsonify({"error": f"Error processing audio file: {e}"}), 500
69
+
70
 
71
  def gradio_interface(audio_file, num_speakers, model_size):
72
  transcription, diarization = process_audio(audio_file, num_speakers, model_size)