nata0801 committed on
Commit
9726882
1 Parent(s): a299284

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -9
app.py CHANGED
@@ -21,18 +21,20 @@ def asr_transcript(audio_file, language):
21
  model = Wav2Vec2ForCTC.from_pretrained(model)
22
 
23
  #read the file and resample to 16KHz
24
- stream = librosa.stream(audio_file.name, block_length=20, frame_length=16000, hop_length=16000)
 
 
 
25
 
26
- for speech in stream:
27
- if len(speech.shape) > 1:
28
- speech = speech[:, 0] + speech[:, 1]
29
 
30
- input_values = tokenizer(speech, return_tensors="pt").input_values
31
- logits = model(input_values).logits
32
 
33
- predicted_ids = torch.argmax(logits, dim=-1)
34
- transcription = tokenizer.batch_decode(predicted_ids)[0]
35
- transcript += transcription.lower() + " "
36
 
37
  return transcript
38
 
 
21
  model = Wav2Vec2ForCTC.from_pretrained(model)
22
 
23
  #read the file and resample to 16KHz
24
+ #stream = librosa.stream(audio_file.name, block_length=20, frame_length=16000, hop_length=16000)
25
+
26
+ #read the file
27
+ speech, sample_rate = librosa.load(input_file, 16000)
28
 
29
+ if len(speech.shape) > 1:
30
+ speech = speech[:, 0] + speech[:, 1]
 
31
 
32
+ input_values = tokenizer(speech, return_tensors="pt").input_values
33
+ logits = model(input_values).logits
34
 
35
+ predicted_ids = torch.argmax(logits, dim=-1)
36
+ transcription = tokenizer.batch_decode(predicted_ids)[0]
37
+ transcript = transcription.lower()
38
 
39
  return transcript
40