siddh4rth committed on
Commit
bb32e5f
·
1 Parent(s): 17ad52d

finetuned model

Browse files
Files changed (1) hide show
  1. app.py +30 -5
app.py CHANGED
@@ -1,13 +1,38 @@
 
1
  import gradio as gr
2
  import whisper
 
 
 
3
 
4
def audio_to_text(audio):
    """Transcribe an audio file using OpenAI Whisper's "base" model.

    Parameters: audio -- path to the audio file to transcribe.
    Returns: the transcription text produced by Whisper.
    """
    model = whisper.load_model("base")
    waveform = whisper.load_audio(audio)
    result = model.transcribe(waveform)
    return result["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  demo = gr.Interface(
13
  fn=audio_to_text,
 
1
import os

import gradio as gr
import librosa
import torch
import whisper
# fix: transformers exposes no "Wav2Vec2ForCTCTokenizer"; the CTC tokenizer
# class is Wav2Vec2CTCTokenizer.
from transformers import Wav2Vec2CTCTokenizer, Wav2Vec2Processor

# Backward-compat alias: earlier code referenced the nonexistent name
# "Wav2Vec2ForCTCTokenizer"; keep it bound so existing call sites still work.
Wav2Vec2ForCTCTokenizer = Wav2Vec2CTCTokenizer

# Run inference on GPU when available, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
 
9
 
10
def audio_to_text(logits):
    """Greedy-decode wav2vec2 CTC logits into a transcription string.

    Parameters
    ----------
    logits : torch.Tensor
        Raw CTC logits from the fine-tuned model — presumably shaped
        (batch, time, vocab_size); TODO confirm against ``preprocess`` output.

    Returns
    -------
    str
        Decoded transcription for the first item in the batch.
    """
    # fix: "Wav2Vec2ForCTCTokenizer" does not exist in transformers; the
    # correct class is Wav2Vec2CTCTokenizer. Local import keeps this fix
    # self-contained even if the module-level import is still broken.
    from transformers import Wav2Vec2CTCTokenizer

    tokenizer = Wav2Vec2CTCTokenizer(
        "model_save/wav2vec2_osr_version_1_vocab/vocab.json",
        unk_token="<unk>",
        pad_token="<pad>",
        word_delimiter_token="|",
    )
    # Greedy CTC decoding: take the most probable token at each timestep.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcriptions = tokenizer.decode(predicted_ids[0])
    return transcriptions
22
+
23
def preprocess(audio):
    """Load an audio file and run it through the fine-tuned wav2vec2 model.

    Parameters
    ----------
    audio : str
        Path to an audio file readable by librosa.

    Returns
    -------
    torch.Tensor
        CTC logits produced by the model for the whole clip.
    """
    model_save_path = "model_save"
    model_name = "wav2vec2_osr_version_1"
    model_path = os.path.join(model_save_path, model_name + ".pt")
    pipeline_path = os.path.join(model_save_path, model_name + "_vocab")

    # Resample to the 16 kHz rate wav2vec2 models expect.
    speech, rate = librosa.load(audio, sr=16000)

    processor = Wav2Vec2Processor.from_pretrained(pipeline_path)
    # SECURITY: torch.load unpickles arbitrary objects — only load trusted
    # checkpoints. map_location fixes a latent device-mismatch bug: inputs
    # are moved to `device` below, so the model must live there too.
    model = torch.load(model_path, map_location=device)
    model.to(device)
    model.eval()

    input_values = processor(speech, sampling_rate=rate, return_tensors="pt").input_values.to(device)
    # Inference only — disable autograd to avoid building a graph.
    with torch.no_grad():
        logits = model(input_values).logits
    return logits
36
 
37
  demo = gr.Interface(
38
  fn=audio_to_text,