nata0801 committed on
Commit
a299284
1 Parent(s): 1842c19

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -11
app.py CHANGED
@@ -5,15 +5,6 @@ import torch
5
  import gradio as gr
6
  from transformers import Wav2Vec2Tokenizer, Wav2Vec2ForCTC
7
  nltk.download("punkt")
8
-
9
-
10
-
11
- def correct_casing(input_sentence):
12
- """ This function is for correcting the casing of the generated transcribed text
13
- """
14
- sentences = nltk.sent_tokenize(input_sentence)
15
- return (' '.join([s.replace(s[0],s[0].capitalize(),1) for s in sentences]))
16
-
17
 
18
 
19
  def asr_transcript(audio_file, language):
@@ -22,9 +13,9 @@ def asr_transcript(audio_file, language):
22
 
23
  #Selecting the language and loading the model and the tokenizer
24
  if language == "English":
25
- model_name = "facebook/wav2vec2-large-960h-lv60-self"
26
  elif language == "Russian":
27
- model_name = "jonatasgrosman/wav2vec2-large-xlsr-53-russian"
28
 
29
  tokenizer = Wav2Vec2Tokenizer.from_pretrained(model)
30
  model = Wav2Vec2ForCTC.from_pretrained(model)
 
5
  import gradio as gr
6
  from transformers import Wav2Vec2Tokenizer, Wav2Vec2ForCTC
7
  nltk.download("punkt")
 
 
 
 
 
 
 
 
 
8
 
9
 
10
  def asr_transcript(audio_file, language):
 
13
 
14
  #Selecting the language and loading the model and the tokenizer
15
  if language == "English":
16
+ model = "facebook/wav2vec2-large-960h-lv60-self"
17
  elif language == "Russian":
18
+ model = "jonatasgrosman/wav2vec2-large-xlsr-53-russian"
19
 
20
  tokenizer = Wav2Vec2Tokenizer.from_pretrained(model)
21
  model = Wav2Vec2ForCTC.from_pretrained(model)