SeyedAli committed on
Commit
2c5f2c1
1 Parent(s): 277ff1a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -42
app.py CHANGED
@@ -13,43 +13,6 @@ text_output = gr.TextArea(label="متن فارسی", type="text")
13
 
14
  processor = AutoProcessor.from_pretrained("SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
15
  model = AutoModelForCTC.from_pretrained("SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
16
-
17
- chars_to_ignore = [
18
- ",", "?", ".", "!", "-", ";", ":", '""', "%", "'", '"', "�",
19
- "#", "!", "?", "«", "»", "(", ")", "؛", ",", "?", ".", "!", "-", ";", ":", '"',
20
- "“", "%", "‘", "�", "–", "…", "_", "”", '“', '„'
21
- ]
22
- chars_to_mapping = {
23
- "\u200c": " ", "\u200d": " ", "\u200e": " ", "\u200f": " ", "\ufeff": " ",
24
- }
25
-
26
- def multiple_replace(text, chars_to_mapping):
27
- pattern = "|".join(map(re.escape, chars_to_mapping.keys()))
28
- return re.sub(pattern, lambda m: chars_to_mapping[m.group()], str(text))
29
-
30
- def remove_special_characters(text, chars_to_ignore_regex):
31
- text = re.sub(chars_to_ignore_regex, '', text).lower() + " "
32
- return text
33
-
34
- def normalizer(batch, chars_to_ignore, chars_to_mapping):
35
- chars_to_ignore_regex = f"""[{"".join(chars_to_ignore)}]"""
36
- text = batch[0].lower().strip()
37
-
38
- text = text.replace("\u0307", " ").strip()
39
- text = multiple_replace(text, chars_to_mapping)
40
- text = remove_special_characters(text, chars_to_ignore_regex)
41
-
42
- batch = text
43
- return batch
44
-
45
-
46
- def speech_file_to_array_fn(batch):
47
- speech_array, sampling_rate = torchaudio.load(batch["path"])
48
- speech_array = speech_array.squeeze().numpy()
49
- speech_array = librosa.resample(np.asarray(speech_array), sampling_rate, 16_000)
50
-
51
- batch["speech"] = speech_array
52
- return batch
53
 
54
  def ASR(audio):
55
  pipe = pipeline("automatic-speech-recognition", model="SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
@@ -70,12 +33,7 @@ def ASR(audio):
70
  with torch.no_grad():
71
  logits = model(input_values,attention_mask).logits
72
  # Decode the transcription
73
- #result = normalizer(processor.batch_decode(torch.argmax(logits[0], dim=-1)),chars_to_ignore,chars_to_mapping)
74
  result = processor.decode(torch.argmax(logits[0], dim=-1))
75
- # max_items = np.random.randint(0, len(result), 10).tolist()
76
- # for i in max_items:
77
- # transcription=result[i]
78
- # return transcription
79
  return result
80
  iface = gr.Interface(fn=ASR, inputs=audio_input, outputs=text_output)
81
  iface.launch(share=False)
 
13
 
14
  processor = AutoProcessor.from_pretrained("SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
15
  model = AutoModelForCTC.from_pretrained("SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  def ASR(audio):
18
  pipe = pipeline("automatic-speech-recognition", model="SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
 
33
  with torch.no_grad():
34
  logits = model(input_values,attention_mask).logits
35
  # Decode the transcription
 
36
  result = processor.decode(torch.argmax(logits[0], dim=-1))
 
 
 
 
37
  return result
38
  iface = gr.Interface(fn=ASR, inputs=audio_input, outputs=text_output)
39
  iface.launch(share=False)