sotirios-slv committed on
Commit
3ae01f4
1 Parent(s): 7e182ab

Updated with text processing for better comparison

Browse files
Files changed (1) hide show
  1. app.py +13 -1
app.py CHANGED
@@ -2,6 +2,8 @@ from difflib import Differ
2
 
3
  import gradio as gr
4
 
 
 
5
  import torch
6
  from transformers import (
7
  AutoModelForSpeechSeq2Seq,
@@ -55,6 +57,13 @@ pipe = pipeline(
55
  )
56
 
57
 
 
 
 
 
 
 
 
58
  def diff_texts(diction_text: str, audio_input: str):
59
  d = Differ()
60
 
@@ -76,7 +85,10 @@ def calc_score(diff_texts: list) -> float:
76
 
77
  def transcribe_audio(diction_text, audio):
78
  result = pipe(audio)
79
- diff_text = diff_texts(diction_text, result["text"])
 
 
 
80
 
81
  score = calc_score(diff_text)
82
 
 
2
 
3
  import gradio as gr
4
 
5
+ import string
6
+
7
  import torch
8
  from transformers import (
9
  AutoModelForSpeechSeq2Seq,
 
57
  )
58
 
59
 
60
+ def prepare_text_for_comparison(text_to_clean: str):
61
+
62
+ text_to_clean = text_to_clean.translate(str.maketrans("", "", string.punctuation))
63
+
64
+ return text_to_clean.casefold()
65
+
66
+
67
  def diff_texts(diction_text: str, audio_input: str):
68
  d = Differ()
69
 
 
85
 
86
  def transcribe_audio(diction_text, audio):
87
  result = pipe(audio)
88
+
89
+ cleaned_result = prepare_text_for_comparison(result["text"])
90
+ cleaned_diction_text = prepare_text_for_comparison(diction_text)
91
+ diff_text = diff_texts(cleaned_diction_text, cleaned_result)
92
 
93
  score = calc_score(diff_text)
94