Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,10 +4,14 @@ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
|
|
4 |
import gradio as gr
|
5 |
import sox
|
6 |
import subprocess
|
7 |
-
from google_spell_checker import GoogleSpellChecker
|
|
|
8 |
|
|
|
|
|
9 |
|
10 |
-
|
|
|
11 |
|
12 |
def read_file_and_process(wav_file):
|
13 |
filename = wav_file.split('.')[0]
|
@@ -34,22 +38,33 @@ def parse_transcription(logits):
|
|
34 |
return transcription
|
35 |
|
36 |
|
37 |
-
def corrector(sentence):
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
def parse(wav_file):
|
46 |
input_values = read_file_and_process(wav_file)
|
47 |
with torch.no_grad():
|
48 |
logits = model(**input_values).logits
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
|
54 |
# def parse(wav_file):
|
55 |
# check_spell = ''
|
@@ -84,7 +99,7 @@ txtbox = gr.Textbox(
|
|
84 |
|
85 |
title = "Speech-to-Text (persian)"
|
86 |
description = "، توجه داشته باشید که هرچه گفتار شما شمرده تر باشد خروجی با کیفیت تری دارید.روی دکمه ضبط صدا کلیک کنید و سپس دسترسی مرورگر خود را به میکروفون دستگاه بدهید، سپس شروع به صحبت کنید و برای اتمام ضبط دوباره روی دکمه کلیک کنید"
|
87 |
-
article = "<p style='text-align: center'><a href='https://github.com/nimaprgrmr'>Large-Scale Self- and Semi-Supervised Learning for Speech Translation</a></p>"
|
88 |
|
89 |
demo = gr.Interface(fn=parse, inputs = input_, outputs=txtbox, title=title, description=description, article = article,
|
90 |
streaming=True, interactive=True,
|
|
|
4 |
import gradio as gr
|
5 |
import sox
|
6 |
import subprocess
|
7 |
+
# from google_spell_checker import GoogleSpellChecker
|
8 |
+
import openai
|
9 |
|
10 |
+
# Set your OpenAI API key
|
11 |
+
# Read the OpenAI key from the environment instead of committing it to the
# repository. An empty string is used when the variable is not set.
# NOTE(review): the key that was previously hard-coded here has been published
# and should be revoked.
import os

api_key = os.environ.get("OPENAI_API_KEY", "")
|
12 |
|
13 |
+
|
14 |
+
# spell_checker = GoogleSpellChecker(lang="fa")
|
15 |
|
16 |
def read_file_and_process(wav_file):
|
17 |
filename = wav_file.split('.')[0]
|
|
|
38 |
return transcription
|
39 |
|
40 |
|
41 |
+
# def corrector(sentence):
|
42 |
+
# check_spell = spell_checker.check(sentence)
|
43 |
+
# if check_spell[1] is None:
|
44 |
+
# return sentence
|
45 |
+
# else:
|
46 |
+
# return check_spell[1]
|
47 |
+
def correct_text_with_gpt(text):
    """Ask the OpenAI completion endpoint to correct a piece of text.

    Args:
        text: The raw text to correct.

    Returns:
        The text of the first completion choice with surrounding whitespace
        stripped, or an empty string when ``text`` is empty, ``None`` or
        whitespace-only (no API request is made in that case).
    """
    # Nothing to correct: return early instead of sending an empty prompt.
    if not text or not text.strip():
        return ""

    openai.api_key = api_key
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=f"Please correct the following text: '{text}'\n\nCorrected text:",
        max_tokens=1000,
        # Sampling temperature: lower values make the output more
        # deterministic, higher values make it more random.
        temperature=0.5,
        # Nucleus-sampling threshold: only tokens within this cumulative
        # probability mass are considered; 1.0 keeps the full distribution.
        top_p=1.0,
        # Penalises tokens in proportion to how often they have already
        # appeared, which reduces verbatim repetition.
        frequency_penalty=0.2,
        # Penalises tokens that have already appeared at least once,
        # regardless of how often.
        presence_penalty=0.5,
    )
    return response.choices[0].text.strip()
|
59 |
+
|
60 |
+
|
61 |
def parse(wav_file):
    """Transcribe ``wav_file`` and return the GPT-corrected transcription.

    The file is preprocessed with ``read_file_and_process``, run through the
    model with gradient tracking disabled, decoded with
    ``parse_transcription`` and finally passed to ``correct_text_with_gpt``.
    """
    model_inputs = read_file_and_process(wav_file)
    # Inference only: no gradients are needed for the forward pass.
    with torch.no_grad():
        model_output = model(**model_inputs)
    raw_transcription = parse_transcription(model_output.logits)
    return correct_text_with_gpt(raw_transcription)
|
66 |
+
|
67 |
+
|
|
|
68 |
|
69 |
# def parse(wav_file):
|
70 |
# check_spell = ''
|
|
|
99 |
|
100 |
title = "Speech-to-Text (persian)"
|
101 |
description = "، توجه داشته باشید که هرچه گفتار شما شمرده تر باشد خروجی با کیفیت تری دارید.روی دکمه ضبط صدا کلیک کنید و سپس دسترسی مرورگر خود را به میکروفون دستگاه بدهید، سپس شروع به صحبت کنید و برای اتمام ضبط دوباره روی دکمه کلیک کنید"
|
102 |
+
# article = "<p style='text-align: center'><a href='https://github.com/nimaprgrmr'>Large-Scale Self- and Semi-Supervised Learning for Speech Translation</a></p>"
# The footer link above is disabled, but the gr.Interface(...) call is still
# given article=article, so the name must remain bound to avoid a NameError.
article = None
|
103 |
|
104 |
demo = gr.Interface(fn=parse, inputs = input_, outputs=txtbox, title=title, description=description, article = article,
|
105 |
streaming=True, interactive=True,
|