minjibi committed
Commit 0945e74 · Parent(s): 3172a4f

Update app.py

Files changed (1):
  1. app.py +44 -51
app.py CHANGED
@@ -1,58 +1,51 @@
- # Importing all the necessary packages
- import nltk
- import librosa
- import torch
  import gradio as gr
- from transformers import Wav2Vec2Tokenizer, Wav2Vec2ForCTC
- nltk.download("punkt")
-
- # Loading the pre-trained model and the tokenizer
- model_name = "shizukanabasho/north2"
- tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_name)
- model = Wav2Vec2ForCTC.from_pretrained(model_name)
-
- def load_data(input_file):
-     # reading the file
-     speech, sample_rate = librosa.load(input_file)
-     # make it 1-D
-     if len(speech.shape) > 1:
-         speech = speech[:, 0] + speech[:, 1]
-     # Resampling the audio at 16 kHz
-     if sample_rate != 16000:
-         speech = librosa.resample(speech, sample_rate, 16000)
-     return speech
-
- def correct_casing(input_sentence):
-     sentences = nltk.sent_tokenize(input_sentence)
-     return ''.join([s.replace(s[0], s[0].capitalize(), 1) for s in sentences])
-
- def asr_transcript(input_file):
-     speech = load_data(input_file)
-     # Tokenize
-     input_values = tokenizer(speech, return_tensors="pt").input_values
-     # Take logits
-     logits = model(input_values).logits
-     # Take argmax
-     predicted_ids = torch.argmax(logits, dim=-1)
-     # Get the words from predicted word ids
-     transcription = tokenizer.decode(predicted_ids[0])
-     # Correcting the letter casing
-     # transcription = correct_casing(transcription.lower())
-     return transcription
-
- gr.Interface(asr_transcript,
-              inputs=gr.inputs.Audio(source="upload", type="filepath", optional=True, label="Upload"),
-              outputs=gr.outputs.Textbox(label="Output Text"),
-              title="ASR using Wav2Vec2.0",
-              description="This application displays transcribed text for given audio input",
-              theme="grass").launch()
-
- # gr.Interface(asr_transcript,
- #              inputs=[gr.inputs.Audio(source="microphone", type="filepath", optional=True, label="Speaker"),
- #                      gr.inputs.Audio(source="upload", type="filepath", optional=True, label="Speaker")],
- #              outputs=gr.outputs.Textbox(label="Output Text"),
- #              title="ASR using Wav2Vec2.0",
- #              description="This application displays transcribed text for given audio input",
- #              theme="grass").launch()
 
  import gradio as gr
+ import torch
+ from transformers import (
+     MT5ForConditionalGeneration,
+     MT5TokenizerFast,
+ )
+
+ # Load the fine-tuned mT5 model and its tokenizer from the Hub
+ model = MT5ForConditionalGeneration.from_pretrained(
+     "minjibi/qa",
+     return_dict=True,
+ )
+ tokenizer = MT5TokenizerFast.from_pretrained(
+     "minjibi/qa"
+ )
+
+ # Use the GPU when available; fall back to CPU so the app still runs without one
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ model.to(device)
+ model.eval()  # inference only: disable dropout
+
+ def predict(text):
+     with torch.no_grad():
+         # Encode the input text into token ids
+         input_ids = tokenizer.encode(text, return_tensors="pt", add_special_tokens=True)
+         input_ids = input_ids.to(device)
+
+         generated_ids = model.generate(
+             input_ids=input_ids,
+             num_beams=5,
+             max_length=1000,
+             repetition_penalty=3.0,  # default = 2.5
+             length_penalty=1.0,
+             early_stopping=True,
+             top_p=0.95,  # must lie in (0, 1]; only takes effect when sampling
+             top_k=20,    # only takes effect when sampling
+             num_return_sequences=3,
+         )
+
+         # Decode each returned sequence into plain text
+         preds = [
+             tokenizer.decode(
+                 g,
+                 skip_special_tokens=True,
+                 clean_up_tokenization_spaces=True,
+             )
+             for g in generated_ids
+         ]
+         return ['Q: ' + pred for pred in preds]
+
+ # text_to_predict = predict(text)
+ # predicted = ['Q: ' + text for text in predict(text_to_predict)]
+ # predicted
+
+ iface = gr.Interface(fn=predict, inputs="text", outputs="text")
+ iface.launch()
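
For reference, predict() returns three beam-search candidates (num_return_sequences=3), each prefixed with "Q: ". A minimal smoke-test sketch, assuming it runs after the definitions above and before iface.launch(); the sample passage is a hypothetical placeholder, not part of the commit:

    # Hypothetical smoke test: print the three generated question candidates.
    sample_passage = "..."  # substitute a short passage in the model's training language
    for candidate in predict(sample_passage):
        print(candidate)  # each candidate prints as "Q: <generated question>"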