Spaces:
Running
Running
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
import torch | |
import whisper | |
# Checkpoint used for spelling correction. The tokenizer and the seq2seq model
# are both loaded from it once, when this module is imported.
SPELLCHECK_CHECKPOINT = "Bhuvana/t5-base-spellchecker"
tokenizer = AutoTokenizer.from_pretrained(SPELLCHECK_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(SPELLCHECK_CHECKPOINT)
def correct(inputs):
    """Return a spell-corrected version of ``inputs``.

    The text is encoded with the module-level ``tokenizer``, passed through
    the module-level seq2seq ``model`` and the first generated sequence is
    decoded back to a string with special tokens stripped.

    Generation uses sampling (``do_sample=True``, ``top_p=0.99``), so two
    calls with the same text are not guaranteed to return the same result.
    Generation is capped with ``max_length=50``, so long inputs may come back
    shortened.

    Args:
        inputs: Text to correct. ``None``, empty and whitespace-only values
            are accepted.

    Returns:
        The corrected text, or ``""`` when there is nothing to correct.
    """
    # With no real input the sampler would invent text out of nothing
    # (e.g. after transcribing a silent recording), so bail out early.
    if not inputs or not inputs.strip():
        return ""

    # Keep the input on the same device as the model so generation still
    # works if the model is ever moved off the CPU.
    input_ids = tokenizer.encode(inputs, return_tensors="pt").to(model.device)
    sample_output = model.generate(
        input_ids,
        do_sample=True,
        max_length=50,
        top_p=0.99,
        num_return_sequences=1,
    )
    return tokenizer.decode(sample_output[0], skip_special_tokens=True)
# Speech-to-text model, loaded once at import time and reused by every call.
whisper_model = whisper.load_model(name="base")
def transcribe(audio_file):
    """Transcribe an audio file with Whisper and spell-correct the text.

    The audio is loaded, padded or trimmed to Whisper's 30-second window,
    converted to a log-Mel spectrogram and decoded with the module-level
    ``whisper_model``. The raw transcript is printed and then passed through
    ``correct``.

    Args:
        audio_file: Audio input accepted by ``whisper.load_audio``.

    Returns:
        The value returned by ``correct`` for the raw transcript.
    """
    # Load audio and pad/trim it to fit 30 seconds.
    audio = whisper.load_audio(audio_file)
    audio = whisper.pad_or_trim(audio)

    # Convert audio data to a float PyTorch tensor.
    waveform = torch.from_numpy(audio).float()

    # The spectrogram must live on the Whisper model's device. The original
    # used the spell-checker model's device here, which fails whenever the
    # two models are placed on different devices.
    mel = whisper.log_mel_spectrogram(waveform).to(whisper_model.device)

    # No language is set in the options, and the separate detect_language
    # call was dropped because its results were never used.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(whisper_model, mel, options)

    result_text = result.text
    print(f"result_text:{result_text}")
    return correct(result_text)