Spaces:
Running
Running
import torch | |
import torchaudio | |
import torch | |
from transformers import pipeline | |
import soundfile as sf | |
import torch | |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor | |
import argparse | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
import librosa | |
from huggingface_hub.hf_api import HfFolder | |
from transformers import MarianMTModel, MarianTokenizer | |
from elevenlabs import set_api_key | |
from elevenlabs import clone, generate, play | |
import os

# The Hugging Face access token is read from the environment rather than being
# embedded in the source: a token committed to a shared file is readable by
# anyone who can see the file and must be treated as compromised (revoke the
# old one). `access_token` is None when neither variable is set.
access_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
if access_token:
    # Persist the token so later hub calls in this process can pick it up.
    HfFolder.save_token(access_token)

# Seq2seq tokenizer/model pair used by hindi_to_english().
tokenizer_en_hn = AutoTokenizer.from_pretrained("vasudevgupta/mbart-bhasha-hin-eng")
model_translate_en_hm = AutoModelForSeq2SeqLM.from_pretrained("vasudevgupta/mbart-bhasha-hin-eng")

# Wav2Vec2 processor and CTC model used by parse_transcription().
processor = Wav2Vec2Processor.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
model_speech = Wav2Vec2ForCTC.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
def parse_transcription(wav_file):
    """Transcribe an audio file with the module-level Wav2Vec2 CTC model.

    Args:
        wav_file: Audio source passed straight to ``librosa.load``.

    Returns:
        The decoded transcription of the first item in the batch, with
        special tokens removed.
    """
    # Load the audio, resampled to 16 kHz by librosa.
    audio_input, sample_rate = librosa.load(wav_file, sr=16000)

    # Convert the waveform into a PyTorch tensor of model input values.
    input_values = processor(
        audio_input, sampling_rate=sample_rate, return_tensors="pt"
    ).input_values

    # Inference only: disabling autograd avoids building a gradient graph,
    # which saves memory and time without changing the logits.
    with torch.no_grad():
        logits = model_speech(input_values).logits

    # Greedy CTC decoding: most likely token id at every time step.
    predicted_ids = torch.argmax(logits, dim=-1)

    return processor.decode(predicted_ids[0], skip_special_tokens=True)
def hindi_to_english(text):
    """Translate ``text`` with the module-level seq2seq model and tokenizer.

    Args:
        text: Source text to translate. It is truncated to 512 tokens.

    Returns:
        The generated translation, stripped of surrounding whitespace and
        lower-cased.
    """
    inputs = tokenizer_en_hn.encode(
        text,
        return_tensors="pt",
        padding=True,
        max_length=512,
        truncation=True,
    )
    outputs = model_translate_en_hm.generate(
        inputs, max_length=128, num_beams=4, early_stopping=True
    )
    # Let the tokenizer drop every special token it knows about instead of
    # string-replacing only '<pad>' and '<s>', which could leave other
    # markers (e.g. an end-of-sequence token) in the returned text.
    translated = tokenizer_en_hn.decode(outputs[0], skip_special_tokens=True)
    return translated.strip().lower()
_EN_HI_MODEL_NAME = "Helsinki-NLP/opus-mt-en-hi"
# Holds the loaded (tokenizer, model) pair so it is created at most once.
_en_hi_translator = {}


def _load_en_hi_translator():
    """Return the Marian (tokenizer, model) pair, loading it on first use."""
    if "pair" not in _en_hi_translator:
        tokenizer = MarianTokenizer.from_pretrained(_EN_HI_MODEL_NAME)
        model = MarianMTModel.from_pretrained(_EN_HI_MODEL_NAME)
        # Store only after both loads succeeded so a failed load is retried
        # on the next call rather than leaving a half-filled cache.
        _en_hi_translator["pair"] = (tokenizer, model)
    return _en_hi_translator["pair"]


def translate_english_to_hindi(input_text):
    """Translate ``input_text`` with the Helsinki-NLP/opus-mt-en-hi model.

    The tokenizer and model are loaded on the first call and reused
    afterwards; previously they were re-created on every call.

    Args:
        input_text: Text to translate.

    Returns:
        The decoded translation of the first sequence, with special tokens
        removed.
    """
    tokenizer, model = _load_en_hi_translator()

    # Truncate over-long input to the tokenizer's maximum length.
    inputs = tokenizer(
        input_text, return_tensors="pt", padding=True, truncation=True
    )
    # Pass the whole encoding so the attention mask accompanies the ids;
    # without it, padded positions would be attended to.
    translated_ids = model.generate(**inputs)

    return tokenizer.decode(translated_ids[0], skip_special_tokens=True)
def hindi_tts(text, voice="Customer Service", model="eleven_multilingual_v2"):
    """Synthesize speech for ``text`` through the ElevenLabs ``generate`` call.

    Args:
        text: Text to speak.
        voice: ElevenLabs voice name. Defaults to the voice this function
            has always used.
        model: ElevenLabs model id. The default is a multilingual model
            because the previously hard-coded ``eleven_monolingual_v1`` is
            an English-only model and this function is meant for Hindi.

    Returns:
        Whatever ``generate`` returns for the request (the audio data).
    """
    return generate(text=text, voice=voice, model=model)