#!/usr/bin/env python
# coding: utf-8
"""English-to-Yoruba machine-translation testbed with a Gradio front end.

For a selected translation model the script translates English text into
Yoruba, synthesises Yoruba speech from the translation, and scores the
result against a reference sentence (BLEU, WER, METEOR) and a reference
recording (a Mel-cepstral distance).

Dependencies (install once before running):
    pip3 install torch torchvision torchaudio
    pip install --upgrade pip transformers accelerate gradio ipywidgets
    pip install nltk jiwer sentencepiece sacremoses soundfile librosa numpy
    pip install huggingface_hub
"""

# Import Required Libraries
import os
import string
from functools import lru_cache

import gradio as gr
import librosa
import nltk
import numpy as np
import soundfile as sf
import torch
from IPython.display import Audio  # noqa: F401  (kept for notebook use)
from jiwer import wer
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from nltk.translate.meteor_score import meteor_score
from scipy.spatial.distance import euclidean  # noqa: F401  (kept for notebook use)
from transformers import pipeline
from transformers import VitsModel, AutoTokenizer, set_seed

# METEOR relies on WordNet; the corpora are fetched when the module is loaded.
nltk.download('wordnet')
nltk.download('omw-1.4')  # Optional if using WordNet's multilingual features
print(nltk.data.path)
nltk.data.path.append('./nltk_data')

# Text-to-speech checkpoint used to voice the Yoruba translation.
TTS_MODEL_NAME = "facebook/mms-tts-yor"
# File the synthesised speech is written to (relative to the working directory).
TTS_OUTPUT_FILENAME = 'ttsOutput.wav'
# Reference recording, looked up in the working directory, for the MCD score.
GROUND_TRUTH_AUDIO_FILENAME = "gt_ttsOutput.wav"
# Both recordings are resampled to this rate before MFCC extraction.
MCD_SAMPLE_RATE = 16000

# Reference translation used to score the model output.
# Alternative reference sentences used in earlier experiments:
#   Clinical text:
#     "Àrùn jẹjẹrẹ ọmú jẹ́ ọ̀kan pàtàkì lára ohun tó ń ṣàkóbá fún ìlera gbogbo ènìyàn ní Nàìjíríà, ó sì jẹ́ ọ̀kan pàtàkì lára ohun tó ń fa ikú àwọn obìnrin tí àrùn jẹjẹrẹ ń pa lórílẹ̀-èdè náà."
#   News text:
#     "Wọ́n ní ìgbà àkọ́kọ́ nìyí tí irú ìwà ipá bẹ́ẹ̀ máa wáyé ní ìpínlẹ̀ Ondo."
# The active default is the religion-domain sentence.
DEFAULT_GROUND_TRUTH_TEXT = "Àwọn aposteli, àwọn wòlíì, àwọn ajíhìnrere, àwọn olùṣọ́-àgùntàn àti àwọn olùkọ́."

# Translation table that strips ASCII punctuation; built once and reused.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)


# Define all Utility Functions

def _normalize_text(text):
    """Lowercase *text* and remove ASCII punctuation (``string.punctuation``)."""
    return text.lower().translate(_PUNCTUATION_TABLE)


# Function to compute BLEU score
def compute_bleu(reference_text, predicted_text):
    """
    Computes the BLEU score for a single translation.

    The texts are split on whitespace as-is (no lowercasing or punctuation
    stripping) and both are echoed to stdout.

    :param reference_text: The ground truth text (in Yoruba).
    :param predicted_text: The machine-generated translation text (in Yoruba).
    :return: BLEU score rounded to two decimals (float).
    """
    print("The Reference Text = ", reference_text)
    print("The Predicted Text = ", predicted_text)

    # sentence_bleu expects a list of references, each a list of tokens.
    reference_tokens = [reference_text.split()]
    predicted_tokens = predicted_text.split()

    # Smoothing avoids a zero score when some n-gram order has no matches.
    smoothing_function = SmoothingFunction().method1

    bleu_score = sentence_bleu(
        reference_tokens,
        predicted_tokens,
        smoothing_function=smoothing_function,
    )
    return round(bleu_score, 2)


# Function to compute Word Error Rate (WER)
def compute_wer(reference_text, predicted_text):
    """
    Computes the Word Error Rate (WER) for a single translation.

    Both texts are lowercased and stripped of ASCII punctuation first.

    :param reference_text: The ground truth text (in Yoruba).
    :param predicted_text: The machine-generated translation text (in Yoruba).
    :return: WER score rounded to two decimals (float).
    """
    wer_score = wer(_normalize_text(reference_text), _normalize_text(predicted_text))
    return round(wer_score, 2)


# Function to compute METEOR score
def compute_meteor(reference_text, predicted_text):
    """
    Computes the METEOR score for a single translation.

    Both texts are lowercased, stripped of ASCII punctuation and split on
    whitespace before scoring.

    :param reference_text: The ground truth text (in Yoruba).
    :param predicted_text: The machine-generated translation text (in Yoruba).
    :return: METEOR score rounded to two decimals (float).
    """
    reference_tokens = _normalize_text(reference_text).split()
    predicted_tokens = _normalize_text(predicted_text).split()

    # meteor_score takes a list of tokenised references and one tokenised hypothesis.
    meteor = meteor_score([reference_tokens], predicted_tokens)
    return round(meteor, 2)


# Function to compute Mel Cepstral Distance (MCD)
def compute_mcd(ground_truth_audio_path, predicted_audio_path):
    """
    Computes the Mel Cepstral Distance (MCD) between two audio files.

    Both files are loaded at 16 kHz, 13 MFCCs are extracted per frame, the
    longer sequence is truncated to the length of the shorter one (no time
    alignment is performed), and the per-frame Euclidean distances are
    averaged and scaled by ``10 / ln(10)``.

    :param ground_truth_audio_path: Path to the ground truth audio file.
    :param predicted_audio_path: Path to the predicted audio file.
    :return: MCD score rounded to two decimals (float).
    :raises ValueError: If no MFCC frames are available to compare.
    """
    # Loading with an explicit ``sr`` resamples both files to the same rate,
    # so no separate sampling-rate check is needed.
    y_true, _ = librosa.load(ground_truth_audio_path, sr=MCD_SAMPLE_RATE)
    y_pred, _ = librosa.load(predicted_audio_path, sr=MCD_SAMPLE_RATE)

    # Transpose so that rows are frames and columns are coefficients.
    mfcc_true = librosa.feature.mfcc(y=y_true, sr=MCD_SAMPLE_RATE, n_mfcc=13).T
    mfcc_pred = librosa.feature.mfcc(y=y_pred, sr=MCD_SAMPLE_RATE, n_mfcc=13).T

    # Compare only the frames both recordings have.
    min_frames = min(len(mfcc_true), len(mfcc_pred))
    if min_frames == 0:
        raise ValueError(
            "Cannot compute MCD: at least one audio file produced no MFCC frames."
        )

    # Per-frame Euclidean distance, computed for all frames at once.
    frame_diff = (
        mfcc_true[:min_frames].astype(np.float64)
        - mfcc_pred[:min_frames].astype(np.float64)
    )
    mean_distance = np.linalg.norm(frame_diff, axis=1).mean()

    mcd = (10.0 / np.log(10)) * mean_distance
    return round(float(mcd), 2)


# Define Translation and Synthesis Function

@lru_cache(maxsize=1)
def _load_translation_pipeline(model_name):
    """Build the translation pipeline for *model_name*; the latest one is cached."""
    return pipeline('translation_en_to_yo', model=model_name, max_length=500)


@lru_cache(maxsize=1)
def _load_tts():
    """Load the Yoruba TTS model and tokenizer once and reuse them across calls."""
    tts_model = VitsModel.from_pretrained(TTS_MODEL_NAME)
    tts_tokenizer = AutoTokenizer.from_pretrained(TTS_MODEL_NAME)
    return tts_model, tts_tokenizer


def _synthesize_speech(text, output_path):
    """Synthesise *text* with the TTS model and write the waveform to *output_path*."""
    tts_model, tts_tokenizer = _load_tts()
    tts_inputs = tts_tokenizer(text, return_tensors="pt")

    set_seed(555)  # make deterministic
    with torch.no_grad():
        tts_output = tts_model(**tts_inputs).waveform

    # First (only) item of the batch, as a NumPy array.
    waveform = tts_output.numpy()[0]

    # Write at the rate the model generates audio at rather than a literal.
    sf.write(output_path, waveform, tts_model.config.sampling_rate)


def translate_transformers(modelName, sourceLangText,
                           ground_truth_text=DEFAULT_GROUND_TRUTH_TEXT):
    """
    Translates English text to Yoruba, voices it, and scores the result.

    :param modelName: Hugging Face model id of the translation model.
    :param sourceLangText: English text to translate.
    :param ground_truth_text: Reference Yoruba translation used for BLEU,
        WER and METEOR. Defaults to the built-in reference sentence.
    :return: Tuple ``(translated_text, bleu, wer, meteor, mcd, audio_path)``.
        ``mcd`` is ``None`` when the distance could not be computed (for
        example when the reference recording is missing).
    """
    translation_pipeline = _load_translation_pipeline(modelName)
    translated_text = translation_pipeline(sourceLangText)
    translated_text_target = translated_text[0]['translation_text']

    # TTS for the translated text; the audio is written to the working directory.
    _synthesize_speech(translated_text_target, TTS_OUTPUT_FILENAME)

    predicted_text = translated_text_target

    # Compute bleu_score
    bleu_score = compute_bleu(ground_truth_text, predicted_text)
    print(f"Bleu Score (BLEU): {bleu_score:.2f}")

    # Compute WER
    wer_score = compute_wer(ground_truth_text, predicted_text)
    print(f"Word Error Rate (WER): {wer_score:.2f}")

    # Compute METEOR
    meteor = compute_meteor(ground_truth_text, predicted_text)
    print(f"METEOR Score: {meteor:.2f}")

    # Paths to sample audio files for MCD computation in current directory
    ground_truth_audio = os.path.join(os.getcwd(), GROUND_TRUTH_AUDIO_FILENAME)
    predicted_audio = os.path.join(os.getcwd(), TTS_OUTPUT_FILENAME)

    # Compute Mel Cepstral Distance (MCD). This is best-effort: a missing or
    # unreadable reference recording must not discard the text metrics.
    try:
        mcd = compute_mcd(ground_truth_audio, predicted_audio)
        print(f"Mel Cepstral Distance (MCD): {mcd:.2f}")
    except Exception as e:
        print(f"Error computing MCD: {e}")
        # Without this assignment the return below raised UnboundLocalError.
        mcd = None

    return translated_text_target, bleu_score, wer_score, meteor, mcd, TTS_OUTPUT_FILENAME


# Define User Interface Function using Gradio and IPython Libraries

# Translation models offered in the dropdown.
TRANSLATION_MODEL_CHOICES = [
    "Davlan/byt5-base-eng-yor-mt",  # Exp1
    "Davlan/m2m100_418M-eng-yor-mt",  # Exp2
    "Davlan/mbart50-large-eng-yor-mt",  # Exp3
    "Davlan/mt5_base_eng_yor_mt",  # Exp4
    "omoekan/opus-tatoeba-eng-yor",  # Exp5
    "masakhane/afrimt5_en_yor_news",  # Exp6
    "masakhane/afrimbart_en_yor_news",  # Exp7
    "masakhane/afribyt5_en_yor_news",  # Exp8
    "masakhane/byt5_en_yor_news",  # Exp9
    "masakhane/mt5_en_yor_news",  # Exp10
    "masakhane/mbart50_en_yor_news",  # Exp11
    "masakhane/m2m100_418M_en_yor_news",  # Exp12
    "masakhane/m2m100_418M_en_yor_rel_news",  # Exp13
    "masakhane/m2m100_418M_en_yor_rel_news_ft",  # Exp14
    "masakhane/m2m100_418M_en_yor_rel",  # Exp15
    "dabagyan/menyo_en2yo",  # Exp16
    # Currently disabled candidates:
    #   facebook/nllb-200-distilled-600M, facebook/nllb-200-3.3B,
    #   facebook/nllb-200-1.3B, facebook/nllb-200-distilled-1.3B,
    #   keithhon/nllb-200-3.3B, CohereForAI/aya-101,
    #   facebook/m2m100_1.2B, facebook/m2m100-12B-avg-5-ckpt
    "facebook/m2m100_418M",  # Exp17
    "google/mt5-base",  # Exp20
    "google/byt5-large",  # Exp21
]

interface = gr.Interface(
    fn=translate_transformers,
    inputs=[
        gr.Dropdown(
            TRANSLATION_MODEL_CHOICES,
            label="Select Finetuned Eng2Yor Translation Model",
        ),
        gr.Textbox(lines=2, placeholder="Enter English Text Here...", label="English Text"),
    ],
    # One output component per element of the tuple returned by translate_transformers.
    outputs=[
        gr.Textbox(value="text", label="Translated Yoruba Text"),
        gr.Textbox(value="number", label="BLEU SCORE"),
        gr.Textbox(value="number", label="WER(WORD ERROR RATE) SCORE - The Lower the Better"),
        gr.Textbox(value="number", label="METEOR SCORE"),
        gr.Textbox(value="number", label="MCD(MEL CESPRAL DISTANCE) SCORE"),
        gr.Audio(type="filepath", label="Click to Generate Yoruba Speech from the Translated Text"),
    ],
    title="ASPMIR-MACHINE-TRANSLATION-TESTBED FOR LOW RESOURCED AFRICAN LANGUAGES",
    description="{This Tool Allows Developers and Researchers to Carry Out Experiments on Low Resourced African Languages with State-of-the-Art Pretrained or Finetuned Models.}",
)


if __name__ == "__main__":
    interface.launch(share=True)