from transformers import MarianMTModel, MarianTokenizer
from translations.model_name_mapping import MODEL_NAME_MAPPING
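# Note: translations/model_name_mapping.py is not shown here. A minimal sketch of the
# shape this script assumes (the exact entries and model names below are an assumption,
# not the repo's actual file) -- MarianMT checkpoints on the Hub are typically published
# by Helsinki-NLP as "opus-mt-{src}-{tgt}":
#
#   MODEL_NAME_MAPPING = {
#       ("en", "es"): "Helsinki-NLP/opus-mt-en-es",
#       ("es", "en"): "Helsinki-NLP/opus-mt-es-en",
#       ("ca", "es"): "Helsinki-NLP/opus-mt-ca-es",
#       # ...plus any other language pairs the app supports
#   }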

def load_model_and_tokenizer(from_lang: str, to_lang: str):
    """Load the MarianMT model and tokenizer for the given language pair."""
    print(f"load_model_and_tokenizer from: {from_lang}, to: {to_lang}")
    model_name = MODEL_NAME_MAPPING.get((from_lang, to_lang))
    print(f"model_name: {model_name}")
    if model_name is None:
        raise ValueError(f"No model configured for language pair {from_lang} -> {to_lang}")
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return model, tokenizer

def translate(text, model, tokenizer):
    """Translate `text` and return the first decoded translation as a string."""
    # Tokenize the input and generate translated token ids.
    translated = model.generate(**tokenizer(text, return_tensors="pt", padding=True))
    # Decode the generated ids back into text, dropping special tokens.
    translated_text = [
        tokenizer.decode(t, skip_special_tokens=True) for t in translated
    ]
    return translated_text[0]

def test_translations(from_lang, to_lang, text_to_translate):
    """Translate a sample sentence for one language pair and print the result."""
    model, tokenizer = load_model_and_tokenizer(from_lang=from_lang, to_lang=to_lang)
    translation = translate(text_to_translate, model, tokenizer)
    print(f"Translated text from: {from_lang}, to: {to_lang}, translation: {translation}")

if __name__ == "__main__":
    # text_to_translate = "hola amigos, tengo hambre"
    # test_translations(from_lang="es", to_lang="en", text_to_translate=text_to_translate)
    # test_translations(from_lang="es", to_lang="ca", text_to_translate=text_to_translate)
    # test_translations(from_lang="es", to_lang="po", text_to_translate=text_to_translate)
    text_to_translate = "hello friends, who's hungry?"
    test_translations(from_lang="en", to_lang="es", text_to_translate=text_to_translate)
    # test_translations(from_lang="en", to_lang="ca", text_to_translate=text_to_translate)
    test_translations(from_lang="en", to_lang="pt", text_to_translate=text_to_translate)
    # test_translations(from_lang="es", to_lang="po", text_to_translate=text_to_translate)
    text_to_translate = "un cafè sense sucre i amb llet, si us plau"
    test_translations(from_lang="ca", to_lang="es", text_to_translate=text_to_translate)