from transformers import MarianMTModel, MarianTokenizer

from translations.model_name_mapping import MODEL_NAME_MAPPING
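
# For reference, a minimal sketch of what translations/model_name_mapping.py is
# assumed to export: a dict keyed by (source, target) ISO 639-1 codes. The
# Helsinki-NLP/opus-mt checkpoint names below are assumptions inferred from the
# language pairs exercised under __main__, not confirmed by this repo:
#
# MODEL_NAME_MAPPING = {
#     ("en", "es"): "Helsinki-NLP/opus-mt-en-es",
#     ("es", "en"): "Helsinki-NLP/opus-mt-es-en",
#     ("es", "ca"): "Helsinki-NLP/opus-mt-es-ca",
#     ("ca", "es"): "Helsinki-NLP/opus-mt-ca-es",
# }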


def load_model_and_tokenizer(from_lang: str, to_lang: str):
    """Load the MarianMT model and tokenizer for a language pair."""
    print(f"load_model_and_tokenizer from: {from_lang}, to: {to_lang}")
    model_name = MODEL_NAME_MAPPING.get((from_lang, to_lang))
    print(f"model_name: {model_name}")
    # .get() returns None for unmapped pairs; fail early with a clear error
    # instead of letting from_pretrained(None) raise something cryptic.
    if model_name is None:
        raise ValueError(f"No model mapped for pair ({from_lang}, {to_lang})")
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    model = MarianMTModel.from_pretrained(model_name)
    return model, tokenizer


def translate(text, model, tokenizer):
    """Translate a single string and return the decoded text."""
    # Tokenize, generate the translated token ids, then decode back to text.
    translated = model.generate(**tokenizer(text, return_tensors="pt", padding=True))
    translated_text = [
        tokenizer.decode(t, skip_special_tokens=True) for t in translated
    ]
    return translated_text[0]
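

# Since the tokenizer is already called with padding=True, the same pattern
# extends to whole batches. A minimal sketch; translate_batch is a hypothetical
# helper, not part of the original module:
def translate_batch(texts, model, tokenizer):
    """Translate a list of strings in one padded batch."""
    encoded = tokenizer(texts, return_tensors="pt", padding=True)
    generated = model.generate(**encoded)
    # Decode every generated sequence, not just the first one.
    return [tokenizer.decode(t, skip_special_tokens=True) for t in generated]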


def test_translations(from_lang, to_lang, text_to_translate):
    model, tokenizer = load_model_and_tokenizer(from_lang=from_lang, to_lang=to_lang)
    translation = translate(text_to_translate, model, tokenizer)
    print(f"Translated text from: {from_lang}, to: {to_lang}, translation: {translation}")


if __name__ == "__main__":
    # text_to_translate = "hola amigos, tengo hambre"
    # test_translations(from_lang="es", to_lang="en", text_to_translate=text_to_translate)
    # test_translations(from_lang="es", to_lang="ca", text_to_translate=text_to_translate)
    # test_translations(from_lang="es", to_lang="pt", text_to_translate=text_to_translate)
    text_to_translate = "hello friends, who's hungry?"
    test_translations(from_lang="en", to_lang="es", text_to_translate=text_to_translate)
    # test_translations(from_lang="en", to_lang="ca", text_to_translate=text_to_translate)
    test_translations(from_lang="en", to_lang="pt", text_to_translate=text_to_translate)
    text_to_translate = "un cafè sense sucre i amb llet, si us plau"
    test_translations(from_lang="ca", to_lang="es", text_to_translate=text_to_translate)