SIF2025_demo / translate_utils.py
from transformers import MarianMTModel, MarianTokenizer
# 1) Load the MarianMT model and tokenizer
model_name = "Helsinki-NLP/opus-mt-ko-en"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)
# 2) Example translation function
def translate_ko_to_en(text: str) -> str:
    # Tokenize the input sentence
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    # Run inference with the model
    translated = model.generate(**inputs)
    # Decode the generated tokens back into a sentence
    translation = tokenizer.batch_decode(translated, skip_special_tokens=True)
    return translation[0]
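

# Example usage: a minimal sketch, not part of the original file.
# The sample Korean sentence ("Hello, nice to meet you.") is illustrative only.
if __name__ == "__main__":
    sample = "안녕하세요, 만나서 반갑습니다."
    print(translate_ko_to_en(sample))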