File size: 656 Bytes
f7dcae3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
from transformers import MarianMTModel, MarianTokenizer

# 1) Load the MarianMT model and tokenizer.
# Both objects are created at module import time from the same checkpoint name.
# NOTE(review): the checkpoint name suggests Korean -> English; confirm against the model card.
model_name = "Helsinki-NLP/opus-mt-ko-en"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)

# 2) Example translation function
def translate_ko_to_en(text: str) -> str:
    """Translate a single text with the module-level MarianMT model.

    Args:
        text: The source text to translate.

    Returns:
        The first decoded sequence produced by the model, with special
        tokens removed. An empty string is returned when ``text`` is empty
        or contains only whitespace.
    """
    # Nothing to translate: skip the model call, which would otherwise
    # generate output from an input that carries no content.
    if not text.strip():
        return ""

    # Tokenize the input sentence; truncation is enabled so over-long input
    # is cut by the tokenizer instead of being passed through at full length.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    # Run inference with the model.
    translated = model.generate(**inputs)
    # Decode the generated token ids back into text.
    translation = tokenizer.batch_decode(translated, skip_special_tokens=True)
    # A single input string yields a batch of one; return its only entry.
    return translation[0]