Kyudan committed on
Commit
f7dcae3
·
1 Parent(s): 624dfff
Files changed (1) hide show
  1. translate_utils.py +16 -0
translate_utils.py CHANGED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import MarianMTModel, MarianTokenizer
2
+
3
+ # 1) Load the MarianMT model and tokenizer (module-level, so this runs once at import time)
4
+ model_name = "Helsinki-NLP/opus-mt-ko-en"  # checkpoint identifier passed to both from_pretrained() calls below
5
+ tokenizer = MarianTokenizer.from_pretrained(model_name)
6
+ model = MarianMTModel.from_pretrained(model_name)
7
+
8
+ # 2) Example translation function
9
def translate_ko_to_en(text: str) -> str:
    """Translate ``text`` using the module-level MarianMT tokenizer and model.

    Args:
        text: The sentence to translate. Tokenization is done with
            ``truncation=True``, so over-long input is cut by the tokenizer
            rather than rejected.

    Returns:
        The decoded translation of ``text`` with special tokens removed, or
        an empty string when ``text`` is empty or contains only whitespace.
    """
    # Nothing to translate: return early instead of asking the model to
    # generate output for an input that carries no content.
    if not text or not text.strip():
        return ""
    # Tokenize the input sentence into PyTorch tensors.
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    # Run inference with the model.
    translated = model.generate(**inputs)
    # Decode the generated tokens back into text.
    translation = tokenizer.batch_decode(translated, skip_special_tokens=True)
    # A single input string yields a single-item batch; return its only entry.
    return translation[0]