# -*- coding: utf-8 -*-
"""20231115_hf_space (copy)
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/167WkIz-J7_z4FG65GkVPfkosxIXlKMQq
"""
# Build a simple personal site that summarizes news articles.
# Hosting options: GitHub Pages / Hugging Face Space.
import gradio as gr | |
# The Interface class auto-generates web UI elements for the function's inputs and outputs.
from transformers import PreTrainedTokenizerFast,BartForConditionalGeneration | |
# PreTrainedTokenizerFast: pretrained tokenizer that converts text into the format the model expects.
# BartForConditionalGeneration: BART variant used for summarization, translation, text generation, etc.
# BART is an example of an encoder-decoder model.
# Many `from transformers import ...` lines instead use AutoTokenizer / AutoModel, e.g.:
#   tokenizer = AutoTokenizer.from_pretrained("model name")
# Load Model and Tokenizer
# Load the pretrained KoBART news-summarization tokenizer and model from the Hub.
_MODEL_NAME = "ainize/kobart-news"
tokenizer = PreTrainedTokenizerFast.from_pretrained(_MODEL_NAME)
model = BartForConditionalGeneration.from_pretrained(_MODEL_NAME)
# Takes the original article text and returns its summary.
def summ(txt):
    """Summarize Korean news text with the KoBART model.

    Args:
        txt: The original article text to summarize.

    Returns:
        The generated summary string, with special tokens removed.
    """
    # BUG FIX: the original encoded an undefined name `input_text`,
    # raising NameError on every call; encode the `txt` parameter instead.
    input_ids = tokenizer.encode(txt, return_tensors="pt")
    summary_text_ids = model.generate(
        input_ids=input_ids,
        bos_token_id=model.config.bos_token_id,  # BOS = Beginning Of Sentence
        eos_token_id=model.config.eos_token_id,  # EOS = End Of Sentence
        length_penalty=2.0,   # >1.0 biases the beam search toward shorter summaries
        max_length=142,       # upper bound on generated tokens
        min_length=56,        # lower bound on generated tokens
        num_beams=4)          # beam search: keep the 4 best candidate branches each step
    return tokenizer.decode(summary_text_ids[0], skip_special_tokens=True)
# Wire the summarizer into a Gradio web UI: one input textbox, one output textbox.
interface = gr.Interface(
    fn=summ,
    inputs=[gr.Textbox(label="original text")],
    outputs=[gr.Textbox(label="summary")],
)
interface.launch()