# idec1 / 20231115_hf_space_app.py
# pigotter's picture
# Update 20231115_hf_space_app.py
# fda799c
# -*- coding: utf-8 -*-
"""Copy of 20231115_hf_space
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/167WkIz-J7_z4FG65GkVPfkosxIXlKMQq
"""
# Build a personal site that summarizes news articles
# Hosting options: GitHub Pages, Hugging Face Spaces
import gradio as gr
# Interface๋ผ๋Š” ํด๋ž˜์Šค๋กœ ์ž…์ถœ๋ ฅ ์ƒ์ž๋ฅผ ์›น ์—˜๋ฆฌ๋จผํŠธ๋กœ ์ž๋™ ์ƒ์„ฑํ•ด์คŒ
from transformers import PreTrainedTokenizerFast,BartForConditionalGeneration
# PreTrainedTokenizerFast: a pretrained tokenizer that converts text into the format the model understands.
# BartForConditionalGeneration: a BART variant used for summarization, translation, text generation, etc.
# BART is an example of an encoder-decoder model.
# Import statements of the form `from transformers import ...` very often
# use AutoTokenizer / AutoModel instead, e.g.:
# tokenizer = AutoTokenizer.from_pretrained("model name")
# Load the pretrained KoBART news-summarization model and its tokenizer.
# Both artifacts are pulled from the same Hugging Face Hub repository.
_MODEL_NAME = "ainize/kobart-news"
tokenizer = PreTrainedTokenizerFast.from_pretrained(_MODEL_NAME)
model = BartForConditionalGeneration.from_pretrained(_MODEL_NAME)
# ์›๋ฌธ์„ ๋ฐ›์•„์„œ ์š”์•ฝ๋ฌธ์„ ๋ฐ˜ํ™˜
def summ(txt):
input_ids = tokenizer.encode(input_text, return_tensors="pt")
summary_text_ids = model.generate(
input_ids=input_ids,
bos_token_id=model.config.bos_token_id, # BOS๋Š” Beginning of Sentence
eos_token_id=model.config.eos_token_id, # EOS๋Š” End Of Sentence
length_penalty=2.0, # ์š”์•ฝ์„ ์–ผ๋งˆ๋‚˜ ์งง๊ฒŒ ํ• ์ง€
max_length=142, #
min_length=56, #
num_beams=4) # beam search -> ๊ฐ€์ง€ ์ˆ˜ ๋ผ๊ณ  ์ƒ๊ฐํ•˜๋ฉด ๋จ. ๊ฐ€์ง€ 4๊ฐœ๋ฅผ ํŽผ์น˜๊ณ  ๊ทธ ๊ฐ๊ฐ€์ง€์—์„œ 4๊ฐœ๋ฅผ ํŽผ์นœ ํ›„ ์ด 16๊ฐœ์ค‘ ๊ฐ€์žฅ ์ ํ•ฉํ•œ 4๊ฐœ๋ฅผ ๊ณ ๋ฅธ ๊ฐ€์ง€๋ฅผ ํŽผ์ณ ๋ฐ˜๋ณต ๊ณผ์ •
return tokenizer.decode(summary_text_ids[0], skip_special_tokens=True)
# Wire the summarizer into a Gradio UI: one textbox in (the article),
# one textbox out (its summary), then start the web app.
interface = gr.Interface(
    summ,
    [gr.Textbox(label="original text")],
    [gr.Textbox(label="summary")],
)
interface.launch()