# my / app.py
# baekseol's picture
# Update app.py
# 876df2c
# -*- coding: utf-8 -*-
"""1115_hf_space.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1sxftP6JDxb-_If8X0WyoSMiU-x-x-AzE
"""
# Build a personal site, one per person, that summarizes news articles
# Site: GitHub Pages: Hugging Face Space
import gradio as gr
# gradio: its Interface class automatically generates the input/output boxes as web elements
from transformers import PreTrainedTokenizerFast, BartForConditionalGeneration
# Import statements that begin with `from transformers import`
# in many cases bring in AutoTokenizer, AutoModel, e.g.
# tokenizer = AutoTokenizer.from_pretrained("<model name, and so on>")
# BART is an example of an encoder-decoder model
# Checkpoint identifier handed to both `from_pretrained` calls below, so the
# tokenizer and the model are loaded from the same source.
model_name = "ainize/kobart-news"
tokenizer, model = (
    PreTrainedTokenizerFast.from_pretrained(model_name),
    BartForConditionalGeneration.from_pretrained(model_name),
)
# Encode Input Text
input_text = '๊ตญ๋‚ด ์ „๋ฐ˜์ ์ธ ๊ฒฝ๊ธฐ์นจ์ฒด๋กœ ์ƒ๊ฐ€ ๊ฑด๋ฌผ์ฃผ์˜ ์ˆ˜์ต๋„ ์ „๊ตญ์ ์ธ ๊ฐ์†Œ์„ธ๋ฅผ ๋ณด์ด๊ณ  ์žˆ๋Š” ๊ฒƒ์œผ๋กœ ๋‚˜ํƒ€๋‚ฌ๋‹ค. ์ˆ˜์ตํ˜• ๋ถ€๋™์‚ฐ ์—ฐ๊ตฌ๊ฐœ๋ฐœ๊ธฐ์—… ์ƒ๊ฐ€์ •๋ณด์—ฐ๊ตฌ์†Œ๋Š” ํ•œ๊ตญ๊ฐ์ •์› ํ†ต๊ณ„๋ฅผ ๋ถ„์„ํ•œ ๊ฒฐ๊ณผ ์ „๊ตญ ์ค‘๋Œ€ํ˜• ์ƒ๊ฐ€ ์ˆœ์˜์—…์†Œ๋“(๋ถ€๋™์‚ฐ์—์„œ ๋ฐœ์ƒํ•˜๋Š” ์ž„๋Œ€์ˆ˜์ž…, ๊ธฐํƒ€์ˆ˜์ž…์—์„œ ์ œ๋ฐ˜ ๊ฒฝ๋น„๋ฅผ ๊ณต์ œํ•œ ์ˆœ์†Œ๋“)์ด 1๋ถ„๊ธฐ ใŽก๋‹น 3๋งŒ4200์›์—์„œ 3๋ถ„๊ธฐ 2๋งŒ5800์›์œผ๋กœ ๊ฐ์†Œํ–ˆ๋‹ค๊ณ  17์ผ ๋ฐํ˜”๋‹ค. ์ˆ˜๋„๊ถŒ, ์„ธ์ข…์‹œ, ์ง€๋ฐฉ๊ด‘์—ญ์‹œ์—์„œ ์ˆœ์˜์—…์†Œ๋“์ด ๊ฐ€์žฅ ๋งŽ์ด ๊ฐ์†Œํ•œ ์ง€์—ญ์€ 3๋ถ„๊ธฐ 1๋งŒ3100์›์„ ๊ธฐ๋กํ•œ ์šธ์‚ฐ์œผ๋กœ, 1๋ถ„๊ธฐ 1๋งŒ9100์› ๋Œ€๋น„ 31.4% ๊ฐ์†Œํ–ˆ๋‹ค. ์ด์–ด ๋Œ€๊ตฌ(-27.7%), ์„œ์šธ(-26.9%), ๊ด‘์ฃผ(-24.9%), ๋ถ€์‚ฐ(-23.5%), ์„ธ์ข…(-23.4%), ๋Œ€์ „(-21%), ๊ฒฝ๊ธฐ(-19.2%), ์ธ์ฒœ(-18.5%) ์ˆœ์œผ๋กœ ๊ฐ์†Œํ–ˆ๋‹ค. ์ง€๋ฐฉ ๋„์‹œ์˜ ๊ฒฝ์šฐ๋„ ๋น„์Šทํ–ˆ๋‹ค. ๊ฒฝ๋‚จ์˜ 3๋ถ„๊ธฐ ์ˆœ์˜์—…์†Œ๋“์€ 1๋งŒ2800์›์œผ๋กœ 1๋ถ„๊ธฐ 1๋งŒ7400์› ๋Œ€๋น„ 26.4% ๊ฐ์†Œํ–ˆ์œผ๋ฉฐ ์ œ์ฃผ(-25.1%), ๊ฒฝ๋ถ(-24.1%), ์ถฉ๋‚จ(-20.9%), ๊ฐ•์›(-20.9%), ์ „๋‚จ(-20.1%), ์ „๋ถ(-17%), ์ถฉ๋ถ(-15.3%) ๋“ฑ๋„ ๊ฐ์†Œ์„ธ๋ฅผ ๋ณด์˜€๋‹ค. ์กฐํ˜„ํƒ ์ƒ๊ฐ€์ •๋ณด์—ฐ๊ตฌ์†Œ ์—ฐ๊ตฌ์›์€ "์˜ฌํ•ด ๋‚ด์ˆ˜ ๊ฒฝ๊ธฐ์˜ ์นจ์ฒด๋œ ๋ถ„์œ„๊ธฐ๊ฐ€ ์œ ์ง€๋˜๋ฉฐ ์ƒ๊ฐ€, ์˜คํ”ผ์Šค ๋“ฑ์„ ๋น„๋กฏํ•œ ์ˆ˜์ตํ˜• ๋ถ€๋™์‚ฐ ์‹œ์žฅ์˜ ๋ถ„์œ„๊ธฐ๋„ ๊ฒฝ์ง๋œ ๋ชจ์Šต์„ ๋ณด์˜€๊ณ  ์˜คํ”ผ์Šคํ…”, ์ง€์‹์‚ฐ์—…์„ผํ„ฐ ๋“ฑ์˜ ์ˆ˜์ตํ˜• ๋ถ€๋™์‚ฐ ๊ณต๊ธ‰๋„ ์ฆ๊ฐ€ํ•ด ๊ณต์‹ค์˜ ์œ„ํ—˜๋„ ๋Š˜์—ˆ๋‹ค"๋ฉฐ "์‹ค์ œ ์˜ฌ 3๋ถ„๊ธฐ ์ „๊ตญ ์ค‘๋Œ€ํ˜• ์ƒ๊ฐ€ ๊ณต์‹ค๋ฅ ์€ 11.5%๋ฅผ ๊ธฐ๋กํ•˜๋ฉฐ 1๋ถ„๊ธฐ 11.3% ๋Œ€๋น„ 0.2% ํฌ์ธํŠธ ์ฆ๊ฐ€ํ–ˆ๋‹ค"๊ณ  ๋งํ–ˆ๋‹ค. ๊ทธ๋Š” "์ตœ๊ทผ ์†Œ์…œ์ปค๋จธ์Šค(SNS๋ฅผ ํ†ตํ•œ ์ „์ž์ƒ๊ฑฐ๋ž˜), ์Œ์‹ ๋ฐฐ๋‹ฌ ์ค‘๊ฐœ ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜, ์ค‘๊ณ  ๋ฌผํ’ˆ ๊ฑฐ๋ž˜ ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜ ๋“ฑ์˜ ์‚ฌ์šฉ ์ฆ๊ฐ€๋กœ ์˜คํ”„๋ผ์ธ ๋งค์žฅ์— ์˜ํ–ฅ์„ ๋ฏธ์ณค๋‹ค"๋ฉฐ "ํ–ฅํ›„ ์ง€์—ญ, ์ฝ˜ํ…์ธ ์— ๋”ฐ๋ฅธ ์ƒ๊ถŒ ์–‘๊ทนํ™” ํ˜„์ƒ์€ ์‹ฌํ™”๋  ๊ฒƒ์œผ๋กœ ๋ณด์ธ๋‹ค"๊ณ  ๋ง๋ถ™์˜€๋‹ค.'
# Tokenize the article: each token becomes an integer ID, and
# return_tensors="pt" requests the result as a PyTorch tensor.
input_ids = tokenizer.encode(input_text, return_tensors="pt")

# Generate Summary Text Ids
# All decoding options are gathered here and unpacked into a single
# `generate` call.
generation_options = dict(
    bos_token_id=model.config.bos_token_id,  # BOS: Beginning Of Sentence
    eos_token_id=model.config.eos_token_id,  # EOS: End Of Sentence
    # Length penalty used by beam search. The original note described it as
    # "how short to make the summary"; per the transformers documentation,
    # values > 0 favor longer sequences.
    length_penalty=2.0,
    max_length=142,
    min_length=56,
    num_beams=4,  # beam search
)
summary_text_ids = model.generate(input_ids=input_ids, **generation_options)

# Decoding Text
# Only the first returned sequence is decoded; special tokens are dropped.
summary_text = tokenizer.decode(summary_text_ids[0], skip_special_tokens=True)
print(summary_text)