pragnakalp committed on
Commit
1a99dd4
·
1 Parent(s): 9cca6b7

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import absolute_import, division, print_function, unicode_literals
2
+ import os
3
+ import gradio as gr
4
+ from fastai.text.all import *
5
+ from transformers import *
6
+ from blurr.data.all import *
7
+ from blurr.modeling.all import *
8
+ import spacy
9
+ from spacy_readability import Readability
10
+
11
# spaCy English pipeline with the spacy_readability component added as the
# final pipeline stage.
readablility_nlp = spacy.load('en_core_web_sm')
read = Readability()
cwd = os.getcwd()
readablility_nlp.add_pipe(read, last=True)

# BART summarizer: weights come from a directory under the current working
# directory, the tokenizer from the 'facebook/bart-large-cnn' checkpoint.
bart_ext_model_path = os.path.join(cwd, 'models/bart_extractive_model')
bart_extractive_model = BartForConditionalGeneration.from_pretrained(bart_ext_model_path)
bart_extractive_tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')

# T5 summarizer: weights come from a directory under the current working
# directory, the tokenizer from the checkpoint named below.
# AutoModelForSeq2SeqLM is the non-deprecated auto class for
# encoder-decoder models (it replaces AutoModelWithLMHead).
t5_model_path = os.path.join(cwd, 'models/t5_model')
t5_model = AutoModelForSeq2SeqLM.from_pretrained(t5_model_path)
t5_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-summarize-news")
23
+
24
def generate_text_summarization(sum_type, article):
    """Summarize ``article`` with the model selected by ``sum_type``.

    Args:
        sum_type: Label of the summarizer to use; either
            'BART Extractive Text Summarization' or
            'T5 Abstractive Text Summarization'.
        article: The text to summarize.

    Returns:
        The generated summary as a string. An empty string is returned
        when ``article`` is empty or contains only whitespace.

    Raises:
        ValueError: If ``sum_type`` is not one of the two supported labels.
    """
    # Nothing to summarize: skip the (expensive) model call entirely.
    if not article or not article.strip():
        return ""

    if sum_type == 'BART Extractive Text Summarization':
        # truncation=True makes the max_length cap explicit instead of
        # relying on the tokenizer's implicit fallback.
        inputs = bart_extractive_tokenizer(
            [article],
            max_length=1024,
            truncation=True,
            return_tensors='pt',
        )
        summary_ids = bart_extractive_model.generate(
            inputs['input_ids'],
            num_beams=4,
            min_length=60,
            max_length=300,
            early_stopping=True,
        )
        # A single article was passed in, so only the first sequence is used.
        return bart_extractive_tokenizer.decode(
            summary_ids[0],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )

    if sum_type == 'T5 Abstractive Text Summarization':
        inputs = t5_tokenizer.encode(
            article,
            return_tensors="pt",
            max_length=2048,
            truncation=True,
        )
        summary_ids = t5_model.generate(
            inputs,
            num_beams=2,
            no_repeat_ngram_size=2,
            min_length=100,
            max_length=300,
            early_stopping=True,
        )
        return t5_tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # Fail loudly rather than silently returning None for an unknown choice.
    raise ValueError(f"Unsupported summarization type: {sum_type!r}")
61
+
62
# Input widgets: the text to summarize and the summarizer selector.
input_text = gr.Textbox(lines=5, label="Paragraph")
input_radio = gr.Radio(
    ['BART Extractive Text Summarization', 'T5 Abstractive Text Summarization'],
    label='Select summarization',
    value='BART Extractive Text Summarization',
)

# Output widget that displays the generated summary.
output_text = gr.Textbox(lines=7, label="Summarize text")

# The order of `inputs` matches the parameter order of
# generate_text_summarization(sum_type, article).
demo = gr.Interface(
    fn=generate_text_summarization,
    inputs=[input_radio, input_text],
    outputs=output_text,
    title="Text Summarization",
    css=".gradio-container {background-color: lightgray}",
    article="""<p style='text-align: center;'>Developed by: <a href="https://www.pragnakalp.com" target="_blank">Pragnakalp Techlabs</a></p>""",
)

demo.launch()