Brass-monkey committed on
Commit
3243b63
·
1 Parent(s): 3fcefd6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -8
app.py CHANGED
@@ -2,26 +2,25 @@ import gradio as gr
2
  from PyPDF2 import PdfReader
3
  from transformers import pipeline
4
 
5
- def summarize_and_extract_text(files):
6
- summarizer = pipeline("summarization")
7
  summaries = []
8
- extracted_texts = []
9
  for file in files:
10
  with open(file.name, "rb") as pdf_file:
11
  pdf_reader = PdfReader(pdf_file)
12
  text = ""
13
  for page in pdf_reader.pages:
14
  text += page.extract_text() + "\n"
15
- extracted_texts.append(text)
16
  summary = summarizer(text, max_length=50, min_length=10, do_sample=False)[0]['summary_text']
17
  summaries.append(summary)
18
- return summaries, extracted_texts
19
 
20
  iface = gr.Interface(
21
- fn=summarize_and_extract_text,
22
  inputs=gr.UploadButton("Click to Upload a PDF", file_types=["pdf"], file_count="multiple"),
23
- outputs=[gr.Textbox(label="Summarized Text"), gr.Textbox(label="Extracted Text")],
24
- title="PDF Summarizer & Extracted Text"
25
  )
26
 
27
  iface.launch()
 
2
  from PyPDF2 import PdfReader
3
  from transformers import pipeline
4
 
5
def summarize_pdf_content(files):
    """Summarize the text of each uploaded PDF.

    Args:
        files: Iterable of uploaded-file objects, each exposing a ``name``
            attribute that is a readable filesystem path. ``None`` or an
            empty collection (nothing uploaded) is accepted.

    Returns:
        A list with one summary string per input file, in input order.
        A file from which no text could be extracted gets an empty string.
    """
    # Nothing uploaded: bail out before paying for the model load.
    if not files:
        return []

    summarizer = pipeline("summarization", model="stevhliu/my_awesome_billsum_model")
    summaries = []

    for file in files:
        with open(file.name, "rb") as pdf_file:
            pdf_reader = PdfReader(pdf_file)
            # `or ""` keeps the concatenation safe when a page yields no text
            # (e.g. a scanned, image-only page).
            text = "".join(
                (page.extract_text() or "") + "\n" for page in pdf_reader.pages
            )

        # Do not ask the model to summarize nothing; keep the output aligned
        # with the input by recording an empty summary for this file.
        if not text.strip():
            summaries.append("")
            continue

        summary = summarizer(text, max_length=50, min_length=10, do_sample=False)[0]['summary_text']
        summaries.append(summary)
    return summaries
18
 
# Build the web UI: a multi-file upload button feeds summarize_pdf_content and
# the returned summaries are shown in a single text box.
iface = gr.Interface(
    fn=summarize_pdf_content,
    # Extension filters in `file_types` need a leading dot (".pdf", not "pdf").
    inputs=gr.UploadButton("Click to Upload a PDF", file_types=[".pdf"], file_count="multiple"),
    outputs=gr.Textbox(label="Summarized Text"),
    title="PDF Summarizer"
)

iface.launch()