Spaces:
Sleeping
Sleeping
Ari
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -16,7 +16,7 @@ import yake
|
|
16 |
from zipfile import ZipFile
|
17 |
from gtts import gTTS
|
18 |
from transformers import AutoTokenizer, AutoModelForPreTraining, AutoModel, AutoConfig
|
19 |
-
from summarizer import Summarizer,TransformerSummarizer
|
20 |
from transformers import pipelines
|
21 |
from pdfminer.high_level import extract_text
|
22 |
|
@@ -33,47 +33,33 @@ def pdf_to_text(text, PDF):
|
|
33 |
if text == "":
|
34 |
# The setup of huggingface.co
|
35 |
file_obj = PDF
|
36 |
-
#n = int(Percent.replace('%', ''))
|
37 |
-
|
38 |
text = extract_text(file_obj.name)
|
39 |
inputs = tokenizer([text], max_length=1024, return_tensors="pt")
|
40 |
|
41 |
Min = int(Min)
|
42 |
# Generate Summary
|
43 |
-
summary_ids = model.generate(inputs["input_ids"], num_beams=2,min_length=Min, max_length=Min+1000)
|
44 |
output_text = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
|
45 |
|
46 |
else:
|
47 |
inputs = tokenizer([text], max_length=1024, return_tensors="pt")
|
48 |
# Generate Summary
|
49 |
-
|
50 |
-
summary_ids = model.generate(inputs["input_ids"], num_beams=2,min_length=Min, max_length=Min+1000)
|
51 |
output_text = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
|
52 |
|
53 |
-
|
54 |
-
#output_text= bert_legal_model(text, min_length = 8, ratio = 0.05)
|
55 |
-
|
56 |
-
|
57 |
pdf = FPDF()
|
58 |
pdf.add_page()
|
59 |
pdf.set_font("Times", size = 12)
|
60 |
-
# open the text file in read mode
|
61 |
f = output_text
|
62 |
-
# insert the texts in pdf
|
63 |
pdf.multi_cell(190, 10, txt = f, align = 'C')
|
64 |
-
# save the pdf with name .pdf
|
65 |
pdf.output("legal.pdf")
|
66 |
|
67 |
myobj = gTTS(text=output_text, lang='en', slow=False)
|
68 |
myobj.save("legal.wav")
|
69 |
|
70 |
-
return
|
71 |
-
|
72 |
|
73 |
-
|
74 |
-
#pageObject.extractText()
|
75 |
-
iface = gr.Interface(fn = pdf_to_text,
|
76 |
-
inputs =["text", "file"], outputs=["audio","text", "file"] )
|
77 |
|
78 |
if __name__ == "__main__":
|
79 |
-
iface.launch(share=True
|
|
|
16 |
from zipfile import ZipFile
|
17 |
from gtts import gTTS
|
18 |
from transformers import AutoTokenizer, AutoModelForPreTraining, AutoModel, AutoConfig
|
19 |
+
from summarizer import Summarizer, TransformerSummarizer
|
20 |
from transformers import pipelines
|
21 |
from pdfminer.high_level import extract_text
|
22 |
|
|
|
33 |
if text == "":
|
34 |
# The setup of huggingface.co
|
35 |
file_obj = PDF
|
|
|
|
|
36 |
text = extract_text(file_obj.name)
|
37 |
inputs = tokenizer([text], max_length=1024, return_tensors="pt")
|
38 |
|
39 |
Min = int(Min)
|
40 |
# Generate Summary
|
41 |
+
summary_ids = model.generate(inputs["input_ids"], num_beams=2, min_length=Min, max_length=Min+1000)
|
42 |
output_text = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
|
43 |
|
44 |
else:
|
45 |
inputs = tokenizer([text], max_length=1024, return_tensors="pt")
|
46 |
# Generate Summary
|
47 |
+
summary_ids = model.generate(inputs["input_ids"], num_beams=2, min_length=Min, max_length=Min+1000)
|
|
|
48 |
output_text = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
|
49 |
|
|
|
|
|
|
|
|
|
50 |
pdf = FPDF()
|
51 |
pdf.add_page()
|
52 |
pdf.set_font("Times", size = 12)
|
|
|
53 |
f = output_text
|
|
|
54 |
pdf.multi_cell(190, 10, txt = f, align = 'C')
|
|
|
55 |
pdf.output("legal.pdf")
|
56 |
|
57 |
myobj = gTTS(text=output_text, lang='en', slow=False)
|
58 |
myobj.save("legal.wav")
|
59 |
|
60 |
+
return "legal.wav", output_text, "legal.pdf"
|
|
|
61 |
|
62 |
+
iface = gr.Interface(fn=pdf_to_text, inputs=["text", "file"], outputs=["audio", "text", "file"])
|
|
|
|
|
|
|
63 |
|
64 |
if __name__ == "__main__":
|
65 |
+
iface.launch() # Removed 'share=True'
|