Ari committed on
Commit
13b92eb
·
verified ·
1 Parent(s): fd55519

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -20
app.py CHANGED
@@ -16,7 +16,7 @@ import yake
16
  from zipfile import ZipFile
17
  from gtts import gTTS
18
  from transformers import AutoTokenizer, AutoModelForPreTraining, AutoModel, AutoConfig
19
- from summarizer import Summarizer,TransformerSummarizer
20
  from transformers import pipelines
21
  from pdfminer.high_level import extract_text
22
 
@@ -33,47 +33,33 @@ def pdf_to_text(text, PDF):
33
  if text == "":
34
  # The setup of huggingface.co
35
  file_obj = PDF
36
- #n = int(Percent.replace('%', ''))
37
-
38
  text = extract_text(file_obj.name)
39
  inputs = tokenizer([text], max_length=1024, return_tensors="pt")
40
 
41
  Min = int(Min)
42
  # Generate Summary
43
- summary_ids = model.generate(inputs["input_ids"], num_beams=2,min_length=Min, max_length=Min+1000)
44
  output_text = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
45
 
46
  else:
47
  inputs = tokenizer([text], max_length=1024, return_tensors="pt")
48
  # Generate Summary
49
-
50
- summary_ids = model.generate(inputs["input_ids"], num_beams=2,min_length=Min, max_length=Min+1000)
51
  output_text = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
52
 
53
-
54
- #output_text= bert_legal_model(text, min_length = 8, ratio = 0.05)
55
-
56
-
57
  pdf = FPDF()
58
  pdf.add_page()
59
  pdf.set_font("Times", size = 12)
60
- # open the text file in read mode
61
  f = output_text
62
- # insert the texts in pdf
63
  pdf.multi_cell(190, 10, txt = f, align = 'C')
64
- # save the pdf with name .pdf
65
  pdf.output("legal.pdf")
66
 
67
  myobj = gTTS(text=output_text, lang='en', slow=False)
68
  myobj.save("legal.wav")
69
 
70
- return "legal.wav", output_text, "legal.pdf"
71
-
72
 
73
- # return path
74
- #pageObject.extractText()
75
- iface = gr.Interface(fn = pdf_to_text,
76
- inputs =["text", "file"], outputs=["audio","text", "file"] )
77
 
78
  if __name__ == "__main__":
79
- iface.launch(share=True)
 
16
  from zipfile import ZipFile
17
  from gtts import gTTS
18
  from transformers import AutoTokenizer, AutoModelForPreTraining, AutoModel, AutoConfig
19
+ from summarizer import Summarizer, TransformerSummarizer
20
  from transformers import pipelines
21
  from pdfminer.high_level import extract_text
22
 
 
33
  if text == "":
34
  # The setup of huggingface.co
35
  file_obj = PDF
 
 
36
  text = extract_text(file_obj.name)
37
  inputs = tokenizer([text], max_length=1024, return_tensors="pt")
38
 
39
  Min = int(Min)
40
  # Generate Summary
41
+ summary_ids = model.generate(inputs["input_ids"], num_beams=2, min_length=Min, max_length=Min+1000)
42
  output_text = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
43
 
44
  else:
45
  inputs = tokenizer([text], max_length=1024, return_tensors="pt")
46
  # Generate Summary
47
+ summary_ids = model.generate(inputs["input_ids"], num_beams=2, min_length=Min, max_length=Min+1000)
 
48
  output_text = tokenizer.batch_decode(summary_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
49
 
 
 
 
 
50
  pdf = FPDF()
51
  pdf.add_page()
52
  pdf.set_font("Times", size = 12)
 
53
  f = output_text
 
54
  pdf.multi_cell(190, 10, txt = f, align = 'C')
 
55
  pdf.output("legal.pdf")
56
 
57
  myobj = gTTS(text=output_text, lang='en', slow=False)
58
  myobj.save("legal.wav")
59
 
60
+ return "legal.wav", output_text, "legal.pdf"
 
61
 
62
+ iface = gr.Interface(fn=pdf_to_text, inputs=["text", "file"], outputs=["audio", "text", "file"])
 
 
 
63
 
64
  if __name__ == "__main__":
65
+ iface.launch() # Removed 'share=True'