PirateXX committed on
Commit
42d7fda
·
1 Parent(s): 8ec7c36

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -10
app.py CHANGED
@@ -5,6 +5,8 @@ import gradio as gr
5
  import os
6
  import re
7
  import pdfplumber
 
 
8
  app = Flask(__name__)
9
 
10
  ACCESS_TOKEN = os.environ["ACCESS_TOKEN"]
@@ -79,17 +81,26 @@ def upload_file(file):
79
  # return findRealProb(text)
80
  # # return jsonify({'text': text})
81
  if file:
82
- pdf_file = file.name
83
- print(file, pdf_file)
84
- text = ""
85
- with pdfplumber.open(pdf_file) as pdf:
86
- cnt = 0
87
- for page in pdf.pages:
88
- cnt+=1
89
- text+=(page.extract_text(x_tolerance = 1))
90
- if cnt>5:
91
- break
92
  return findRealProb(text)
 
 
 
 
 
 
 
 
 
 
 
93
  else:
94
  return {"error":'No PDF file found in request'}
95
 
 
5
  import os
6
  import re
7
  import pdfplumber
8
+ import PyPDF2
9
+
10
  app = Flask(__name__)
11
 
12
  ACCESS_TOKEN = os.environ["ACCESS_TOKEN"]
 
81
  # return findRealProb(text)
82
  # # return jsonify({'text': text})
83
  if file:
84
+
85
+ with open(file.name, 'rb') as pdf_file:
86
+ pdf_reader = PyPDF2.PdfFileReader(pdf_file)
87
+ text = ''
88
+ for page_num in range(pdf_reader.getNumPages()):
89
+ page = pdf_reader.getPage(page_num)
90
+ text += page.extractText()
91
+
 
 
92
  return findRealProb(text)
93
+ # pdf_file = file.name
94
+ # print(file, pdf_file)
95
+ # text = ""
96
+ # with pdfplumber.open(pdf_file) as pdf:
97
+ # cnt = 0
98
+ # for page in pdf.pages:
99
+ # cnt+=1
100
+ # text+=(page.extract_text(x_tolerance = 1))
101
+ # if cnt>5:
102
+ # break
103
+ # return findRealProb(text)
104
  else:
105
  return {"error":'No PDF file found in request'}
106