ShiwenNi committed on
Commit
54d17c2
1 Parent(s): 4d1faf4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -23
app.py CHANGED
@@ -62,29 +62,28 @@ class Reviewer:
62
  return result, response.usage.total_tokens
63
 
64
  def extract_chapter(self, pdf_path):
65
- with open(pdf_path, 'rb') as file:
66
- # Create a PDF reader object
67
- pdf_reader = PyPDF2.PdfReader(pdf_path)
68
- # Get the total number of pages in the PDF
69
- num_pages = len(pdf_reader.pages)
70
- # Initialize the extraction state and the extracted text
71
- extraction_started = False
72
- extracted_text = ""
73
- # Iterate over every page of the PDF
74
- for page_number in range(num_pages):
75
- page = pdf_reader.pages[page_number]
76
- page_text = page.extract_text()
77
-
78
- # Start extracting at the first page whose text contains "abstract" (case-insensitive)
79
- if 'Abstract'.lower() in page_text.lower() and not extraction_started:
80
- extraction_started = True
81
- page_number_start = page_number
82
- # Once extraction has started, append this page's text to the extracted text
83
- if extraction_started:
84
- extracted_text += page_text
85
- # Stop once the current page is more than one page past the start page (no chapter-title check is made here)
86
- if page_number_start + 1 < page_number:
87
- break
88
  return extracted_text
89
 
90
  def main(api, review_format, paper_pdf, language):
 
62
  return result, response.usage.total_tokens
63
 
64
  def extract_chapter(self, pdf_path):
65
+ # Create a PDF reader object
66
+ pdf_reader = PyPDF2.PdfReader(pdf_path)
67
+ # Get the total number of pages in the PDF
68
+ num_pages = len(pdf_reader.pages)
69
+ # Initialize the extraction state and the extracted text
70
+ extraction_started = False
71
+ extracted_text = ""
72
+ # Iterate over every page of the PDF
73
+ for page_number in range(num_pages):
74
+ page = pdf_reader.pages[page_number]
75
+ page_text = page.extract_text()
76
+
77
+ # Start extracting at the first page whose text contains "abstract" (case-insensitive)
78
+ if 'Abstract'.lower() in page_text.lower() and not extraction_started:
79
+ extraction_started = True
80
+ page_number_start = page_number
81
+ # Once extraction has started, append this page's text to the extracted text
82
+ if extraction_started:
83
+ extracted_text += page_text
84
+ # Stop once the current page is more than one page past the start page (no chapter-title check is made here)
85
+ if page_number_start + 1 < page_number:
86
+ break
 
87
  return extracted_text
88
 
89
  def main(api, review_format, paper_pdf, language):