Spaces:
Running
Running
Commit
·
a04bf88
1
Parent(s):
2ccd2a4
Update app.py
Browse files
app.py
CHANGED
|
@@ -60,6 +60,7 @@ class Paper:
|
|
| 60 |
self.section_names = [] # 段落标题
|
| 61 |
self.section_texts = {} # 段落内容
|
| 62 |
self.abs = abs
|
|
|
|
| 63 |
if title == '':
|
| 64 |
self.pdf = fitz.open(self.path) # pdf文档
|
| 65 |
self.title = self.get_title()
|
|
@@ -79,7 +80,17 @@ class Paper:
|
|
| 79 |
print("section_page_dict", self.section_page_dict)
|
| 80 |
self.section_text_dict = self._get_all_page() # 段落与内容的对应字典
|
| 81 |
self.section_text_dict.update({"title": self.title})
|
| 82 |
-
self.pdf.close()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
def get_image_path(self, image_path=''):
|
| 85 |
"""
|
|
@@ -195,6 +206,7 @@ class Paper:
|
|
| 195 |
cur_title += cur_string
|
| 196 |
else:
|
| 197 |
cur_title += ' ' + cur_string
|
|
|
|
| 198 |
# break
|
| 199 |
title = cur_title.replace('\n', ' ')
|
| 200 |
return title
|
|
@@ -433,6 +445,7 @@ class Reader:
|
|
| 433 |
text += 'Title:' + paper.title
|
| 434 |
text += 'Url:' + paper.url
|
| 435 |
text += 'Abstrat:' + paper.abs
|
|
|
|
| 436 |
# intro
|
| 437 |
text += list(paper.section_text_dict.values())[0]
|
| 438 |
#max_token = 2500 * 4
|
|
|
|
| 60 |
self.section_names = [] # 段落标题
|
| 61 |
self.section_texts = {} # 段落内容
|
| 62 |
self.abs = abs
|
| 63 |
+
self.title_page = 0
|
| 64 |
if title == '':
|
| 65 |
self.pdf = fitz.open(self.path) # pdf文档
|
| 66 |
self.title = self.get_title()
|
|
|
|
| 80 |
print("section_page_dict", self.section_page_dict)
|
| 81 |
self.section_text_dict = self._get_all_page() # 段落与内容的对应字典
|
| 82 |
self.section_text_dict.update({"title": self.title})
|
| 83 |
+
self.pdf.close()
|
| 84 |
+
|
| 85 |
+
def get_paper_info(self):
    """Return the title page's text with the abstract and introduction removed.

    Reads the text of page ``self.title_page`` from ``self.pdf`` and strips
    out the abstract (the parsed ``'Abstract'`` section when present,
    otherwise ``self.abs``) and the parsed ``'Introduction'`` section,
    leaving whatever else is on that page.

    A missing ``'Introduction'`` section, or an empty/``None`` abstract, is
    skipped instead of raising.

    Returns:
        str: the remaining text of the title page.
    """
    # NOTE(review): the visible part of __init__ calls self.pdf.close();
    # presumably this method has to run before that point -- confirm.
    first_page_text = self.pdf[self.title_page].get_text()

    sections = self.section_text_dict
    abstract_text = sections["Abstract"] if "Abstract" in sections else self.abs
    introduction_text = sections.get("Introduction")

    for section_text in (abstract_text, introduction_text):
        # Nothing to strip for a section that is absent or empty.
        if section_text:
            first_page_text = first_page_text.replace(section_text, "")
    return first_page_text
|
| 94 |
|
| 95 |
def get_image_path(self, image_path=''):
|
| 96 |
"""
|
|
|
|
| 206 |
cur_title += cur_string
|
| 207 |
else:
|
| 208 |
cur_title += ' ' + cur_string
|
| 209 |
+
self.title_page = page_index
|
| 210 |
# break
|
| 211 |
title = cur_title.replace('\n', ' ')
|
| 212 |
return title
|
|
|
|
| 445 |
text += 'Title:' + paper.title
|
| 446 |
text += 'Url:' + paper.url
|
| 447 |
text += 'Abstrat:' + paper.abs
|
| 448 |
+
text += 'Paper_info:' + paper.section_text_dict['paper_info']
|
| 449 |
# intro
|
| 450 |
text += list(paper.section_text_dict.values())[0]
|
| 451 |
#max_token = 2500 * 4
|