Spaces:
Runtime error
Runtime error
Yijun-Yang
committed on
Commit
·
f8ea2a4
1
Parent(s):
0573e7b
Add .gitignore with repodir and workdir
Browse files
- .gitignore +2 -0
- huixiangdou/service/findarticles.py +11 -11
.gitignore
CHANGED
@@ -2,3 +2,5 @@ __pycache__/
|
|
2 |
*.pyc
|
3 |
.ipynb_checkpoints
|
4 |
*/.ipynb_checkpoints/*
|
|
|
|
|
|
2 |
*.pyc
|
3 |
.ipynb_checkpoints
|
4 |
*/.ipynb_checkpoints/*
|
5 |
+
repodir/
|
6 |
+
workdir/
|
huixiangdou/service/findarticles.py
CHANGED
@@ -34,17 +34,17 @@ class ArticleRetrieval:
|
|
34 |
return pmc_ids
|
35 |
|
36 |
# 解析XML文件
|
37 |
-
def _get_all_text(self, element):
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
|
49 |
## 清洗XML文件
|
50 |
def _clean_xml(self,txt):
|
|
|
34 |
return pmc_ids
|
35 |
|
36 |
# 解析XML文件
|
37 |
+
def _get_all_text(self, element):
    """Return the text of an XML element and all its descendants, in document order.

    The result is the element's own ``text`` followed, for each child in
    order, by that child's full text and then the child's ``tail``. The
    ``tail`` of ``element`` itself is not included.

    Args:
        element: An ElementTree-style element (exposes ``text``, ``tail``
            and iteration over its children), or ``None``.

    Returns:
        The concatenated text, or ``""`` when ``element`` is ``None`` or
        contains no text.
    """
    if element is None:
        return ""

    fragments = []
    # An explicit stack replaces recursion so that deeply nested documents
    # cannot hit the interpreter's recursion limit. Each entry is
    # (node, emit_tail): False means "visit the node", True means "emit the
    # node's tail once its whole subtree has been visited".
    pending = [(element, False)]
    while pending:
        node, emit_tail = pending.pop()
        if emit_tail:
            if node.tail:
                fragments.append(node.tail)
            continue
        if node.text:
            fragments.append(node.text)
        # Push children in reverse so they pop in document order; each
        # child's tail marker sits beneath the child and is therefore
        # emitted right after that child's subtree.
        for child in reversed(list(node)):
            pending.append((child, True))
            pending.append((child, False))
    # Join once instead of repeated string concatenation.
    return "".join(fragments)
|
48 |
|
49 |
## 清洗XML文件
|
50 |
def _clean_xml(self,txt):
|