Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -756,7 +756,7 @@ def scrape_text(url, proxies) -> str:
|
|
756 |
script.extract()
|
757 |
text = soup.get_text()
|
758 |
|
759 |
-
# 截取文本,限制最多
|
760 |
text = text[:700]
|
761 |
|
762 |
# 丢弃字符数小于10的行和连续的空格键、回车键之间字符数小于10的部分
|
@@ -765,7 +765,7 @@ def scrape_text(url, proxies) -> str:
|
|
765 |
for line in lines:
|
766 |
# 去除多余的空格
|
767 |
cleaned_line = ' '.join(line.split())
|
768 |
-
if len(cleaned_line) >=
|
769 |
selected_lines.append(cleaned_line)
|
770 |
|
771 |
# 拼接选中的行
|
|
|
756 |
script.extract()
|
757 |
text = soup.get_text()
|
758 |
|
759 |
+
# 截取文本,限制最多700个字符
|
760 |
text = text[:700]
|
761 |
|
762 |
# 丢弃字符数小于10的行和连续的空格键、回车键之间字符数小于10的部分
|
|
|
765 |
for line in lines:
|
766 |
# 去除多余的空格
|
767 |
cleaned_line = ' '.join(line.split())
|
768 |
+
if len(cleaned_line) >= 37:
|
769 |
selected_lines.append(cleaned_line)
|
770 |
|
771 |
# 拼接选中的行
|