fb700 committed on
Commit
9605eed
1 Parent(s): ca2372c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -2
app.py CHANGED
@@ -756,7 +756,7 @@ def scrape_text(url, proxies) -> str:
756
  script.extract()
757
  text = soup.get_text()
758
 
759
- # 截取文本,限制最多700个字符
760
  text = text[:700]
761
 
762
  # 丢弃字符数小于10的行和连续的空格键、回车键之间字符数小于10的部分
@@ -765,7 +765,7 @@ def scrape_text(url, proxies) -> str:
765
  for line in lines:
766
  # 去除多余的空格
767
  cleaned_line = ' '.join(line.split())
768
- if len(cleaned_line) >= 47:
769
  selected_lines.append(cleaned_line)
770
 
771
  # 拼接选中的行
 
756
  script.extract()
757
  text = soup.get_text()
758
 
759
+ # 截取文本,限制最多500个字符
760
  text = text[:700]
761
 
762
  # 丢弃字符数小于10的行和连续的空格键、回车键之间字符数小于10的部分
 
765
  for line in lines:
766
  # 去除多余的空格
767
  cleaned_line = ' '.join(line.split())
768
+ if len(cleaned_line) >= 37:
769
  selected_lines.append(cleaned_line)
770
 
771
  # 拼接选中的行