Spaces:
Sleeping
Sleeping
:zap: [Enhance] WebpageContentExtractor: Escape dash, and ignore yahoo.com
Browse files
networks/network_configs.py
CHANGED
@@ -10,13 +10,14 @@ IGNORE_CLASSES = [
|
|
10 |
"navbar",
|
11 |
# 163.com
|
12 |
"post_(top)|(side)|(recommends)|(crumb)|(statement)|(next)|(jubao)",
|
13 |
-
"ntes-.*nav",
|
14 |
-
"nav-bottom",
|
15 |
]
|
16 |
|
17 |
IGNORE_HOSTS = [
|
18 |
"weibo.com",
|
19 |
"hymson.com",
|
|
|
20 |
]
|
21 |
|
22 |
REQUESTS_HEADERS = {
|
|
|
10 |
"navbar",
|
11 |
# 163.com
|
12 |
"post_(top)|(side)|(recommends)|(crumb)|(statement)|(next)|(jubao)",
|
13 |
+
"ntes\-.*nav",
|
14 |
+
"nav\-bottom",
|
15 |
]
|
16 |
|
17 |
IGNORE_HOSTS = [
|
18 |
"weibo.com",
|
19 |
"hymson.com",
|
20 |
+
"yahoo.com",
|
21 |
]
|
22 |
|
23 |
REQUESTS_HEADERS = {
|