File size: 555 Bytes
a636bcb
af2c647
 
 
 
 
 
 
 
 
3dda344
 
c7c538d
 
af2c647
 
 
 
 
c7c538d
af2c647
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# HTML tag names on the ignore list.
# NOTE(review): presumably elements with these tags are dropped during
# page extraction -- confirm against the consumer of this constant.
IGNORE_TAGS = [
    "script",
    "style",
    "button",
]
# CSS class-name patterns on the ignore list.
# NOTE(review): the 163.com entries use regular-expression syntax, so these
# are presumably matched with `re` against an element's class attribute --
# confirm against the consumer of this constant.
IGNORE_CLASSES = [
    "sidebar",
    "footer",
    "related",
    "comment",
    "topbar",
    # "menu",
    "offcanvas",
    "navbar",
    # 163.com
    # The alternatives are grouped so the "post_" prefix applies to every
    # branch. Ungrouped ("post_(top)|(side)|...") the pattern also matched
    # bare "side", "next", "crumb", ... anywhere in a class name.
    r"post_(?:top|side|recommends|crumb|statement|next|jubao)",
    # Raw strings keep the backslash without relying on Python's deprecated
    # handling of the unrecognized "\-" escape; the values are unchanged.
    r"ntes\-.*nav",
    r"nav\-bottom",
]

# Host names on the ignore list.
# NOTE(review): presumably URLs on these hosts are skipped -- confirm how the
# consumer compares them (exact match vs. suffix/substring).
IGNORE_HOSTS = ["weibo.com", "hymson.com", "yahoo.com"]

# HTTP header mapping carrying a desktop browser User-Agent string
# (Edge 111 / Chrome 111 on 64-bit Windows 10).
# NOTE(review): presumably passed as `headers=` to outgoing HTTP requests --
# confirm against the caller.
REQUESTS_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62"
    ),
}