Spaces:
Sleeping
Sleeping
hsuvaskakoty
committed on
Commit
•
13f6685
1
Parent(s):
a0519f7
Upload data_prep.py
Browse files - data_prep.py +1 -1
data_prep.py
CHANGED
@@ -9,7 +9,7 @@ def extract_div_contents_from_url(url):
|
|
9 |
return pd.DataFrame(columns=['title', 'text_url', 'deletion_discussion', 'label', 'confirmation', 'discussion', 'verdict'])
|
10 |
|
11 |
soup = BeautifulSoup(response.content, 'html.parser')
|
12 |
-
div_classes = ['boilerplate afd vfd xfd-closed', 'boilerplate afd vfd xfd-closed archived mw-archivedtalk']
|
13 |
divs = []
|
14 |
for div_class in div_classes:
|
15 |
divs.extend(soup.find_all('div', class_=div_class))
|
|
|
9 |
return pd.DataFrame(columns=['title', 'text_url', 'deletion_discussion', 'label', 'confirmation', 'discussion', 'verdict'])
|
10 |
|
11 |
soup = BeautifulSoup(response.content, 'html.parser')
|
12 |
+
div_classes = ["mw-heading mw-heading3",'boilerplate afd vfd xfd-closed', 'boilerplate afd vfd xfd-closed archived mw-archivedtalk']
|
13 |
divs = []
|
14 |
for div_class in div_classes:
|
15 |
divs.extend(soup.find_all('div', class_=div_class))
|