hsuvaskakoty committed on
Commit
13f6685
1 Parent(s): a0519f7

Upload data_prep.py

Browse files
Files changed (1) hide show
  1. data_prep.py +1 -1
data_prep.py CHANGED
@@ -9,7 +9,7 @@ def extract_div_contents_from_url(url):
9
  return pd.DataFrame(columns=['title', 'text_url', 'deletion_discussion', 'label', 'confirmation', 'discussion', 'verdict'])
10
 
11
  soup = BeautifulSoup(response.content, 'html.parser')
12
- div_classes = ['boilerplate afd vfd xfd-closed', 'boilerplate afd vfd xfd-closed archived mw-archivedtalk']
13
  divs = []
14
  for div_class in div_classes:
15
  divs.extend(soup.find_all('div', class_=div_class))
 
9
  return pd.DataFrame(columns=['title', 'text_url', 'deletion_discussion', 'label', 'confirmation', 'discussion', 'verdict'])
10
 
11
  soup = BeautifulSoup(response.content, 'html.parser')
12
+ div_classes = ["mw-heading mw-heading3",'boilerplate afd vfd xfd-closed', 'boilerplate afd vfd xfd-closed archived mw-archivedtalk']
13
  divs = []
14
  for div_class in div_classes:
15
  divs.extend(soup.find_all('div', class_=div_class))