import requests import pandas as pd from bs4 import BeautifulSoup def extract_div_contents_from_url(url): response = requests.get(url) if response.status_code != 200: print(f"Error: Received status code {response.status_code} for URL: {url}") return pd.DataFrame(columns=['title', 'text_url', 'deletion_discussion', 'label', 'confirmation', 'discussion', 'verdict']) soup = BeautifulSoup(response.content, 'html.parser') div_classes = ['boilerplate afd vfd xfd-closed', 'boilerplate afd vfd xfd-closed archived mw-archivedtalk'] divs = [] for div_class in div_classes: divs.extend(soup.find_all('div', class_=div_class)) url_fragment = url.split('#')[-1].replace('_', ' ') data = [] for div in divs: try: title = None text_url = None # Extract title and text_url title_tag = div.find('a') if title_tag: title_span = div.find('span', {'data-mw-comment-start': True}) if title_span: title_anchor = title_span.find_next_sibling('a') if title_anchor: title = title_anchor.text text_url = 'https://en.wikipedia.org' + title_anchor['href'] else: title = title_tag.text text_url = 'https://en.wikipedia.org' + title_tag['href'] if title == 'talk page' or title is None: heading_tag = div.find('div', class_='mw-heading mw-heading3') if heading_tag: title_tag = heading_tag.find('a') if title_tag: title = title_tag.text text_url = 'https://en.wikipedia.org' + title_tag['href'] if not title: continue if title.lower() != url_fragment.lower(): continue deletion_discussion = div.prettify() # Extract label label = '' verdict_tag = div.find('p') if verdict_tag: label_b_tag = verdict_tag.find('b') if label_b_tag: label = label_b_tag.text.strip() # Extract confirmation confirmation = '' discussion_tag = div.find('dd') if discussion_tag: discussion_tag_i = discussion_tag.find('i') if discussion_tag_i: confirmation_b_tag = discussion_tag_i.find('b') if confirmation_b_tag: confirmation = confirmation_b_tag.text.strip() # Split deletion_discussion into discussion and verdict parts = deletion_discussion.split('