# Listing metadata carried over from the hosting page: Space status "Sleeping"; file size 1,182 bytes; id 8375908 (presumably the revision).
from bs4 import BeautifulSoup
import pandas as pd
# Scrape endorsement cards from a saved HTML page and export them as CSV.
#
# Reads "rff.html" from the working directory, extracts a name and a state
# from every <article class="card4"> element, and writes one row per article
# to "rff_endorsements.csv".

# Placeholder stored when an article lacks the expected element.
_MISSING = 'N/A'


def _text_or_missing(tag):
    """Return the stripped text of *tag*, or 'N/A' when *tag* is None.

    ``find`` returns None when nothing matches, so this keeps one malformed
    card from aborting the whole export.
    """
    return tag.text.strip() if tag is not None else _MISSING


# Open and read the HTML file
with open("rff.html", 'r', encoding='utf-8') as file:
    html_content = file.read()

# Parse the HTML content
soup = BeautifulSoup(html_content, 'html.parser')

# Find all article elements
articles = soup.find_all('article', class_='card4')

# NOTE: a multi-class string passed to ``class_`` is compared against the
# exact value of the class attribute, so the class order below has to match
# the page markup.
data = [
    {
        # Name: the span with class '-a:1 -as:3 -as:t1'
        'Name': _text_or_missing(
            article.find('span', class_='-a:1 -as:3 -as:t1')
        ),
        # State: the paragraph with class 'card4-role -t:11'
        'State': _text_or_missing(
            article.find('p', class_='card4-role -t:11')
        ),
    }
    for article in articles
]

# Naming the columns explicitly keeps the "Name,State" header in the CSV even
# when no article matched and ``data`` is empty.
df = pd.DataFrame(data, columns=['Name', 'State'])
df.to_csv("rff_endorsements.csv", index=False)