"""Extract endorsement names and states from ``rff.html`` into a CSV file.

Each ``<article class="card4">`` in the page contributes one row with a
``Name`` and a ``State`` column; a field whose element is not found is
recorded as ``'N/A'``.
"""

from bs4 import BeautifulSoup
import pandas as pd

# Input page and output file.
HTML_PATH = "rff.html"
CSV_PATH = "rff_endorsements.csv"

# CSS class values used to locate the elements.
# NOTE: per the BeautifulSoup docs, a multi-class string passed to ``class_``
# is compared against the exact value of the ``class`` attribute, so the two
# multi-class values below only match when the classes appear in this order.
ARTICLE_CLASS = 'card4'
NAME_CLASS = '-a:1 -as:3 -as:t1'
STATE_CLASS = 'card4-role -t:11'

# Placeholder written when an expected element is absent from an article.
MISSING = 'N/A'

# Fixed column layout, so the CSV header is written even when no article
# matches (an empty list alone would yield a DataFrame with no columns).
COLUMNS = ['Name', 'State']


def _text_or_missing(tag):
    """Return the stripped text of *tag*, or ``MISSING`` if *tag* is None."""
    return tag.text.strip() if tag is not None else MISSING


# Open and read the HTML file.
with open(HTML_PATH, 'r', encoding='utf-8') as file:
    html_content = file.read()

# Parse the HTML content and collect every article card.
soup = BeautifulSoup(html_content, 'html.parser')
articles = soup.find_all('article', class_=ARTICLE_CLASS)

# Build one record per article: name from the span, state from the paragraph.
data = [
    {
        'Name': _text_or_missing(article.find('span', class_=NAME_CLASS)),
        'State': _text_or_missing(article.find('p', class_=STATE_CLASS)),
    }
    for article in articles
]

# Create a DataFrame from the records and write it out without the index.
df = pd.DataFrame(data, columns=COLUMNS)
df.to_csv(CSV_PATH, index=False)