Spaces:

veeps
/

unstoppable_app

Sleeping

unstoppable_app / get_rff_endorsements.py

veeps

pulling rff endorsements

8375908 about 2 months ago

1.18 kB

	from bs4 import BeautifulSoup
	import pandas as pd

	# Extract a name and a state from every 'card4' article in rff.html and
	# write the resulting table to rff_endorsements.csv.

	# Placeholder stored when an expected element is absent from an article.
	MISSING = 'N/A'


	def _text_or_missing(tag):
	    """Return the stripped text of *tag*, or MISSING when *tag* is None."""
	    return tag.text.strip() if tag is not None else MISSING


	# Read the saved HTML document (presumably a local copy of the RFF
	# endorsements page -- confirm against how rff.html is produced).
	with open("rff.html", 'r', encoding='utf-8') as file:
	    html_content = file.read()

	# Parse the HTML content
	soup = BeautifulSoup(html_content, 'html.parser')

	# Every <article> carrying the 'card4' class is treated as one record.
	articles = soup.find_all('article', class_='card4')

	# NOTE: a class_ value containing spaces is compared against the element's
	# full class attribute string, so these two lookups depend on the page's
	# exact class order and will fall back to MISSING if the markup changes.
	data = [
	    {
	        'Name': _text_or_missing(
	            article.find('span', class_='-a:1 -as:3 -as:t1')
	        ),
	        'State': _text_or_missing(
	            article.find('p', class_='card4-role -t:11')
	        ),
	    }
	    for article in articles
	]

	# Naming the columns explicitly keeps the 'Name,State' header row in the
	# CSV even when no articles matched and `data` is empty.
	df = pd.DataFrame(data, columns=['Name', 'State'])

	df.to_csv("rff_endorsements.csv", index=False)