import asyncio

import pandas as pd
from bs4 import BeautifulSoup
from pyppeteer import launch

# Page whose pricing table is scraped.
_PRICING_URL = 'https://llm-price.com/'

# Output column name -> index of the <td> cell it is read from.
# Cell 1 (provider) is skipped on purpose: per the original author's note the
# site only shows .svg logos there, so the "Source" cell is used instead.
_COLUMN_INDEX = {
    'Model Name': 0,
    '1 M Input Tokens': 2,
    '1 M Output Tokens': 3,
    'Source': 4,
    'Updated Time': 5,
}

# A row must have at least this many cells for every index above to be valid.
_MIN_CELLS = max(_COLUMN_INDEX.values()) + 1


def _parse_pricing_table(html):
    """Extract the first ``<table>`` in *html* into a DataFrame.

    Args:
        html: Page markup as a string.

    Returns:
        A ``pandas.DataFrame`` with the columns listed in ``_COLUMN_INDEX``
        (all values are stripped cell text), or ``None`` when the markup
        contains no ``<table>`` element.
    """
    soup = BeautifulSoup(html, 'html.parser')
    table = soup.find('table')
    if table is None:
        return None

    data = {name: [] for name in _COLUMN_INDEX}
    for row in table.find_all('tr')[1:]:  # Skip the header row
        cells = row.find_all('td')
        # Rows with too few cells (empty rows, colspan placeholders, ...)
        # cannot be indexed safely, so they are skipped rather than raising
        # IndexError.
        if len(cells) < _MIN_CELLS:
            continue
        for name, index in _COLUMN_INDEX.items():
            data[name].append(cells[index].text.strip())

    return pd.DataFrame(data)


async def main():
    """Scrape the pricing table with a headless browser.

    Opens the pricing page, waits for the ``#tableBody`` element to appear,
    and parses the resulting markup.

    Returns:
        A ``pandas.DataFrame`` with one row per table row, or ``None`` (after
        printing "Table not found.") when the page has no ``<table>``.
    """
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()
        await page.goto(_PRICING_URL)
        await page.waitForSelector('#tableBody')
        content = await page.content()
    finally:
        # Always close the browser, including on the success path and when
        # navigation or the selector wait raises.
        await browser.close()

    df = _parse_pricing_table(content)
    if df is None:
        print("Table not found.")
    return df


def get_pricing_df():
    """Run :func:`main` to completion and return its result.

    Returns:
        Whatever :func:`main` returns: a ``pandas.DataFrame``, or ``None``
        when no table was found.
    """
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # No current event loop is set for this thread; create and install
        # one so the coroutine can still be run.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
    return loop.run_until_complete(main())