|
import asyncio |
|
from pyppeteer import launch |
|
from bs4 import BeautifulSoup |
|
import pandas as pd |
|
|
|
async def main():
    """Scrape the pricing table at https://llm-price.com/ into a DataFrame.

    Launches a headless browser through pyppeteer, waits until the element
    with id ``tableBody`` is present, then parses the first ``<table>`` of
    the rendered HTML with BeautifulSoup.

    Returns:
        A ``pandas.DataFrame`` with the string columns ``Model Name``,
        ``1 M Input Tokens``, ``1 M Output Tokens``, ``Source`` and
        ``Updated Time`` (one row per table row, cell text stripped), or
        ``None`` when the page contains no ``<table>`` element, in which
        case "Table not found." is printed.

    Raises:
        Whatever pyppeteer raises for a failed navigation or a selector
        wait that times out; the browser is closed before it propagates.
    """
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()
        await page.goto('https://llm-price.com/')
        # The table body is waited for before the HTML is captured.
        await page.waitForSelector('#tableBody')
        content = await page.content()
    finally:
        # Close the browser on every path. Previously the early return on
        # the success path (and any exception above) skipped the close and
        # left the browser process running.
        await browser.close()

    soup = BeautifulSoup(content, 'html.parser')
    table = soup.find('table')
    if table is None:
        print("Table not found.")
        return None

    # Output column header -> index of the <td> cell it is read from.
    # Cell 1 is intentionally not collected, as in the original scraper
    # (NOTE(review): its meaning is not visible from this file).
    columns = (
        ('Model Name', 0),
        ('1 M Input Tokens', 2),
        ('1 M Output Tokens', 3),
        ('Source', 4),
        ('Updated Time', 5),
    )
    required_cells = max(index for _, index in columns) + 1

    data = {header: [] for header, _ in columns}
    # The first <tr> is skipped (presumably the header row).
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        # Skip rows without enough <td> cells (header-only rows, spanning
        # placeholder rows) instead of failing with IndexError.
        if len(cells) < required_cells:
            continue
        for header, index in columns:
            data[header].append(cells[index].text.strip())

    return pd.DataFrame(data)
|
|
|
|
|
def get_pricing_df():
    """Run the ``main`` coroutine to completion and return its result.

    Synchronous entry point: obtains the event loop from
    ``asyncio.get_event_loop()`` and blocks until ``main()`` finishes.

    Returns:
        Whatever ``main()`` returns.
    """
    # NOTE(review): calling asyncio.get_event_loop() with no running loop
    # is deprecated in recent Python releases; verify against the Python
    # versions this script must support.
    loop = asyncio.get_event_loop()
    return loop.run_until_complete(main())