File size: 1,936 Bytes
925ac7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import asyncio
from pyppeteer import launch
from bs4 import BeautifulSoup
import pandas as pd  # Add this import for DataFrame functionality

async def main():
    """Scrape the model pricing table from https://llm-price.com/.

    Launches a headless browser, loads the page, waits until an element
    matching ``#tableBody`` is present, and then parses the first
    ``<table>`` found in the rendered HTML.

    Returns:
        A ``pandas.DataFrame`` with the columns ``'Model Name'``,
        ``'1 M Input Tokens'``, ``'1 M Output Tokens'``, ``'Source'`` and
        ``'Updated Time'``, each holding the stripped text of the
        corresponding table cell. Returns ``None`` (after printing a
        message) when the page contains no ``<table>`` element.
    """
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()
        await page.goto('https://llm-price.com/')
        await page.waitForSelector('#tableBody')
        content = await page.content()
    finally:
        # Close the browser on every path (success, missing table, or an
        # exception while loading) so no browser process is left behind.
        await browser.close()

    table = BeautifulSoup(content, 'html.parser').find('table')
    if table is None:
        print("Table not found.")
        return None

    # Output column name -> index of the <td> it is read from.
    # Cell 1 (provider) is skipped on purpose: the site shows only .svg
    # logos there, so the 'Source' cell is used instead.
    cell_index = {
        'Model Name': 0,
        '1 M Input Tokens': 2,
        '1 M Output Tokens': 3,
        'Source': 4,
        'Updated Time': 5,
    }
    min_cells = max(cell_index.values()) + 1
    data = {column: [] for column in cell_index}

    for row in table.find_all('tr')[1:]:  # Skip the header row
        cols = row.find_all('td')
        # Rows without enough cells cannot be mapped onto the columns;
        # skip them instead of failing with IndexError.
        if len(cols) < min_cells:
            continue
        for column, idx in cell_index.items():
            data[column].append(cols[idx].text.strip())

    return pd.DataFrame(data)

# Run the main function
def get_pricing_df():
    """Run ``main()`` to completion synchronously and return its result.

    Uses the current event loop when one is available. If the calling
    thread has no current event loop (``asyncio.get_event_loop()`` raises
    ``RuntimeError``) or the current loop has already been closed, a new
    loop is created and installed as the current one before running.

    Returns:
        Whatever ``main()`` returns.
    """
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # No current event loop in this thread.
        loop = None

    if loop is None or loop.is_closed():
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    return loop.run_until_complete(main())