Commit • 1b7fb05
Parent(s): 4f5bf6c

add t4 to leaderboard

Files changed:
- .gitignore +2 -1
- README.md +59 -1
- app.py +1 -0
- src/llm_perf.py +8 -3
.gitignore
CHANGED
@@ -4,4 +4,5 @@ __pycache__/
 *ipynb
 .vscode/
 
-dataset/
+dataset/
+.venv
README.md
CHANGED
@@ -11,4 +11,62 @@ license: apache-2.0
 tags: [llm perf leaderboard, llm performance leaderboard, llm, performance, leaderboard]
 ---
 
-
+# LLM-perf leaderboard
+
+## 📖 About
+The 🤗 LLM-Perf Leaderboard 🏋️ is a leaderboard at the intersection of quality and performance.
+Its aim is to benchmark the performance (latency, throughput, memory & energy)
+of Large Language Models (LLMs) across different hardware, backends and optimizations
+using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark).
+
+Anyone from the community can request a new base model or hardware/backend/optimization
+configuration for automated benchmarking:
+
+- Model evaluation requests should be made in the
+[🤗 Open LLM Leaderboard 🏆](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard);
+we scrape the [list of canonical base models](https://github.com/huggingface/optimum-benchmark/blob/main/llm_perf/utils.py) from there.
+- Hardware/Backend/Optimization configuration requests should be made in the
+[🤗 LLM-Perf Leaderboard 🏋️](https://huggingface.co/spaces/optimum/llm-perf-leaderboard) or the
+[Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark) repository (where the code is hosted).
+
+## ⚙️ Details
+
+- To avoid communication-dependent results, only one GPU is used.
+- Score is the average evaluation score obtained from the [🤗 Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard).
+- LLMs run with a batch size of 1 and a prompt size of 256, generating 64 tokens for at least 10 iterations and 10 seconds.
+- Energy consumption is measured in kWh using CodeCarbon, taking into consideration the GPU, CPU, RAM and the location of the machine.
+- We measure three types of memory: Max Allocated Memory, Max Reserved Memory and Max Used Memory. The first two are reported by PyTorch, while the last is observed using PyNVML.
+
+All of our benchmarks are run by a single script,
+[benchmark_cuda_pytorch.py](https://github.com/huggingface/optimum-benchmark/blob/llm-perf/llm-perf/benchmark_cuda_pytorch.py),
+using the power of [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark) to guarantee reproducibility and consistency.
+
+## 🏃 How to run locally
+
+To run the LLM-Perf Leaderboard locally on your machine, follow these steps:
+
+### 1. Clone the Repository
+
+First, clone the repository to your local machine:
+
+```bash
+git clone https://github.com/huggingface/optimum-benchmark.git
+cd optimum-benchmark
+```
+
+### 2. Install the Required Dependencies
+
+Install the necessary Python packages listed in the requirements.txt file:
+`pip install -r requirements.txt`
+
+### 3. Run the Application
+
+You can run the Gradio application in one of the following ways:
+- Option 1: Using Python:
+`python app.py`
+- Option 2: Using the Gradio CLI (includes hot-reloading):
+`gradio app.py`
+
+### 4. Access the Application
+
+Once the application is running, you can access it locally in your web browser at http://127.0.0.1:7860/
app.py
CHANGED
@@ -18,6 +18,7 @@ from src.panel import (
 MACHINE_TO_HARDWARE = {
     "1xA10": "A10-24GB-150W 🖥️",
     "1xA100": "A100-80GB-275W 🖥️",
+    "1xT4": "T4-16GB-70W 🖥️",
     # "1xH100": "H100-80GB-700W 🖥️",
 }
 
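
The only app.py change is a new MACHINE_TO_HARDWARE entry, mapping the machine identifier used in the benchmark data to a display label that encodes GPU model, VRAM and power limit. A hypothetical sketch of consuming such a mapping (illustrative only, not taken from app.py):

```python
# Hypothetical sketch: the mapping pairs a machine id (as used in the
# benchmark CSVs) with a human-readable hardware label for the UI.
MACHINE_TO_HARDWARE = {
    "1xA10": "A10-24GB-150W 🖥️",
    "1xA100": "A100-80GB-275W 🖥️",
    "1xT4": "T4-16GB-70W 🖥️",
}

for machine, hardware in MACHINE_TO_HARDWARE.items():
    # e.g. one leaderboard tab per machine, titled with its hardware label
    print(f"{hardware}: data from machine {machine}")
```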
src/llm_perf.py
CHANGED
@@ -109,11 +109,16 @@ def processed_llm_perf_df(llm_perf_df):
 
 
 def get_llm_perf_df(machine: str = "1xA10"):
-    if os.path.exists(f"llm-perf-leaderboard-{machine}.csv"):
-        llm_perf_df = pd.read_csv(f"llm-perf-leaderboard-{machine}.csv")
+    dataset_directory = 'dataset'
+
+    if not os.path.exists(dataset_directory):
+        os.makedirs(dataset_directory)
+
+    if os.path.exists(f"{dataset_directory}/llm-perf-leaderboard-{machine}.csv"):
+        llm_perf_df = pd.read_csv(f"{dataset_directory}/llm-perf-leaderboard-{machine}.csv")
     else:
         llm_perf_df = get_raw_llm_perf_df(machine)
         llm_perf_df = processed_llm_perf_df(llm_perf_df)
+        llm_perf_df.to_csv(f"{dataset_directory}/llm-perf-leaderboard-{machine}.csv", index=False)
 
     return llm_perf_df
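
With this change, get_llm_perf_df caches each machine's processed leaderboard under dataset/ (now gitignored, per the .gitignore hunk above) instead of the working directory. A quick usage sketch, assuming the module's import path:

```python
from src.llm_perf import get_llm_perf_df

# First call fetches and processes the raw benchmark data, then writes
# dataset/llm-perf-leaderboard-1xT4.csv; subsequent calls read that CSV back.
df = get_llm_perf_df(machine="1xT4")
```

As a design note, `os.makedirs(dataset_directory, exist_ok=True)` would collapse the directory-existence check into one call; the explicit check simply mirrors the file-existence pattern used for the CSV.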