Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
-
"""
|
3 |
|
4 |
Automatically generated by Colaboratory.
|
5 |
|
@@ -9,25 +9,42 @@ Original file is located at
|
|
9 |
|
10 |
# !pip install gradio
|
11 |
|
12 |
-
|
|
|
|
|
13 |
"""
|
14 |
-
Estimates the training cost of a large language model.
|
15 |
|
16 |
Args:
|
|
|
|
|
17 |
- number_of_parameters (int): The number of parameters in the model.
|
18 |
- number_of_tokens (int): The number of tokens to train on.
|
19 |
-
- gpu_throughput (float, optional): The peak throughput of the GPU in FLOPs/sec. Default is 312 TFLOPs/sec for A100 GPUs.
|
20 |
- utilization_rate (float, optional): The utilization rate of the GPU (0 < utilization_rate ≤ 1). Default is 0.5 (50%).
|
21 |
- overhead (float, optional): Multiplier to account for overhead and additional costs (1 + overhead percentage). Default is 1.10 (10% overhead).
|
22 |
- cost_per_gpu_hour (float, optional): The cost per hour of using the GPU. Default is $1.85/hour.
|
23 |
|
24 |
Returns:
|
25 |
- float: The estimated total cost of training the model.
|
|
|
|
|
26 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
# Calculate the total number of FLOPs required for training
|
28 |
total_flops = 6 * number_of_parameters * number_of_tokens
|
29 |
|
30 |
-
# Calculate the number of hours required on the GPU
|
31 |
gpu_hours = total_flops / (gpu_throughput * 3600)
|
32 |
|
33 |
# Adjust for the actual utilization of the GPUs
|
@@ -41,38 +58,40 @@ def estimate_training_cost(number_of_parameters, number_of_tokens, gpu_throughpu
|
|
41 |
|
42 |
return total_cost
|
43 |
|
44 |
-
|
45 |
-
# Let's say we have a model with 70 billion parameters and it's trained on 2 trillion tokens
|
46 |
-
# The default values for the other parameters are used in this example
|
47 |
-
total_cost = estimate_training_cost(number_of_parameters=70e9, number_of_tokens=2e12)
|
48 |
-
total_cost
|
49 |
-
|
50 |
-
import gradio as gr
|
51 |
-
|
52 |
-
# Assume the function estimate_training_cost is already defined as per the previous discussion.
|
53 |
-
|
54 |
-
def gradio_interface(number_of_parameters, number_of_tokens, utilization_rate, overhead, cost_per_gpu_hour):
|
55 |
-
# Convert string inputs to correct types
|
56 |
number_of_parameters = float(number_of_parameters) * 1e9 # Convert from billions to actual number
|
57 |
number_of_tokens = float(number_of_tokens) * 1e12 # Convert from trillions to actual number
|
58 |
utilization_rate = float(utilization_rate)
|
59 |
overhead = float(overhead)
|
60 |
cost_per_gpu_hour = float(cost_per_gpu_hour)
|
61 |
|
62 |
-
|
63 |
-
cost = estimate_training_cost(number_of_parameters, number_of_tokens, utilization_rate=utilization_rate, overhead=overhead, cost_per_gpu_hour=cost_per_gpu_hour)
|
64 |
-
|
65 |
-
# Return the result as a formatted string
|
66 |
return f"The estimated training cost is ${cost:,.2f}"
|
67 |
|
|
|
|
|
|
|
68 |
# Define the title and description for the Gradio app
|
69 |
title = "<h2 style='text-align: center;'>LLM Training Cost Calculator</h2>"
|
70 |
-
description = "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
-
# Create the Gradio interface with title and description
|
73 |
iface = gr.Interface(
|
74 |
fn=gradio_interface,
|
75 |
inputs=[
|
|
|
|
|
76 |
gr.Textbox(label="Number of Parameters (in billions)", value="70"),
|
77 |
gr.Textbox(label="Number of Tokens (in trillions)", value="2"),
|
78 |
gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.5, label="GPU Utilization Rate"),
|
@@ -85,6 +104,4 @@ iface = gr.Interface(
|
|
85 |
article="<p style='text-align: center;'>Developed with ❤️ by Elfilali Ali</p>"
|
86 |
)
|
87 |
|
88 |
-
# Run the interface
|
89 |
iface.launch()
|
90 |
-
|
|
|
1 |
# -*- coding: utf-8 -*-
|
2 |
+
"""LLM Training Cost Calculator App.ipynb
|
3 |
|
4 |
Automatically generated by Colaboratory.
|
5 |
|
|
|
9 |
|
10 |
# !pip install gradio
|
11 |
|
12 |
+
import gradio as gr
|
13 |
+
|
14 |
+
def estimate_training_cost(gpu_choice, precision, number_of_parameters, number_of_tokens, utilization_rate=0.5, overhead=1.10, cost_per_gpu_hour=1.85):
|
15 |
"""
|
16 |
+
Estimates the training cost of a large language model based on the selected GPU and precision.
|
17 |
|
18 |
Args:
|
19 |
+
- gpu_choice (str): The choice of GPU, e.g., 'A100 80GB PCIe', 'V100', etc.
|
20 |
+
- precision (str): The precision level for the GPU, e.g., 'bf16', 'tf32', 'tensor'.
|
21 |
- number_of_parameters (int): The number of parameters in the model.
|
22 |
- number_of_tokens (int): The number of tokens to train on.
|
|
|
23 |
- utilization_rate (float, optional): The utilization rate of the GPU (0 < utilization_rate ≤ 1). Default is 0.5 (50%).
|
24 |
- overhead (float, optional): Multiplier to account for overhead and additional costs (1 + overhead percentage). Default is 1.10 (10% overhead).
|
25 |
- cost_per_gpu_hour (float, optional): The cost per hour of using the GPU. Default is $1.85/hour.
|
26 |
|
27 |
Returns:
|
28 |
- float: The estimated total cost of training the model.
|
29 |
+
|
30 |
+
The function dynamically adjusts the GPU throughput based on the selected GPU and precision. The throughput values are predefined for each GPU and precision combination. This estimation assumes a linear scaling of training cost with the number of parameters and tokens.
|
31 |
"""
|
32 |
+
|
33 |
+
gpu_throughputs = {
|
34 |
+
'A100 80GB PCIe': {'bf16': 312e12, 'tf32': 156e12},
|
35 |
+
'A100 80GB SXM': {'bf16': 624e12, 'tf32': 312e12},
|
36 |
+
'V100': {'tensor': 130e12}, # Assuming only the deep learning performance for V100
|
37 |
+
'H100 SXM': {'bf16': 1979e12, 'tf32': 989e12},
|
38 |
+
'H100 PCIe': {'bf16': 1513e12, 'tf32': 756e12}
|
39 |
+
}
|
40 |
+
|
41 |
+
# Get the correct GPU throughput
|
42 |
+
gpu_throughput = gpu_throughputs[gpu_choice][precision]
|
43 |
+
|
44 |
# Calculate the total number of FLOPs required for training
|
45 |
total_flops = 6 * number_of_parameters * number_of_tokens
|
46 |
|
47 |
+
# Calculate the number of hours required on the selected GPU
|
48 |
gpu_hours = total_flops / (gpu_throughput * 3600)
|
49 |
|
50 |
# Adjust for the actual utilization of the GPUs
|
|
|
58 |
|
59 |
return total_cost
|
60 |
|
61 |
+
def gradio_interface(gpu_choice, precision, number_of_parameters, number_of_tokens, utilization_rate, overhead, cost_per_gpu_hour):
    """Gradio callback: parse the form values and return a cost summary.

    Args:
        gpu_choice (str): GPU name selected in the dropdown.
        precision (str): Precision key selected in the dropdown.
        number_of_parameters (str | float): Model size, in billions of parameters.
        number_of_tokens (str | float): Training set size, in trillions of tokens.
        utilization_rate (str | float): GPU utilization rate, 0 < rate <= 1.
        overhead (str | float): Overhead multiplier (e.g. 1.10 for 10% overhead).
        cost_per_gpu_hour (str | float): Price of one GPU hour, in dollars.

    Returns:
        str: The formatted estimated cost, or a human-readable error message
        when the inputs are invalid or the GPU/precision pair is unsupported.
    """
    import math  # local import: only needed for the finiteness check below

    # The text boxes deliver free-form strings, so parsing can fail.
    try:
        number_of_parameters = float(number_of_parameters) * 1e9  # billions -> count
        number_of_tokens = float(number_of_tokens) * 1e12  # trillions -> count
        utilization_rate = float(utilization_rate)
        overhead = float(overhead)
        cost_per_gpu_hour = float(cost_per_gpu_hour)
    except (TypeError, ValueError):
        return ("Invalid input: parameters, tokens, utilization rate, overhead "
                "and cost per GPU hour must all be numbers.")

    # The estimator documents 0 < utilization_rate <= 1; this also rejects NaN.
    if not 0 < utilization_rate <= 1:
        return "Invalid input: GPU utilization rate must be greater than 0 and at most 1."

    quantities = (number_of_parameters, number_of_tokens, overhead, cost_per_gpu_hour)
    if not all(math.isfinite(value) and value >= 0 for value in quantities):
        return ("Invalid input: parameters, tokens, overhead and cost per GPU hour "
                "must be finite and non-negative.")

    # Not every GPU supports every precision offered in the dropdown (e.g. V100
    # only has 'tensor'); the throughput lookup raises KeyError for such pairs.
    try:
        cost = estimate_training_cost(
            gpu_choice,
            precision,
            number_of_parameters,
            number_of_tokens,
            utilization_rate=utilization_rate,
            overhead=overhead,
            cost_per_gpu_hour=cost_per_gpu_hour,
        )
    except KeyError:
        return f"Precision '{precision}' is not available for GPU '{gpu_choice}'."

    return f"The estimated training cost is ${cost:,.2f}"
|
70 |
|
71 |
+
gpu_choices = ["A100 80GB PCIe", "A100 80GB SXM", "V100", "H100 SXM", "H100 PCIe"]
|
72 |
+
default_precisions = ['bf16', 'tf32', 'tensor', 'bf16', 'bf16'] # Default precision for each GPU
|
73 |
+
|
74 |
# Define the title and description for the Gradio app
|
75 |
title = "<h2 style='text-align: center;'>LLM Training Cost Calculator</h2>"
|
76 |
+
description = """
|
77 |
+
<p style='text-align: center;'>Estimate the cost of training large language models (LLM). This tool helps you calculate the cost based on model parameters, tokens, and GPU selections with various precision options. Select a GPU and the precision level to get an accurate cost estimate.</p>
|
78 |
+
<p><strong>Available GPUs and Precisions:</strong></p>
|
79 |
+
<ul>
|
80 |
+
<li><strong>A100 80GB PCIe:</strong> Available precisions - BFLOAT16 (bf16), Tensor Float 32 (tf32).</li>
|
81 |
+
<li><strong>A100 80GB SXM:</strong> Available precisions - BFLOAT16 (bf16), Tensor Float 32 (tf32).</li>
|
82 |
+
<li><strong>V100:</strong> Uses Deep Learning performance with Tensor Cores (tensor) as the default and only precision.</li>
|
83 |
+
<li><strong>H100 SXM:</strong> Available precisions - BFLOAT16 (bf16), Tensor Float 32 (tf32).</li>
|
84 |
+
<li><strong>H100 PCIe:</strong> Available precisions - BFLOAT16 (bf16), Tensor Float 32 (tf32).</li>
|
85 |
+
</ul>
|
86 |
+
<p>The choice of GPU and precision impacts the throughput, affecting training time and cost. BFLOAT16 is generally faster and more cost-effective, while Tensor Float 32 offers higher precision. The V100 GPU is optimized for Deep Learning with Tensor Cores.</p>
|
87 |
+
<p style='text-align: center;'>We plan to extend this calculator to include calculating the cost of fine-tuning models using strategies like LoRA or QLoRA. Stay tuned for updates where you'll be able to input the model ID from the Hugging Face Hub, select the fine-tuning strategy, and specify quantization details if QLoRA is chosen.</p>
|
88 |
+
"""
|
89 |
|
|
|
90 |
iface = gr.Interface(
|
91 |
fn=gradio_interface,
|
92 |
inputs=[
|
93 |
+
gr.Dropdown(choices=gpu_choices, label="Select GPU", value='A100 80GB PCIe'),
|
94 |
+
gr.Dropdown(choices=['bf16', 'tf32', 'tensor'], label="Select Precision", value='bf16'),
|
95 |
gr.Textbox(label="Number of Parameters (in billions)", value="70"),
|
96 |
gr.Textbox(label="Number of Tokens (in trillions)", value="2"),
|
97 |
gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.5, label="GPU Utilization Rate"),
|
|
|
104 |
article="<p style='text-align: center;'>Developed with ❤️ by Elfilali Ali</p>"
|
105 |
)
|
106 |
|
|
|
107 |
iface.launch()
|
|