import gradio as gr


def calculate_training_metrics(
    gpu_choice, precision, num_gpus, num_parameters, dataset_tokens,
    num_epochs, utilization_rate=0.5, overhead=1.10, cost_per_gpu_hour=1.85
):
""" |
|
Calculates both the training time and cost for LLM training with parallel computing. |
|
|
|
Args: |
|
- gpu_choice (str): The choice of GPU model |
|
- precision (str): The precision level for training |
|
- num_gpus (int): Number of GPUs for parallel computing |
|
- num_parameters (float): Number of model parameters in billions |
|
- dataset_tokens (float): Number of tokens in the dataset |
|
- num_epochs (int): Number of training epochs |
|
- utilization_rate (float): GPU utilization rate (0 < rate ≤ 1) |
|
- overhead (float): Overhead multiplier for additional costs |
|
- cost_per_gpu_hour (float): Cost per GPU hour in dollars |
|
|
|
Returns: |
|
- tuple: (total_cost, training_days, training_hours) |
|
""" |
|
|
|
|
|
    # Peak throughput (FLOP/s) for each supported GPU model and precision.
    # Real sustained training throughput is lower; the utilization_rate
    # argument accounts for that gap.
    gpu_throughputs = {
        'A100 80GB PCIe': {'bf16': 312e12, 'tf32': 156e12},
        'A100 80GB SXM': {'bf16': 624e12, 'tf32': 312e12},
        'V100': {'tensor': 130e12},
        'H100 SXM': {'bf16': 1979e12, 'tf32': 989e12},
        'H100 PCIe': {'bf16': 1513e12, 'tf32': 756e12}
    }

    # Per-GPU peak throughput for the selected precision; fall back to the
    # first precision the chosen GPU supports if the selection is unavailable
    # (e.g. 'V100' only has a 'tensor' entry).
    supported = gpu_throughputs[gpu_choice]
    base_throughput = supported.get(precision, next(iter(supported.values())))

    # Assume ~90% parallel scaling efficiency: communication and
    # synchronization keep multi-GPU throughput below perfect linear scaling.
    parallel_efficiency = 0.9
    effective_throughput = base_throughput * num_gpus * parallel_efficiency
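    # With 8 GPUs at 624e12 FLOP/s each, for example, the effective rate is
    # 8 * 0.9 * 624e12 ≈ 4.5e15 FLOP/s.
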
    # Total number of tokens processed over the whole run.
    total_tokens = dataset_tokens * num_epochs

    # Standard approximation for transformer training compute:
    # about 6 FLOPs per parameter per token (forward + backward pass).
    total_flops = 6 * num_parameters * total_tokens
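    # For instance, the interface defaults (70B parameters, 1B tokens, 3 epochs)
    # give 6 * 70e9 * 3e9 = 1.26e21 FLOPs of training compute.
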
    # Ideal compute time in hours at 100% utilization (3600 seconds per hour).
    compute_hours = total_flops / (effective_throughput * 3600)

    # Scale up to expected wall-clock time: divide by the achievable
    # utilization rate and apply the overhead multiplier.
    actual_hours = (compute_hours / utilization_rate) * overhead
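    # With the defaults (utilization_rate=0.5, overhead=1.10) this multiplies
    # the ideal compute time by 1.10 / 0.5 = 2.2.
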
    # Split wall-clock hours into whole days plus remaining hours.
    training_days = int(actual_hours // 24)
    training_hours = actual_hours % 24

    # Total cost = wall-clock hours x hourly price x number of GPUs
    # (i.e. GPU-hours times the per-GPU-hour rate).
    total_cost = actual_hours * cost_per_gpu_hour * num_gpus

    return total_cost, training_days, training_hours
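

# Rough sanity check of the estimate above: 70B parameters, 1B tokens, 3 epochs
# on 8x 'A100 80GB SXM' in bf16 with the default utilization, overhead and price
# works out to roughly 7 days 3 hours of wall-clock time and about $2,540:
#
#   calculate_training_metrics('A100 80GB SXM', 'bf16', 8, 70e9, 1e9, 3)
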
def gradio_interface(
    gpu_choice, precision, num_gpus, num_parameters, dataset_tokens,
    num_epochs, utilization_rate, overhead, cost_per_gpu_hour
):
    # The UI takes parameter and token counts in billions; convert to absolute counts.
    num_parameters = float(num_parameters) * 1e9
    dataset_tokens = float(dataset_tokens) * 1e9
    num_gpus = int(num_gpus)
    num_epochs = int(num_epochs)
    utilization_rate = float(utilization_rate)
    overhead = float(overhead)
    cost_per_gpu_hour = float(cost_per_gpu_hour)

    cost, days, hours = calculate_training_metrics(
        gpu_choice, precision, num_gpus, num_parameters, dataset_tokens,
        num_epochs, utilization_rate, overhead, cost_per_gpu_hour
    )

    time_msg = f"{days} days and {hours:.1f} hours"
    cost_msg = f"${cost:,.2f}"

    return time_msg, cost_msg


gpu_choices = ["A100 80GB PCIe", "A100 80GB SXM", "V100", "H100 SXM", "H100 PCIe"]

title = "<h2 style='text-align: center;'>LLM Training Time and Cost Calculator</h2>" |
|
description = """ |
|
<p style='text-align: center;'>Calculate both the training time and cost for large language models (LLM) with parallel computing support.</p> |
|
<p><strong>Input Parameters:</strong></p> |
|
<ul> |
|
<li><strong>GPU Selection:</strong> Choose from various GPU models with different compute capabilities</li> |
|
<li><strong>Number of GPUs:</strong> Specify how many GPUs to use in parallel</li> |
|
<li><strong>Model Size:</strong> Number of parameters in billions</li> |
|
<li><strong>Dataset Size:</strong> Number of tokens in your dataset in billions</li> |
|
<li><strong>Training Epochs:</strong> Number of times to iterate over the dataset</li> |
|
<li><strong>Utilization Rate:</strong> Expected GPU utilization (typically 0.4-0.7)</li> |
|
<li><strong>Overhead:</strong> Additional time/cost factor for data loading, checkpointing, etc.</li> |
|
</ul> |
|
Ouputs: |
|
<ul> |
|
<li><strong>Estimated Training Time:</strong> Total days and hours required for training</li> |
|
<li><strong>Estimated Training Cost:</strong> Total cost in dollars based on GPU hours</li> |
|
</ul> |
|
Modified from <a href="https://huggingface.co/spaces/Heng666/LLM-Training-Cost-Calculator">this Hf Space</a>. |
|
""" |
|
|
|
iface = gr.Interface(
    fn=gradio_interface,
    inputs=[
        gr.Dropdown(choices=gpu_choices, label="Select GPU", value='A100 80GB PCIe'),
        gr.Dropdown(choices=['bf16', 'tf32', 'tensor'], label="Select Precision", value='bf16'),
        gr.Number(label="Number of GPUs", value=1, minimum=1, maximum=1024),
        gr.Number(label="Number of Parameters (billions)", value=70),
        gr.Number(label="Dataset Tokens (billions)", value=1),
        gr.Number(label="Number of Epochs", value=3, minimum=1),
        gr.Slider(minimum=0.1, maximum=1.0, step=0.1, value=0.5, label="GPU Utilization Rate"),
        gr.Slider(minimum=1.0, maximum=2.0, step=0.01, value=1.10, label="Overhead Factor"),
        gr.Number(label="Cost per GPU Hour ($)", value=1.85)
    ],
    outputs=[
        gr.Textbox(label="Estimated Training Time:"),
        gr.Textbox(label="Estimated Training Cost:")
    ],
    title=title,
    description=description,
    article="<p style='text-align: center;'>Improved with good intentions by ghost.</p>"
)

if __name__ == "__main__": |
|
iface.launch() |