Commit 475bc5f
1 Parent(s): 86b55e7

removed training calculation functionality
app.py CHANGED
@@ -23,11 +23,11 @@ quantization_bit_sizes = {
 # Define precision options
 precision_options = {
     'full': 4,
-    'mixed': 6,
+    'mixed': 6,
     'half': 2
 }
 
-def calculate_memory_usage(parameter_count, context_length, data_type, is_training, batch_size, vocab_size, precision):
+def calculate_memory_usage(parameter_count, context_length, data_type, batch_size, vocab_size, precision):
     # Convert bit size to byte size
     byte_size = quantization_bit_sizes[data_type] / 8
 
@@ -35,10 +35,10 @@ def calculate_memory_usage(parameter_count, context_length, data_type, is_training
     memory_params = parameter_count * byte_size
 
     # Memory usage for context (activations)
-    activations = calculate_activations(parameter_count, context_length, batch_size, vocab_size, precision, is_training)
+    activations = calculate_activations(parameter_count, context_length, batch_size, vocab_size, precision)
 
     # Outputs memory usage
-    outputs = 4 * batch_size * context_length * vocab_size
+    outputs = 4 * batch_size * context_length * vocab_size
 
     # Total memory usage
     total_memory_usage = memory_params + activations + outputs
@@ -48,7 +48,7 @@ def calculate_memory_usage(parameter_count, context_length, data_type, is_training
 
     return total_memory_usage_gb
 
-def calculate_activations(parameter_count, context_length, batch_size, vocab_size, precision, is_training):
+def calculate_activations(parameter_count, context_length, batch_size, vocab_size, precision):
     # Simplified activation calculation
     hidden_size = parameter_count ** 0.5  # assuming a square root relationship for hidden size
     num_attention_heads = 16  # a typical number of attention heads
@@ -73,7 +73,7 @@ def calculate_activations(parameter_count, context_length, batch_size, vocab_size
 
     layer = attention_block + mlp_block + layer_norms
 
-    activations = layer
+    activations = layer  # assuming 12 layers for simplicity
 
     return activations
 
@@ -84,12 +84,11 @@ st.title("Memory Usage Calculator for Large Language Models")
 parameter_count = st.number_input("Parameter Count (in billions)", value=1, step=1) * 1e9
 context_length = st.number_input("Context Length (number of tokens)", value=512, step=1)
 data_type = st.selectbox("Data Type", options=list(quantization_bit_sizes.keys()))
-is_training = st.checkbox("Training Mode", value=False)
 batch_size = st.number_input("Batch Size", value=1, step=1)
 vocab_size = st.number_input("Vocabulary Size", value=30000, step=1000)
 precision = st.selectbox("Precision", options=list(precision_options.keys()))
 
 # Calculate memory usage
 if st.button("Calculate Memory Usage"):
-    memory_usage = calculate_memory_usage(parameter_count, context_length, data_type, is_training, batch_size, vocab_size, precision)
-    st.write(f"Estimated Memory Usage for …
+    memory_usage = calculate_memory_usage(parameter_count, context_length, data_type, batch_size, vocab_size, precision)
+    st.write(f"Estimated Memory Usage for Inference: {memory_usage:.2f} GB")