|
import sys |
|
from functools import reduce |
|
|
|
from torch import nn |
|
import torch.distributed as dist |
|
|
|
|
|
def summary(model: nn.Module, file=sys.stdout):
    """Render a module tree (like ``repr(model)``) annotated with parameter counts.

    Args:
        model: the module to summarize.
        file: where to write the summary. ``sys.stdout`` (the default) gets
            ANSI-colored counts; a path string is opened, written, and closed;
            any other file-like object is written to and flushed; ``None``
            suppresses output entirely.

    Returns:
        int: total number of parameters in *model* (direct and nested).
    """

    def _repr(model):
        # Mirrors nn.Module.__repr__, but also accumulates parameter counts
        # bottom-up so every line can be tagged with its subtree's total.
        extra_lines = []
        extra_repr = model.extra_repr()
        if extra_repr:
            extra_lines = extra_repr.split('\n')
        child_lines = []
        total_params = 0
        for key, module in model._modules.items():
            mod_str, num_params = _repr(module)
            mod_str = nn.modules.module._addindent(mod_str, 2)
            child_lines.append('(' + key + '): ' + mod_str)
            total_params += num_params
        lines = extra_lines + child_lines

        for name, p in model._parameters.items():
            # _parameters may contain None entries; hasattr filters those out.
            if hasattr(p, 'shape'):
                # Initial value 1 makes 0-dim (scalar) parameters count as one
                # element; without it reduce raises on an empty shape tuple.
                total_params += reduce(lambda x, y: x * y, p.shape, 1)

        main_str = model._get_name() + '('
        if lines:
            # Single-line repr when there is only extra info and no children.
            if len(extra_lines) == 1 and not child_lines:
                main_str += extra_lines[0]
            else:
                main_str += '\n ' + '\n '.join(lines) + '\n'

        main_str += ')'
        if file is sys.stdout:
            # ANSI green highlight for the count when printing to a terminal.
            main_str += ', \033[92m{:,}\033[0m params'.format(total_params)
        else:
            main_str += ', {:,} params'.format(total_params)
        return main_str, total_params

    string, count = _repr(model)
    if file is not None:
        if isinstance(file, str):
            # We opened this handle, so we must close it (the original leaked it).
            with open(file, 'w') as f:
                print(string, file=f)
        else:
            print(string, file=file)
            file.flush()

    return count
|
|
|
|
|
def grad_norm(model: nn.Module) -> float:
    """Return the global L2 norm over all parameter gradients of *model*.

    Parameters whose ``.grad`` is ``None`` (frozen parameters, or any
    parameter before the first backward pass) are skipped; previously they
    raised ``AttributeError``. A model with no gradients yields ``0.0``.
    """
    total_sq = 0.0
    for p in model.parameters():
        if p.grad is None:
            continue
        # Accumulate squared per-parameter L2 norms, then take the root once.
        total_sq += p.grad.data.norm(2).item() ** 2
    return total_sq ** 0.5
|
|
|
def distributed():
    """Return True when torch.distributed is both available and initialized."""
    if not dist.is_available():
        return False
    return dist.is_initialized()