# coding=utf-8
# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved.
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
# and OPT implementations in this library. It has been modified from its
# original forms to accommodate minor architectural differences compared
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""YuLanMini model configuration."""

import math

from transformers.configuration_utils import PretrainedConfig
from transformers.utils import logging

logger = logging.get_logger(__name__)

YULANMINI_PRETRAINED_CONFIG_ARCHIVE_MAP = {}


class YuLanMiniConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`YuLanMiniModel`]. It is used to instantiate a
    YuLanMini model according to the specified arguments, defining the model architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Unless stated otherwise, every argument is stored unchanged as an attribute of the same name; how the modeling
    code consumes it is not visible from this file.

    Args:
        vocab_size (`int`, *optional*, defaults to 99000):
            Vocabulary size of the YuLanMini model. Defines the number of different tokens that can be represented by
            the `inputs_ids` passed when calling [`YuLanMiniModel`].
        hidden_size (`int`, *optional*, defaults to 1920):
            Dimension of the hidden representations.
        intermediate_size (`int`, *optional*, defaults to 4800):
            Dimension of the MLP representations.
        num_hidden_layers (`int`, *optional*, defaults to 56):
            Number of hidden layers in the Transformer decoder.
        num_attention_heads (`int`, *optional*, defaults to 30):
            Number of attention heads for each attention layer in the Transformer decoder.
        num_key_value_heads (`int`, *optional*, defaults to 6):
            This is the number of key_value heads that should be used to implement Grouped Query Attention. If
            `num_key_value_heads=num_attention_heads`, the model will use Multi Head Attention (MHA), if
            `num_key_value_heads=1` the model will use Multi Query Attention (MQA), otherwise GQA is used. When
            converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be
            constructed by meanpooling all the original heads within that group. For more details checkout [this
            paper](https://arxiv.org/pdf/2305.13245.pdf). If `None` is passed, it falls back to
            `num_attention_heads`.
        hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
            The non-linear activation function (function or string) in the decoder.
        max_position_embeddings (`int`, *optional*, defaults to 4096):
            The maximum sequence length that this model might ever be used with.
        rms_norm_eps (`float`, *optional*, defaults to 1e-06):
            The epsilon used by the rms normalization layers.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models). Only
            relevant if `config.is_decoder=True`.
        pad_token_id (`int`, *optional*):
            Padding token id.
        bos_token_id (`int`, *optional*, defaults to 1):
            Beginning of stream token id.
        eos_token_id (`int`, *optional*, defaults to 2):
            End of stream token id.
        tie_word_embeddings (`bool`, *optional*, defaults to `False`):
            Whether to tie weight embeddings. Must be `True` whenever `scale_emb != 1`.
        rope_theta (`float`, *optional*, defaults to 10000.0):
            The base period of the RoPE embeddings.
        use_sliding_window (`bool`, *optional*, defaults to `False`):
            When `False`, `sliding_window` is stored as `None` regardless of the value passed.
        sliding_window (`int`, *optional*, defaults to 4096):
            Sliding window size; only kept when `use_sliding_window=True`.
        rope_scaling (`Dict`, *optional*):
            Dictionary containing the scaling configuration for the RoPE embeddings. Currently supports two scaling
            strategies: linear and dynamic. Their scaling factor must be a float greater than 1. The expected format
            is `{"type": strategy name, "factor": scaling factor}`. When using this flag, don't update
            `max_position_embeddings` to the expected new maximum. See the following thread for more information on
            how these scaling strategies behave:
            https://www.reddit.com/r/LocalLLaMA/comments/14mrgpr/dynamically_scaled_rope_further_increases/. This is
            an experimental feature, subject to breaking API changes in future versions.
        attention_bias (`bool`, *optional*, defaults to `True`):
            Whether to use a bias in the query, key, value and output projection layers during self-attention.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout ratio for the attention probabilities.
        shrink_alpha (`float`, *optional*, defaults to 1):
            Embedding-gradient scaling factor (per the original source comment).
        shrink_alpha2 (`float`, *optional*, defaults to 1):
            Second embedding-gradient scaling factor (grouped with `shrink_alpha` in the signature).
        use_liger (`bool`, *optional*, defaults to `False`):
            Presumably toggles Liger kernels in the modeling code -- TODO confirm.
        initializer_range (`float`, *optional*, defaults to 0.014434):
            The standard deviation used for initializing weight matrices.
        init_scale_o (`float`, *optional*, defaults to 10.582218):
            Divisor applied to `initializer_range` for `o_proj`/`down_proj` (as reported by the diagnostics printed
            in `__init__`). Must be 1 when any muparam base dimension is overridden.
        model_reproduce (`str`, *optional*, defaults to `"transformer"`):
            `None` falls back to `"transformer"`.
        hidden_states_shrink (`float` or `"muparam"`, *optional*, defaults to 1):
            The string `"muparam"` resolves to `1 / sqrt(num_hidden_layers)`; any other value is stored as given.
        dim_model_base (`int`, *optional*):
            muparam base width; `None` falls back to `hidden_size` (muparam disabled).
        dim_ffn_base_init (`int`, *optional*):
            Accepted for backward compatibility only; no longer used by the current muparam scheme and not stored.
        dim_model_base_init (`int`, *optional*):
            muparam base width for initialization; `None` disables it.
        dim_model_base_attn (`int`, *optional*):
            muparam base for attention scaling; `None` falls back to `hidden_size // num_attention_heads`. A value of
            1 yields a `1 / head_dim` attention scale.
        dim_model_base_lmh (`int`, *optional*):
            muparam base for the `lm_head` init; `None` falls back to 1, which leaves the `lm_head` init unscaled.
        dim_model_base_logits (`int`, *optional*):
            `None` falls back to `hidden_size`.
        dim_model_base_lr (`int`, *optional*):
            `None` falls back to `hidden_size`.
        scale_emb (`float`, *optional*, defaults to 1):
            Embedding scale; `None` falls back to 1.
        qk_layernorm (`bool`, *optional*, defaults to `False`), layer_norm_eps (`float`, *optional*, defaults to
        1e-06), embedding_ln (`bool`, *optional*, defaults to `False`), embedding_rmsln (`bool`, *optional*, defaults
        to `False`), ln_scale (`float`, *optional*, defaults to 1.0), z_loss (`float`, *optional*, defaults to
        0.0001):
            Normalization / loss options. `embedding_ln` and `embedding_rmsln` are mutually exclusive.
        wesar_weights (`bool`, *optional*, defaults to `True`), embed_tokens_alpha, q_proj_alpha, k_proj_alpha,
        v_proj_alpha, o_proj_alpha, down_proj_alpha, gate_up_proj_alpha, input_layernorm_alpha,
        post_attention_layernorm_alpha, norm_alpha, lm_head_alpha (each *optional*, defaults to 1), use_norm_alpha
        (`bool`, *optional*, defaults to `True`), use_emb_alpha (`bool`, *optional*, defaults to `False`):
            "wesar" re-parameterization options and per-module alpha values.
        rms_type (`str`, *optional*, defaults to `"llama"`):
            RMS norm variant selector.
        num_steps_trained_before_this_epoch (`int`, *optional*, defaults to 0),
        num_epochs_trained_before_this_epoch (`int`, *optional*, defaults to 0):
            Training-progress bookkeeping, see the comment in `__init__`.
        gradient_checkpointing_step (`int`, *optional*, defaults to 7):
            Gradient-checkpointing setting (listed under "acceleration" in the original source).

    Raises:
        ValueError: If `rope_scaling` is malformed, if both `embedding_ln` and `embedding_rmsln` are set, if muparam
            is enabled while `init_scale_o != 1`, or if `scale_emb != 1` while `tie_word_embeddings` is `False`.

    ```python
    >>> from transformers import YuLanMiniModel, YuLanMiniConfig

    >>> # Initializing a default YuLanMini configuration
    >>> configuration = YuLanMiniConfig()

    >>> # Initializing a model from that configuration
    >>> model = YuLanMiniModel(configuration)

    >>> # Accessing the model configuration
    >>> configuration = model.config
    ```"""

    model_type = "yulanmini"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=99000,
        hidden_size=1920,
        intermediate_size=4800,
        num_hidden_layers=56,
        num_attention_heads=30,
        num_key_value_heads=6,
        # rarely changed options
        hidden_act="silu",
        max_position_embeddings=4096,
        rms_norm_eps=1e-6,
        use_cache=True,
        pad_token_id=None,  # /home/u20140041/pretrain-mini/preprocess/modify_tokenizer/1731
        bos_token_id=1,
        eos_token_id=2,
        tie_word_embeddings=False,
        rope_theta=10000.0,
        use_sliding_window=False,
        sliding_window=4096,
        rope_scaling=None,
        attention_bias=True,  # qwen
        attention_dropout=0.0,
        # embedding-gradient scaling
        shrink_alpha=1,
        shrink_alpha2=1,
        use_liger=False,
        # initialization
        initializer_range=0.014434,
        init_scale_o=10.582218,
        model_reproduce="transformer",
        # muparam options; the defaults must leave every muparam feature disabled
        hidden_states_shrink=1,
        dim_model_base=None,
        dim_ffn_base_init=None,  # no longer used by the current muparam scheme
        dim_model_base_init=None,
        dim_model_base_attn=None,
        dim_model_base_lmh=None,
        dim_model_base_logits=None,
        dim_model_base_lr=None,
        scale_emb=1,
        # qk_layernorm
        qk_layernorm=False,
        layer_norm_eps=1e-6,
        embedding_ln=False,
        embedding_rmsln=False,
        ln_scale=1.,
        z_loss=0.0001,
        # wesar
        wesar_weights=True,
        embed_tokens_alpha=1,
        q_proj_alpha=1,
        k_proj_alpha=1,
        v_proj_alpha=1,
        o_proj_alpha=1,
        down_proj_alpha=1,
        gate_up_proj_alpha=1,
        input_layernorm_alpha=1,
        post_attention_layernorm_alpha=1,
        norm_alpha=1,
        lm_head_alpha=1,
        use_norm_alpha=True,
        use_emb_alpha=False,
        rms_type="llama",
        num_steps_trained_before_this_epoch=0,
        num_epochs_trained_before_this_epoch=0,
        # acceleration
        gradient_checkpointing_step=7,
        **kwargs,
    ):
        # Training state: updated once per epoch and constant within an epoch. For example, while training on
        # the 4th pass over the data both values are those of the last step of the 3rd pass (epochs=3,
        # steps=xxx), for every step of the 4th pass. Whether they get updated is controlled by
        # update_trained_steps_and_epochs.
        self.num_steps_trained_before_this_epoch = num_steps_trained_before_this_epoch
        self.num_epochs_trained_before_this_epoch = num_epochs_trained_before_this_epoch

        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.use_sliding_window = use_sliding_window
        self.sliding_window = sliding_window if use_sliding_window else None

        # for backward compatibility
        if num_key_value_heads is None:
            num_key_value_heads = num_attention_heads

        self.num_key_value_heads = num_key_value_heads
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.rms_norm_eps = rms_norm_eps
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.rope_scaling = rope_scaling
        self._rope_scaling_validation()
        self.attention_bias = attention_bias
        self.attention_dropout = attention_dropout
        self.shrink_alpha = shrink_alpha
        # Previously accepted but silently dropped; store it so it survives save/load like shrink_alpha.
        self.shrink_alpha2 = shrink_alpha2
        self.use_liger = use_liger
        self.init_scale_o = init_scale_o

        head_dim = hidden_size // num_attention_heads
        if hidden_states_shrink == "muparam":
            self.hidden_states_shrink = 1 / math.sqrt(num_hidden_layers)
        else:
            self.hidden_states_shrink = hidden_states_shrink
        self.dim_model_base = dim_model_base if dim_model_base is not None else hidden_size
        self.dim_model_base_init = dim_model_base_init
        # A value of 1 means the attention scale becomes 1 / head_dim.
        self.dim_model_base_attn = dim_model_base_attn if dim_model_base_attn is not None else head_dim
        # A value of 1 means the lm_head init is not scaled.
        self.dim_model_base_lmh = dim_model_base_lmh if dim_model_base_lmh is not None else 1
        self.scale_emb = scale_emb if scale_emb is not None else 1
        self.model_reproduce = model_reproduce if model_reproduce is not None else "transformer"
        self.dim_model_base_logits = dim_model_base_logits if dim_model_base_logits is not None else hidden_size
        self.dim_model_base_lr = dim_model_base_lr if dim_model_base_lr is not None else hidden_size

        self.qk_layernorm = qk_layernorm
        self.layer_norm_eps = layer_norm_eps
        self.embedding_ln = embedding_ln
        self.embedding_rmsln = embedding_rmsln
        self.ln_scale = ln_scale
        self.z_loss = z_loss

        if embedding_ln and embedding_rmsln:
            raise ValueError("Only one of embedding_ln and embedding_rmsln should be True")

        self.wesar_weights = wesar_weights
        self.embed_tokens_alpha = embed_tokens_alpha
        self.q_proj_alpha = q_proj_alpha
        self.k_proj_alpha = k_proj_alpha
        self.v_proj_alpha = v_proj_alpha
        self.o_proj_alpha = o_proj_alpha
        self.down_proj_alpha = down_proj_alpha
        self.gate_up_proj_alpha = gate_up_proj_alpha
        self.input_layernorm_alpha = input_layernorm_alpha
        self.post_attention_layernorm_alpha = post_attention_layernorm_alpha
        self.norm_alpha = norm_alpha
        self.lm_head_alpha = lm_head_alpha
        self.use_norm_alpha = use_norm_alpha
        self.use_emb_alpha = use_emb_alpha
        self.rms_type = rms_type

        self.gradient_checkpointing_step = gradient_checkpointing_step

        # muparam counts as enabled as soon as any base dimension deviates from its "disabled" default.
        muparam_enabled = (
            self.dim_model_base != hidden_size
            or self.dim_model_base_init is not None
            or self.dim_model_base_attn != head_dim
            or self.dim_model_base_lmh != 1
        )
        if muparam_enabled and init_scale_o != 1:
            raise ValueError("When using muparam, init_scale_o should be 1")

        # NOTE(review): the indentation of the diagnostics below could not be recovered from the reviewed
        # source; they are emitted unconditionally here. Confirm they were not meant to be muparam-only.
        # multiplier
        print("Attention放缩:", math.sqrt(self.dim_model_base_attn) / (hidden_size // num_attention_heads))
        print("Residual链接处的Hidden States放缩:", hidden_states_shrink)
        print("Logits放缩:", 1 / (hidden_size / self.dim_model_base))

        # initializer
        if dim_model_base_init is not None:
            print(
                "o_proj,down_proj初始化STD:",
                initializer_range / math.sqrt(2 * (hidden_size / dim_model_base_init) * num_hidden_layers),
            )
            print(
                "gate_proj,up_proj,q_proj,k_proj,v_proj初始化STD:",
                initializer_range / math.sqrt(self.hidden_size / self.dim_model_base_init),
            )
        else:
            print("o_proj,down_proj初始化STD:", initializer_range / init_scale_o)
            print("gate_proj,up_proj,q_proj,k_proj,v_proj初始化STD:", initializer_range)
        print("lm_head初始化STD:", initializer_range / math.sqrt(self.dim_model_base_lmh))

        # The check rejects untied embeddings, so the message must ask for tying (it used to say "False").
        if not tie_word_embeddings and self.scale_emb != 1:
            raise ValueError("When using scale_emb, tie_word_embeddings should be True")

        super().__init__(
            pad_token_id=pad_token_id,
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )

        # Best effort: switch to FlashAttention-2 whenever flash_attn is importable. Still swallows any
        # ordinary failure, but no longer KeyboardInterrupt/SystemExit as the former bare `except` did.
        try:
            import flash_attn  # noqa: F401

            self._attn_implementation = "flash_attention_2"
        except Exception as exc:
            logger.debug("flash_attn could not be enabled (%r); attention implementation left unchanged.", exc)

    def _rope_scaling_validation(self):
        """
        Validate the `rope_scaling` configuration.

        Accepts `None` or a two-key dict `{"type": "linear" | "dynamic", "factor": <float greater than 1>}`.

        Raises:
            ValueError: If `self.rope_scaling` is not a two-entry dict, the type is unknown, or the factor is not a
                float strictly greater than 1.
        """
        if self.rope_scaling is None:
            return

        if not isinstance(self.rope_scaling, dict) or len(self.rope_scaling) != 2:
            raise ValueError(
                "`rope_scaling` must be a dictionary with two fields, `type` and `factor`, "
                f"got {self.rope_scaling}"
            )
        rope_scaling_type = self.rope_scaling.get("type", None)
        rope_scaling_factor = self.rope_scaling.get("factor", None)
        if rope_scaling_type is None or rope_scaling_type not in ["linear", "dynamic"]:
            raise ValueError(
                f"`rope_scaling`'s type field must be one of ['linear', 'dynamic'], got {rope_scaling_type}"
            )
        if rope_scaling_factor is None or not isinstance(rope_scaling_factor, float) or rope_scaling_factor <= 1.0:
            raise ValueError(f"`rope_scaling`'s factor field must be a float > 1, got {rope_scaling_factor}")
https://git-lfs.github.com/spec/v1 +oid sha256:db6985e1ae1d922aeb69f6ee9e4f3622bb7cc6bc39c84b4e7fb205bf424ffd16 +size 4468641136 diff --git a/global_step243198_universal/zero/lm_head_alpha/exp_avg.pt b/global_step243198_universal/zero/lm_head_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..88860675c37623ef910d3d9d9884a09f6a419c13 --- /dev/null +++ b/global_step243198_universal/zero/lm_head_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eb07a5a76a984fd7c285015c8133863bc36be788dbcabbb37dadc5ee39daf25 +size 1180 diff --git a/global_step243198_universal/zero/lm_head_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/lm_head_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..09e1c06f06e955affbd38083a9810c980622a51e --- /dev/null +++ b/global_step243198_universal/zero/lm_head_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7922757b2cd71f4c6f9e8f22d5da4455206f1e8235d399154cba99a25aa10f7 +size 1195 diff --git a/global_step243198_universal/zero/lm_head_alpha/fp32.pt b/global_step243198_universal/zero/lm_head_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..56927651d69bd39189cc0f4cc5fa55dd74cae1eb --- /dev/null +++ b/global_step243198_universal/zero/lm_head_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c20c4ae233bac39d7f1c734eecf1ecfd49ecb24d4456e841c3277defee67c0cc +size 1165 diff --git a/global_step243198_universal/zero/lm_head_alpha/step.pt b/global_step243198_universal/zero/lm_head_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/lm_head_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.embed_tokens.weight/exp_avg.pt b/global_step243198_universal/zero/model.embed_tokens.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d104ed3e3aa84302c7874f62a064d375f71437c5 --- /dev/null +++ b/global_step243198_universal/zero/model.embed_tokens.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66b366212fbe90c8a0f71848be5713b32b1b4afaa4ee7daebbb3a0267d4b34f3 +size 760321244 diff --git a/global_step243198_universal/zero/model.embed_tokens.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.embed_tokens.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c31ce5cbfa94c954d06642de401529c788d37152 --- /dev/null +++ b/global_step243198_universal/zero/model.embed_tokens.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaa812bc06938dc09913fd4af9b54fe35d5f43b06d58dd4769decf72b11bba34 +size 760321259 diff --git a/global_step243198_universal/zero/model.embed_tokens.weight/fp32.pt b/global_step243198_universal/zero/model.embed_tokens.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..88efb71f6c66f525d9f5cc4924da98d88e08e196 --- /dev/null +++ b/global_step243198_universal/zero/model.embed_tokens.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dbfb5af6c84319b82b3bbb36115d30c429b49ff2bf73d021b18bcddf9d68d1c +size 760321165 diff --git a/global_step243198_universal/zero/model.embed_tokens.weight/step.pt b/global_step243198_universal/zero/model.embed_tokens.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.embed_tokens.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 
+size 852 diff --git a/global_step243198_universal/zero/model.layers.0.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.0.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f6d80f83f8f836dcb198f3bde5152e965d005cc9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ae4c645770e222fb1f0f6523d5562f36611152c7f98c328ca9466acd40d1365 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.0.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.0.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..000fbe52acdd197c0b4fd6095f257c6c420f8a82 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b354c1435a7f3425ab4e830d05db3cf4869e7bfc74b507c8e7116e493ec53c83 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.0.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.0.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e2f812a51e6926155c82b69df274ca2c9f2c358a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bcab85cc4015d87092df58f21e7cd1e742e9bb8206c231a56e1d5d4619f9183 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.0.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.0.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.0.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.0.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c8e343c78c2a538ff4ef7f1dd5e3aef56c226b3d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65b57c152cbb08ce685446051cd06ff9f1e2d5bd6887714cdf660cf9a3de017d +size 1180 diff --git a/global_step243198_universal/zero/model.layers.0.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.0.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b5d19e85dbb894613758c96a9a92c75ce8c2714 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:565662ac6efe6c7cef53e77bc4e477ec5ec939596624953fb13a1010eee355d7 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.0.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.0.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d90b00730fd554683752c312181ea498081813c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4df6e1db7f668d5fda482d2bfce264b78ddb7011e260994070a31d775aef2e5a +size 1165 diff --git a/global_step243198_universal/zero/model.layers.0.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.0.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.0.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.0.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.0.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a405df7941439e77967264f7604c56180a34867 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56437f32584526c52e3e64cd6f8178abd65ae00f23d92cda8b270703a5303546 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.0.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.0.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ddd76d99cf40586df314db57a1296e12f728ea79 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ff19e5a59d8191c86ea05d045183068e57d96342047845828d558e9ceb15620 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.0.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.0.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..99d3b708e758e3a494006d40c2d0610e16fd3c49 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dda538f899b574fb54055ea16478fe46b19cf517d36da128993f92d05994a61 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.0.input_layernorm.weight/step.pt 
b/global_step243198_universal/zero/model.layers.0.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.0.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.0.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..347064269a703ff36fbadaaf5a30ea72c86594a6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc159c005aacaf434ab98d149cf0285e5649a99db6aa92744b337b142b7dca34 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.0.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.0.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c72905852715532042ab47c67122a31de77c73b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21322db8b68329175d238406561cf4d5bb5ca42ee4d7aa6b413bf1e1e39d1bf7 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.0.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.0.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..dae2e21637edfcb376eb8287f538047579f9385b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b9cf7b71ff8af16fd86bf824579093669cd08ca7647bdf58950c745f955dddb3 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.0.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.0.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.0.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.0.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ccf1283071e5c72fae1754f2d03ed9ac69a6b174 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a5a995a8f87848019493c2b98cef7baef0185dff5eb1127eff94fb97605e59f +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.0.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.0.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..40f3ad0fede494575f7f11ad30474afd8cca1d94 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af930bef14ae385ae5232c116600d1e943d8bafd585a1c47cff167613c7d9aa8 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.0.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.0.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..70d33ff0eb44693b21ff8872a333769ef930c782 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.0.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b4fe272c9c3469c50e587a2cdba134c8853de0b8fbd7d434bbc32262e06579c +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.0.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.0.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.0.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.0.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9978d58271ec04d0713616725c9e78862966ce79 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09f5e7e4b6f4f76d9905221fdcb37126d77b4a81c97adab024b614eb53897f96 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.0.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.0.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..55b6487a512a040bc5a703a73e4880e2f9641ae1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abb476f2a4f0a97f32014ac80c232a5af2af99e3406d47240a992501f1c8008c +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.0.mlp.gate_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.0.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5dbd5747d0da6dccd7e2d34387dc32d3bd805e8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32edb29fb545e7a6e65df5ce13f9ce82765a7acc34973fb7371e7a42f20aecd2 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.0.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.0.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.0.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.0.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a46b43f436d343dafbaf59ccb6fe6a5e47cdc80d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66908726e6ea3a2a9becc5d89c6f6112ff523f2a284b8a95b148bc5653677830 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.0.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.0.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d0b3166b82cd28e9767913393520e0a3f6d0ede --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b5a0bb8ff896f33ac32fda4a0172d34b2f50fced9dc23d3ca29fc001f08419c6 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.0.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.0.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ff068528bebcd0c8e8b5ddbc74c0cf9df8305cf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5b81eaf4bcc3879df3bc692d9e4652ad9ace3a7c80a197d7f949645d7ca314f +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.0.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.0.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.0.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.0.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c6e55e3e264deec71525d413b62f586fb409758d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:403047234e9fc921ecb9d0e4ac97e2cc169c94727c4370996e3d13ca0965d6ef +size 8860 diff --git a/global_step243198_universal/zero/model.layers.0.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.0.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..38355fc72a111aa8386243300c71df273ee48f0c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4346ce7447f73b7134e4156814493c77496ad05d62c13a5ae068b6f58f1c51f +size 8875 diff --git a/global_step243198_universal/zero/model.layers.0.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.0.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb9223460660e1f455bf61eba3b1cf710e704b06 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ded81e4d339d6394f1a69aff90f9a5c2e4c745252badb57964f99e95c0e3d0c +size 8781 diff --git a/global_step243198_universal/zero/model.layers.0.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.0.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.0.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.0.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a699eca512b4e74ff27a1cd41b6fcc4aee04210 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:106125c7121823feebee36822359de0acda248d8a33f6aeb742ee49f69b242ec +size 1180 diff --git a/global_step243198_universal/zero/model.layers.0.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.0.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..629a285e09b3984b6f4b51deaac2dfa08af6619e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f73b5e2b79fc60328eda519f0e0d55a740ad0b26f74ac77fada9cbbc4ed2273 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.0.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.0.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7165cb7b28487aa6662ae487a1a216b52ffb290 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06b155f15ab3e8a66e65a5abede44dc417fa2413777cf97d9bc5149c19be7639 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.0.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.0.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..97e4862ab76c17cca167db2569e5a7b01b8ef366 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b1b92c6e5d2e9bb51a8189a62df50f1840210bf7a9e74bcbd4eae1b17ce837c +size 2716 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3da03ab1f9aaa0dc450574c46561ad16286d11bd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b79f7644943018df3c1a82fb2e7269bf2a648d92325561a64d9a26fbed095003 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b7cefebe67e5cfae7739afe6c9311f8c1398451 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:303c3b60f63ca63af702d730e4f122d49e2a1e4aa34b9759205e90d76cf4abc0 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..81e47d460974ad93d3ad11cadaa409649ebec1a1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe5236074b29d11d9d27b054e3f505dadc89c62d93839752b04bd4d7948783e7 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..bab8d00ead195727baba7d0637f6cfa4888d37b5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fef640304417dbd1f4849113fd9884edefe0e7d8c5edd51daeee06abc45f456 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..738ebfa8832a9c04fa9159fa6e736ea5a6f95ad8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4af4b25b3073cfbd9492d6f143ed996c4f66bde633fcfb284cb9e4f96d7e9faa +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f99de163543c5cba678238eb4d07e21ae3e2a165 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae672d4b5dc06a0f0227bccbc92d2a1c051f1a3255918d1501a0012852b82595 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..72c3da7467adcbc74a59ae1f091398a447ca3c07 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4c9275b2c571ec17576a90916eac5a5cdca788caf3bc705d178bd13c526b993 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8e9417cc85d332702cd5c44e2fe3a968333130a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ca2ad48568a0b1864843450f8beb94e605b6ce2234e294139ba930f05a6b5a6 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.k_proj_alpha/step.pt 
b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.0.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e8be7504b31fe4ddcdc87bfccda00c378d7fae9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c505c1711b683cc275e47a2af8a25d7c85492206ac0934e425bbcdd495b2e9e1 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.0.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbd1888e554fca29378b5b295832519499ba2f8d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69a6a3228e7c1bdfa521853dff7203219b2eaf5b5a90daebd622e6325ed07467 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.0.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad55c3e375e994a6319b513a6541f3aca4b7748a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1f8562dc2c823a6c5bcaf7fde7ea70576a58bb56a91b50d4b07614e83ce51f11 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.0.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.0.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6cd4c411dc28cc9f5ce61833c4cb7eb1c74507ae --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:059ea73603ace4f46c45a765e05e0b73c6f23685b9598c8f878689c55b132854 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.0.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b881fcad44c0537f1338fc0bcd6d4ab9ffd5399e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88c884854d24cf6fa54e1becb4ca8ee87286e820e4c9c398a9c9b226e445d949 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.0.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa9b666c58040bd319fc37f6e0ddc993a9c8ca8f --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7917aaafc13a8d564fb6cf2d1872c83fa1e4af9e3254222d1742cc0316a046cb +size 1165 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.0.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f6f42da3a0e4c01b2e927bced851bf943930166e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8a6b75c021c0ac6036eb96b81b87edfe389fa8771dbd9a0ccbdd6d16c048941 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f39fb982ebe6a9dd0c796e9cf00c26e77fc3823 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be11bdcf6c279140af64159efbea2012663836a74616d80c7329e0b57835049f +size 8875 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.bias/fp32.pt 
b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..21ef0906f06a9ce33506c5a8df1de819b4cfa881 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65ae35fd4202d8c156246cae65e93f86fe888872728cdd3042ad9a26729bb51c +size 8781 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ab7c34c7debf4c5dec2aad658851ecd6e0d8489 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:455c423db215d10bb51403ae9150568fea69e6cf8109db7fac57a5616188963b +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0698f646a97f8980d19f61f62f5c22ddf4f32905 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:03696b3022e61baac3a1dff73df6e051bceff4909b50f508f23cb28a7c9531f6 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..626f394971732b3472f959104148c04c5de30499 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95822c2c796ab0bb257756f4baa06d4dee34ea3508ece0c29b18857060d02549 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..49d79d1c9391909af79fdcb6903aaf315f7058be --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85a133810838b4a5034ec161ad181a8ec86c7700c2b1c9b7cef1b216c7601a37 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ffaeef6a33b563874ec361abb7c0cd7d510e77aa --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db7f727123be55daf174ec8242b7a8043f0ce9d475c1e65dc266071518f615b7 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9cbdfac31b41876b222505f07a27945ca7475fdf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64b4533f6b675ac1c8dc65467bb6d817c2832870d33d73982318876c8722c858 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a7197be4323f30a17e2439df957fb550df244ec --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6c853b920143eee9ffc1df307bf5d0ddfc0a61333cb5392485ac72a352f3032 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.bias/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5b475d1e8f73afbc05ab7c3c179abe115ff99d6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee0e38a642246f5b636da5d36360888c2e9c2d0cefff92c566723e99e0a7bbb8 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..aacbb13c462f136c7034c20103e4f020b8cb642c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:865d9b88fd54d52727584ec575fd37afc2c98f79151aa2aa9d020805f3abd078 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf6211767175b4360ff04ea2bdb5cb03341e517f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:7feff1ea1d07887656455b44675725c6b4abdcb48f571fad3e2a1c675f1c1f60 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9cac415f9e5c19ab68fd54787b7bd083e00e9056 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f25ed6f5d2fa0851572c56c6c73abebdeb91f9111b385d411c2a4bac64160464 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..755317fd38d8ac3ba949d2cc235f6a37c2ba5b5e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2265555d8160f2deb24546b4a7bf1ec370b964e770e10ff5336798a8f3a455a1 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ada17f072e67886e9deb433e351f57099edc0765 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66918efd231e28e5159b108850cd484b9dbf4260c4be29469891987a53f3f004 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..04eea8390f11942e18206b0975db0e442b00845c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42e6d7d25eb020dcfa836f0a00386ee8d1c114cdadb29b8a4f49ba0001989626 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..00c901c8f3716c25521c80495b6098cd3a3728cb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78cf8e85ad0aae4fa0dcc1d8d754f132164bc07243530a2b209259876894299a +size 1165 diff --git a/global_step243198_universal/zero/model.layers.0.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.0.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.1.down_proj_alpha/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.1.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..be698b9e7f51600bfb6e1c05d315439b59896acf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a905346edfb0011ad81c6e4369453592fcfc83ea28dc5d6cbf4d641d52cd1d3a +size 1180 diff --git a/global_step243198_universal/zero/model.layers.1.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.1.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d77cca64b03725ac20c4a35545ff330382f4cfa4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f3296538dae5fd6a34a3cf6d99cab784e24ebd84dd6721b5c4aba0876603bfa +size 1195 diff --git a/global_step243198_universal/zero/model.layers.1.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.1.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..897998ea47383235aeebed4511aa828aa55e4606 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b010ee3523a4e4a9a81ab7d98fa3674b76ae7c3fdc40b2f436c54769f424c965 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.1.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.1.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.1.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.1.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c466b65fcf93eed0f60f4cc6417b645d071c616 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b7ff08c261ec4883860a2e27667da2c633536ee188c541d6aac66112d106bd +size 1180 diff --git a/global_step243198_universal/zero/model.layers.1.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.1.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e920dc0b68faada14f14584b86c6faff8773b021 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21691f69d180a8dbd0c3ffa5d3f63a3cbbf4bc0a49c8ad5306e172f1bcb3b7d2 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.1.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.1.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f059e361e137422dd16c5236b1e234690ddf1f6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44731f264fcb86ce949f747dea0f94fdef3021b925debb943840600520539d14 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.1.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.1.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.1.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.1.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8157b228c6e2cfa608c497bc05f415b17fea227a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394458738c182c456eda17dcc593c236e5dc0461e28ff1384d150c18a65f590b +size 8860 diff --git a/global_step243198_universal/zero/model.layers.1.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.1.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a46d517a08c3a254df8489e4e2543fa37039c943 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2b7d0742b6ed9d87fecfa42a74e19084b7636272a747ba6dfc5d8afd447cd97 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.1.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.1.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..28c53a8fa71390d7d8ba84f53b9e868ad699e94a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1455486f38dada1f9023c170196f101ad57f9cc734b1394c6825f8a982f7fe8 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.1.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.1.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.1.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.1.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.1.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f92bcabea78828cde25d152b86cb555bebf05c3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb7f1d55bc869d8a57d1cbb52ddb79e9bab657def672a592f70ae7686a1a0bdf +size 1180 diff --git a/global_step243198_universal/zero/model.layers.1.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.1.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c7487c46ca1fae8a467f2a3bc063345c791366f4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f1164ff715ce276362e528d7f7b865faa47344213f9734a71c7c84cf2a9bb07 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.1.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.1.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..43d23becd598795353e6ca5c82609379a4dadab5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00ce2f75db79981ceebb3bcfec481715883291b7d824f837bb1341dd091506f1 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.1.input_layernorm_alpha/step.pt 
b/global_step243198_universal/zero/model.layers.1.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.1.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.1.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..67081cea9e4c8c68fe4cda836789a0f834325624 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59f9ec21eb08e5fd010857cbf511471565aba784439a00fb7a9d0407c3c87fec +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.1.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.1.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e34ffa923c9166856cbbe984b8e25fc35e4c65a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3014602d01ecf1feda3b387905afbd39474169a72489e189d25dfee83998aa00 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.1.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.1.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..367b510d1e87100704b84dc0e5e78b98bd553af2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ec3ee59bc52641ea2b60be1d825fcb284239ec28ce8945e79835c2f01c1adf9b +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.1.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.1.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.1.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.1.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a646ab9d6e5b5cede24bd659bb92b589ea8d1b7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bded80df20bdda23491a35b236a514b5ae8de854c3d68ae85b964009f36ac480 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.1.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.1.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b909b1582298077396aee1ae6d0be70e3cf5f029 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc6a6bf97ba25c65390f309e692f6ce4ea78ba0db1813c3fa54a92fadba24ba5 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.1.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.1.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..25d36cc2bf73c7d3a111672f5269b8908847d4e9 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.1.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:764d0ec1857c339b4ffaff19d8654d139f58f5e204bfb0380ebea7d4e2b9f4ac +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.1.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.1.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.1.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.1.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..aaaff364f5fec1ff7e17fd015a8eb588d7d6d32a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f94d31f71b079e4c988958bea26f17b5b8d6126b772bf72b594e168e62047bc4 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.1.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.1.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b51da677b3b6d3e614e94880912b8264cc0bb0d8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19285fdb87c8568af319d3a48b478694ebdd0add41eb802a1d9beda2f97289b5 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.1.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.1.mlp.up_proj.weight/fp32.pt new file 
mode 100644 index 0000000000000000000000000000000000000000..c90a0ce25d63de8f1f3c2bcbd14dcdf485ce61ed --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6093d31722b43bb1bee9740f6b2ec03ef38ae3fa59e70ab61f5055aa82855eb1 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.1.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.1.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.1.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.1.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e2fecd04d500085ac18faaeb14e90c83edd6efcb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a209a57462664322504116beab3c08d221e148c89004201d6d919dd9d2485fc6 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.1.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.1.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b4c726e81e97363641edb62a28e9d5add4264c6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09f90c82099a3e2a3ea49a0a2fe375be6d83e7d3740b12a35edde43f1cc33d5d 
+size 8875 diff --git a/global_step243198_universal/zero/model.layers.1.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.1.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c2f20d1ca50f7392e0f0372a910c7c18a4e1307 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:584b8f84ceac480aaa370c514695dd269242d3108181ef7c1f7a3baac2a8509f +size 8781 diff --git a/global_step243198_universal/zero/model.layers.1.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.1.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.1.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.1.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c615b9dcfcfd72ab73aeff5816a40c40a942b0fb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e542ce31ed167c3dff3a5b42988f96d8b279203c5765672c180d38ee8c4ab7d2 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.1.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.1.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a97a38ab48a4bee75c83fc5830c921b6dc721f8c 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92b25904b4d55dd717a71287d85f61b286d5e3b479f51d4b3cf8b6c42ae84995 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.1.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.1.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ce5697a48a331064401991a9a32ba75cbbc6b40 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d37a8f8be3eb773cf234321aafe01ae4a64e54c222a20fffa0c14dde4fd0dd7e +size 1165 diff --git a/global_step243198_universal/zero/model.layers.1.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.1.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4ccdcf9565d16afb1803575ea78d465c80222a6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cad9207e8cd88b06ef1021d9c363d4bff1170f833e3872c334fe65c984cd3de6 +size 2716 diff --git 
a/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a4d1dfef29a7ff40be00487d59b68df4b2d9aa9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17d67532e0262b9d76bf324de06021e3dd0a1e5cc44a5ffa139973ebec6a4912 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..50d7a90fb32c73d5a29a24bf76bbf70eeb5fe19c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dff120e8d46afeff78d98f320cf7e3ac66341ebd3e8b07dee37854b55a6ac672 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..affe6510d2094ae422a39d557ee745365be61fb6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:adca59712bf4c608af8c9a0bc8cbaad300f6b08e1ccc897543b966083923d6e2 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..bcefe4a8c9ae91ce972c64fdf53207b64d19b5ac --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3ae892cc191e9e298033dc9b02941301fd10f98ff8d87a68f9201679e78546d +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..db26fab1392e3ca6467e68bb08adbdbd54052914 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba723a889d6d34566edf2c5205912631488b5437e1483df50432e99f7e991f66 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..e05be7b939d02a9aa410ca882f1e51d75fa17b14 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3563cf2486724da339f41359dd3fbb10ea8f75810d742474edaf69183ee2da17 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f38ef263334fed2571a2c830a23dc2c7227a31de --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:893dfaaa2d3bc8619a8368c5bfa1d5cefe941d3bf7e52e82333ff6e88d538025 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..98259400b222a0dbdc23ab580d319cdec690d278 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdce47350934d691efb71221d09bf39e4524c815b4fbface70b3deba2c6401f4 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.1.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.1.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..bade0ca51623e2b3a519b814019b767fbe6a66c5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa6daf8329335e6dc6f4a5aa5da055ec20d10a964ff467131bf00e6e6c05babf +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.1.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b6f3c7f4ff8d4a75a7fdddc284fe04d172557472 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3e30ee6a3077dcb60c842d17aa6b5e7fc143cd284d43dc7a2a194730688a25d +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.1.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..037272ddeea7e6ac3444d55100fcba684056ae48 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fab52c1bfaef2914dc8b4f16691b329d094d0e8645fd0f5d4ff40c72eb0f35ca +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.1.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.1.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.1.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d87d957b1e1d4e15fa3d17464828fb5f51f937e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c4d2ee53803a441e75988f98bc53a309935eabd58e8029698f86b94ce2cd18f +size 1180 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.1.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..89f0378458412ad20cdb36927b7b77763559c61d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7a1aa83f7784314a77b413478ef295ec66c4d44b55303c52e779f42a91ee3f7 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.1.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..47b98ec75ed118a2ec01771ac876bca237a7a0fd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b3beb0a6ee9b6aaf3f736236e5b3d7b22ff9356767b2ba13490d5abbace1e8d +size 1165 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.o_proj_alpha/step.pt 
b/global_step243198_universal/zero/model.layers.1.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..233b2e27fcd0ca2e75237ddd78bf402a950bc4d0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e57a564ab60e20f7a81a838caee36844cd4c233ecd6a1633bb88f42fe87341a3 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..30ce533728f310c84e1a6e9c7ccc48b5cfbc208f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac6f720a45f9999fca8dbf5776bf5c9cccbb1d2a141a156f8f7e51da2ea04634 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b28136f90805da11f50309e4c8f13756379bc529 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:dd1c1d60ab2bca6a2ce43e3cea608dc31da3e533d9832d58535013e1ef6a6f09 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..381a50e87f425126c216cb311ac8ea5a93370a65 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de9c8dc94a03dd58b60074c62bfb3c5d802d3336e835bcb456e319764edf6aa0 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a9ac042bde80c87c67c889e7c111e0b30e002647 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:677c9237519d466c5bfbdcef22566282b5790dd381a0d79e265e65759529b5ca +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d31abbaaf7623bd147aaebccb74f7e04b36f78eb --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:456f0836f86fe7a7f11307296e19e9e1c59a742e332ffef2bdc5ac57896b438a +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ccb68f1a3ffdc6e4d5396a02e7d0c3ee45f22cb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13f563efcfc659ebcfd7edd6e16282bd49fda38322be085986b5d04bf4603d59 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b040582674d69c3a0d70ebfeea32921fbc1ba57 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b60436808cbea7e9b4b2037e7584e41f8051fd236375f7e891070d502843287 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.q_proj_alpha/fp32.pt 
b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b7ea38ccd2597a00de32564491473a05bf0ba4d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20a6e22ebfdc3b2db25a8308a86a2b0ab1f827e461cb734c014bb911e09bcd00 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0aadda9adb3fdf7906da6e2e8abf691906454975 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d54548864d5ee916593b301e8e11a717564d9be5fc7ff4737e01b2eb35e0dd +size 2716 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8bee81786a0625fcaa1a04fbb7f8658bb5c814b0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:7f984371cf48303cda6899c9e96f0737ba024adeeff2ed1576bdb07383dbad5e +size 2731 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c351ba9b0d826a5db0e59d303ca1b71b8fccc69 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7d8db973668e781eca8b008516eaf1730d71e680235ecb109c8793ea1721f36 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2293f6d2e2bbfd4a5bc1299aa5127dc175887417 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbee6a499e316c1b7e1f9c1e53e45ad88aa466d1cea4ed976e681e87b8914381 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a695fdeed4005334ae5d4ebe839102cb74f2a601 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06f52b50bc9828f325bf8377ce4b014f6449d061153daf122e65e3358645efa8 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..efbb814f581d9ee7a0514276f6f4583617f14a95 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4916b01cac65a30d968f9e0a1d5b18a63453cafa51a0fe4b3c39aaab9ce019f2 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7517ebbf01f95f26e6b2072572998464ce37e1a4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e54273e5a102a7009294dc19ec8b06f2c57bf2101fd52d8b0c0485997799ba7 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.v_proj_alpha/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0947496cb6e7c0be8aae1e60908337747b509b3a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a380e950ae94464cdec65859a35be30dd52ec45de2d7e35b156ffc0b9559e2a +size 1195 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..48adc90c7cab5466374f5478b759e8a638511a10 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dbe7b1e9ac1e7720fb56f15da65a6b31244db594252c269a46bda368d354e56 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.1.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.1.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.10.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.10.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d1d526c067c1f2d485f8149d1ba093b98e0a635 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:04ecdbb6ac312a2848835eadec9cd5615b09c282f9cd0e6eb08c37272575fbf8 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.10.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.10.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..326b2f80b2796ad5c0ff2092976cb9ded24a4a8b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36c6b2b1d238f47a7b2ddc362f943cc4cd4ad8a02a7b344504e9b1b4819c74bf +size 1195 diff --git a/global_step243198_universal/zero/model.layers.10.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.10.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9902562f7b77fc8ca5e951eb97e2c1ca04dace45 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9800941e9b088f25fa2b7447a6d481c96f27aaf1b80729f1888155b0f38b298a +size 1165 diff --git a/global_step243198_universal/zero/model.layers.10.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.10.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.10.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.10.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c286bdf646c28dfb7910a7c2116df0d991b9240 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.10.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e58055bd0c2c3b0aac99142d72a620725bd9c132e15e96c5ce063d7e746cb42 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.10.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.10.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b98f22331002a0db66531877b4331c81daa5e264 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adaf796408fa952dc4ecf2c2c8e2a6c204107abb2bfbbe10856361a17ec2935b +size 1195 diff --git a/global_step243198_universal/zero/model.layers.10.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.10.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c9e6aa42d9ca96c0f678ca94ef7fd26ca1c3d3f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fd298b0a07743cfd689ec870ed0e7eaf741391d7b42cc33864538f0912cdc13 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.10.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.10.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.10.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.10.input_layernorm.weight/exp_avg.pt new file 
mode 100644 index 0000000000000000000000000000000000000000..d3649060d567eca261049e4d0cbaea5bc1a9a431 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aa50e6a4f58a5fcc52a90931a36e089c6632b3097d23f77dc689302ee2e8815 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.10.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.10.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..98654d9f4c3bec8b6ce7958d1474233bfb4d3018 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa4d71687f38470f75f38009fb55bf2f8fa2fa8642c899949124e35e4b6007f2 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.10.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.10.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3337f25195c3a66c83a495f02ec3b422e59fb33b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb7b3cf54d6b4da4507100e76a95ada2fa43d8a77fcdf3d7a5e536f8ef6d7435 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.10.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.10.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.10.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.10.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ef90c5af23b4bca6d5ecb7c9904ebed47517cb4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02d26705575e16540dc8d0fe8623943ca155f9ae50806cac5688493129ea325b +size 1180 diff --git a/global_step243198_universal/zero/model.layers.10.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.10.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a8805cdb3e8bd85ff13bc2f703e0d0b79950ea8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9eac929d1d238173f2a675334117aaeb060d0f27570aa161f758d3988ecf928 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.10.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.10.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..630605647725352aa15be3d5a15ba4a5395f798f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75889cb975fc4b7ff9afaf5afa1c5568132420f3c3a2d32bc727ea56fe3d6c85 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.10.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.10.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.input_layernorm_alpha/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.10.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.10.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5334f51dfd175bfedbf2e3cd279cbf9a3c30c222 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:844a5c17c87a51448684dabb8921b12887dfe0c33e4ebfe3b678a6f2f3ca22b2 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.10.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.10.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..70c79496cf00c1ed5746022411872cc4e9be8903 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:996be335c5c9a30325affd6a12028a97736b86ec1f4b1a94ace81b9d0ca8619e +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.10.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.10.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..00d8a1406b1405282da0a35f9000defa2bc74600 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3704053fdc6e86e3a349f62e1f7aae083b23c4dab8027b22b79b119142a8e90 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.10.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.10.mlp.down_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.10.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.10.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..bffb2aa75c00fb9e11489a135b81c5cd208aa0aa --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ee0e04fef00c1237f44dc37c979ada37951918ac4d0dcf07075672d7735bfa2 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.10.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.10.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c9a1cce80fc051b2f0e39df8644d9997249ebbd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c44041e3038492c4b630ef9f40160b4c71d3bb36b33efba3910edb5a8d35046b +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.10.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.10.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..892ef401e60dd9e3bfcecfc19926cfed8ca54746 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffdfd2a6942a96c3595a7191cf029086b9d31a7a3afdcf0a355c409e8469f738 +size 36865165 diff --git 
a/global_step243198_universal/zero/model.layers.10.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.10.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.10.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.10.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f6e1f9eb6b5dd818d3ce2c82b1e8b087c89eb17b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbb9db46f3f025528c5db642696bb1d3c960e4f23c6e0ea736c98495399c456a +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.10.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.10.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac24225610469b62893f0be1215041a8d9632fc1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc66e25763c94b2799248d234cb3c29bb1ff146ce1b6be0a3f6d405231834560 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.10.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.10.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..608c73d9eea74b668ca87b8df75c9cf61e8391be --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:ba7078f9b03671727f0922cc183c4c01f8b05b8418d02fefdff80044d87f340f +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.10.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.10.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.10.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.10.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb076b3add56fd552f02aab562541d482d50ffab --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2fb27ecdc6773ad4e988120a1d83276339eaaf7abd2dc81f1a4441d28675847 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.10.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.10.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..15a7c9b354e9a85f205dfd26655f3ea24faa0cd2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:042d34afec46d16eda9cbf3dadf54be34264f5dbce352eec7b66e56712ffff9a +size 8875 diff --git a/global_step243198_universal/zero/model.layers.10.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.10.post_attention_layernorm.weight/fp32.pt new file 
mode 100644 index 0000000000000000000000000000000000000000..639a171f470cadc64c49ffd2a80694c14143e671 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15caa29994cac60ae3e5a84ce873d816738d599f0d8b8abd5c2c61d9a7677202 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.10.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.10.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.10.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.10.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c1df49e8708cf7b33e7e6d173f14024920e589d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b3e266074e4f098b956519ffcf1c13400234787fcd26e5e05d666ae0d4f1b7b +size 1180 diff --git a/global_step243198_universal/zero/model.layers.10.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.10.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..79f83580b5a5e4588b6b94a07e63218d5ae94557 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ef28ddc14def6acaf06dfa187c49b2d97cc955c9e1048538adf4d2ea86f01770 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.10.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.10.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..17590c1fc90c2e9279e2f6b5759b0a39d0e41904 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be3fb35b6da468cfe297216805c195f9c8a738d65f8dcef23ba71d9d4976706f +size 1165 diff --git a/global_step243198_universal/zero/model.layers.10.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.10.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..abc620f8071be15ca3e9891dff1c701be75ab4ae --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5c43bd2b823624c62fe032a3e7da9493d1b1bf831a0929b9ec269a6329a13f9 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..684cd60b173a3da7d2adf4ef747c7b8cb19678b6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d878702f62e98548d197f73f0398e42e8e816cb70004b112d62478b3246d1b9d +size 2731 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb03c5e115f3c5037e4d71964063422a5ebbc798 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9c56a011883980da3f7f579b6b3d3aa6182a851a952e91b75dde00c26a25663 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..42376c4f0dc919325c338aa300c4e9b4df138af1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bf3d5c910f39a113ff073f85d4be1803ff25cf720f5ee866403b5cc61bf90d7 +size 2950364 diff --git 
a/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..95e3eda609e0054139e7c09e9bd1f195049a81ff --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7231e1a111a66f75d66b311f09afb1e0881334ecd82fd63e428a2304077e5302 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..72d335d5834d01256e73226905a7c553e5aa6edf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98f5b9c8108ee698874f8e78becb7442ddc613d9f2fa5bd622564dd73c3e6ed4 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..42744a1aed47dcffd7c148bf2f1502f1b0c84ade --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a4d077b55a539d67be3a92778d82b2392cf8f9010147d54fcd79918b92db196 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..27db46516ce2c440e11d91d5e07dfc085c921095 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad5f7ab591529a928ec8aa4cef3fc8ade20e488c44dfa6b2de6dde324509cfa9 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf7e12dddc3f1fd281017d76e743854498913f22 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e9ade64cea834466b37c67092965f568ee92ba979976558c6c22e7a2d862b7d +size 1165 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.o_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.10.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..311cff60e3e7eb33eea5e9c8a7dd78840fefa816 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f03f9ef27238d0f1c67853e131836c40b4b66d6455c4de85e4242334716a3927 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.10.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..34c29b759b72ed01dfc214bac0d6aff8b2301064 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ee6aff8d0442557751c195962bb6372dcd486196ff488822f060415300510c3 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.10.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..623ac06f7eeba59411d88e67785ca17d8b8bd642 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29dca6910a7a4f9c6334a91edb71cdb23e5863bfe45907e6463f7b3371860fc3 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.10.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.10.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d77362c2af00463debd318fcaa062e5aa54812c0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c0284563a0c32a225cbf2e9be5b61c78a916eccc093822a41b49e1810b8517c +size 1180 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.10.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..052f1bbe1d93687096427b54b1e6cd1df99c9793 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e533c24dd1871d171249b0d08e5b784b7ca15870f86fecf2118d010fe2d6135 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.10.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2aac9dbb510548ff423d5bd7eb61a492d7bd6d13 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b24f55d47180f1ef15131f869269b8e55ee44815de33e67ae5f0f2ac140575f +size 1165 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.10.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c996be47f9db962a40664e92500583699efddec4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edad9f385c768a69c0c3ba762ddfe612496330abede182c06e76ead6b62ed2c3 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..43cd47eaf1e6e8987bfc0d150e2793b3b3e48abb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bb863637e048b8d108c5c550a72d2b1c0d527703aafb3e6fbf42af18b1f11a6 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e57603652f4a8f64337cfbb86940257419f4ed0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c700f3e99392d5aa554bdf6c2a2ec25f5dbd7b1dc63c89f0b3886df8a774922e +size 8781 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.bias/step.pt 
b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ab2411b8147d5cfadc6dc823b53a7fccc45b293 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99bfdaf3d604debdf0ab4f378bda492edfee1cceb6f76862877700696993f31e +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a367f6ec473cc526c8f323e73329c4da874368e0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c94b1f3d8849e3cb00986cc6dbf9e5403b0812c516fe3f2da8e9a185b7947872 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a10999608e3021cb9000798dbc0a3ff0d72cf3c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:4b6670fc36b5a322ac847bc0b7df88be0bd28492bd5d67e9b84b03949412f040 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..31045f854b65b635a4ee03a5ecaa090631976390 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e49235045c29f9df8de5bd9149b0189c3347bd81950c7665f4bc8f1be765599 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d1c1c2602a24337ffe6c95d00efbaca41a1ee4c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e96b75d6b7b666a75ba2b4a1146d51c757d22994509224cdd1736a3a2a36e086 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5a842a4d4b9661bc550a9ff95c8654b8387e1c6 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd4bce028ac9d80b7cfd2a5e44b21841be7acc081735c0fad886ad276da6ae9f +size 1165 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..bed0a87b934355747a2825cc3b0e44f73915b5c4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b22a530f352116609f7a559f2effdd01360d7d0facf98d985a4c89f69a00e07 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..caba061c216cb3394108dc5bf4108f0ab0c4e8ce --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a7f7684ab6ee97b518a7e648fa5dda98c04d23d8de578041a2a6a748f22eee3 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.bias/fp32.pt 
b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..26a62cd759586380ebda559f9c5fe69a8f50bc76 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b0ee2b94f8ba8f7bf129b85c5a3eddb55ec7071efb64869027c8fb79277baa4 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b1f83344bb569867633857de511fe8477891ee8e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5addd2973e89f8cedd7d8a282a2c722eb9da07e4553e9256d34af2a5558b032 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..04ba6f08355aa91ccd712e4ca6e84c8bd3d1e13e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:504acd0a765253dc8fc5c2c11d1de2e8f94df8d6eb8a9b4219aa858e261dd762 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..01056bc9924aa8476f6274ad13be708afd956ba8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dbada7f443be610aebb70737b615d69623c1a371aa51dccb4af9556da622dff +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b1b3a6f3362ed735157cb210d063f9fd20f1f9e5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c7f2a6920e6955a65b797067b36e9587437022437381f2387c71607b7d46db0 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..57542cf2917a22bf779f60e2dbd1d9633bc451b9 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bacf747707bf2773237710cf4e9ac486c2cad678b296e205ef4ad31a2f76016 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8bb91a98ca32bd888eeb68e22a96127d03022795 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5675fbffc29870761c41b094ba8a6da2e0f9802f8d562f4684b19cba4584f7c6 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.10.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.10.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.11.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.11.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a85882cb816ebe290a610c10d020fe760996afa6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:670f3a8c56d6a9eb907c8756203347015aeea63b3b1ea7f4dec23bc6e6c33e00 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.11.down_proj_alpha/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.11.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7654fd004342a616de57f1b341e2e59a6bdeec42 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eda6a25fc4bd6fcf716e29faca58a3786b8a8c0c2af89b7af4df4c2f41b4239 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.11.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.11.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..75c9ea87eb351916f60bd7bed2a95b888b77c59f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67543b416ad8f0fc93040530a835ee0864db41ba8bac89e40acde2a12b79f074 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.11.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.11.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.11.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.11.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b58626a648468c6250c6b370e6bc4d29d3608a8b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b288e8c7e62b49873bc5559d9eb7e1f61d12226941543f618e9b6cf454ee1b7f +size 1180 diff 
--git a/global_step243198_universal/zero/model.layers.11.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.11.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..664588d069488795a18f4b889b5148877ba80124 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8259666074cb1115942195e79617d2def498bd005608085ddc4b0542b200f46e +size 1195 diff --git a/global_step243198_universal/zero/model.layers.11.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.11.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..906a62258958d46a1c553add27728c1fed837323 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33c1bc31d7920e02bc48014bf15fe3e56248a4e2b81aa2bcb6364c9c767bcde0 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.11.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.11.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.11.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.11.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..160e1520edbaee8a7bef1e31abc95b5d012fe52f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:fdc7a6e6b7f720c4fd3dd6af823580ca22c74045dd600e7f3af429e843e930af +size 8860 diff --git a/global_step243198_universal/zero/model.layers.11.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.11.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1cca7a82313c3a2a645208f8e88f358c4d6b07b5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1e88efcdd238cbc102e2dcde598d7f65bf6f1ce6641004cba63e3aa4c499594 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.11.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.11.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e98ade22bf96efcc3f315d9af941168e657f6ff --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c9738ab4e95a6f1fe7195e2d15647d55a01a7b16c9d19b7ee5294fd8ea6de88 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.11.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.11.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.11.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.11.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..273613f554b9b430e7823b685735d7e0b8100e44 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29af0b8d45fbee24618f1c19834dbac0258b5e6c2315bbf79b774c68ff4488d5 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.11.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.11.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3d6a52b9904dad360b6fabb4323871a085a053e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28a38c25eb6128c676373e2156bd03e12409313cc39103633693dba35b80fe93 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.11.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.11.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..26a9fe870410dea548ac989a4924c3ba9da9cf89 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45c2a41c0be6dfdc71277061ff77538d7e0c7f713d9d5f0cef62af817d47dc3b +size 1165 diff --git a/global_step243198_universal/zero/model.layers.11.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.11.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.11.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.11.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6dfae8a12b50756c684ec8b081cbb7f5877152cf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:438756ea1a69af15f190344b4f5da733f0d9e9bd41876e7ddff73252d021abb5 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.11.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.11.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c03e044fe96a492f8ba1dbcd9dcccccd91635d8c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbcceded05bbeb54eaab4f3369662d8e95a3ac920b294ed69ecd9985c2dec3d3 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.11.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.11.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f094f6684bc7be3ad3c44173e599d47770b1493c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38663dfac204440d59110df8267ef5d2774ed1853fd40ea0e8341ef17e8b100d +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.11.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.11.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.mlp.down_proj.weight/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.11.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.11.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a9b590861db8d1cee10bc5d5ee6cd10d198acb26 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11cdb4b1684b9b5ce4acdd2010be2a4f71a3fd17ea3847f73512d745da69eb1b +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.11.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.11.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0183e03ea59f470acddc4c328d8dc6d99d71a2c1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53ceabd99d22d29027dd33d3eb081f1197c0b16d6d65c2bbff8e33dcb654a2b4 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.11.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.11.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..42296a6645fc0baec63f519cb7ad490b19cf32f7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8efdcf1d0224406224fb974a6fb37be2e1e220a0c2cd11020b0d0bd5a04dd00a +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.11.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.11.mlp.gate_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.11.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.11.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..02cf355241df268bda8d0205159a0e42dd6d7338 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c08d59c30481dda55d5ce1e99fd34649060342c4e1b6901483cdb04d9a24e77 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.11.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.11.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2f6b622d78629e171a8c6866dc56f10973ec7b3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ff9574c46d664cc459acb81efbd4052d981223ffa58bd10f10a929417e054de +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.11.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.11.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f507fb204a59a5c9921c20201dc7dc30f3af1156 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3910a84a024dbc41877d5f4ea829f2a18ab71ce3e6ccf9f52e5c7e5ddc6c0529 +size 36865165 diff --git 
a/global_step243198_universal/zero/model.layers.11.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.11.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.11.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.11.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e07796bffa4e92a69bba7392d942dcbf1a3286e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95a52cd292ace56f53aa9f76281a19fc7003894bb5470419a6980ede38dee2de +size 8860 diff --git a/global_step243198_universal/zero/model.layers.11.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.11.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e075a10e05aca02ead6ddf431b817948c80c56ce --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58a3e25c3bcab0d146fdda015e50614f2227488f72be599497623049682ebc29 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.11.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.11.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbee2e80a3b7c2d17460023c50c999d40a25aec3 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.11.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3384953bc31c604f59668990d0611075d91cf5e60274aa65df3c5530f5c7860e +size 8781 diff --git a/global_step243198_universal/zero/model.layers.11.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.11.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.11.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.11.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e939e43315e4ffa92756d530d77b97d59a87ff9d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394c598b0e695872464af8fb829885da09352605ea0777a708c2e8b686d6a368 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.11.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.11.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c477c0cec76aefe06c072e07c3f4d08682632ac --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e719381f28a97e7e08ad2f46b9a85851fc914c81b536061ab3e5174254daae1 +size 1195 diff --git 
a/global_step243198_universal/zero/model.layers.11.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.11.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..918a024dc86159fa0e6c964e72ba2fc7b634f508 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81e1793c7f0308a42e82bf1277f60ff4fc7a7c7f683b03546cb8e8fc46d41817 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.11.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.11.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d4dc003a516f36b5540f1a805933b2f9120f37f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41cc03a53db8f95446c742a293ae684024a9f05066bef7fb16df95315cb0f49f +size 2716 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c07e2b18e51ca9639b3cff738ebc477a6a4b79d --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f5cf0cc9b1f9b60447c04130ae738e93d208be8449cc9b7eceff1560671670b +size 2731 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..92a060ddefcc71dd32926b3917cbcb49042f89b4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60232f9f5c8bf497164f20d321d0bcd6d401b39e5c5101c63de077f4ee124bab +size 2637 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb11a4ba85dd1a943e99e9c432ef5deefb2a8429 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae1918237048f6ac0374a38d43227460510c957fb16a1ebb25b83e6dcaa5ca2b +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c19d298108257f2767c6086a6dac927dfa1b276f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e7633c6cf8ada8114a28d3bb783fae4442c44eb0bd88e84c2529ffdfe8cc466 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e04165448f9458e17eca51cf8c35bd7d1f94a3b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdfd486e040d1c1300ce3b54549e68fe6c4aaffba143c24f8f7b6b65c5f6f797 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1bec7a61ae2084abfe06a9c207e6a9fba324a88a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:46f5d8cc7b1d33d0e3d09615b2f92fa04e453c8ef2590cf6bd26fc30a236d895 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3dd2c5a41caf383e49f84e5b4acc2725ad856f8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:558a004150e4edd654fa4d891324f709a35d8c1d75179ee056c6bdd9502f99d4 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e51ddf48e40adf9062d005efe0beef43fff4bf4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a66328d419ac39dc85904bb33265b54c783650ad3782435fd5272d36a9e152d0 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.11.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ca0585815ff3ac0ded93a3add7e347e1e181209 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b025d19924f49b8624b9b6f62e041915a7483a79e3479154f652b8c8479f8c4c +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.11.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b552ef6db42905f385221e97c180c295d3d0ccf7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:201ea99e345cf27177af8821c4f664ded4b17346aae217ecd462ceed6239dc06 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.11.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8590e7db1dfc26a5756d8a4e42fb3b1696c8a3c3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e04eb4309aa8295b4a521c9e17f554313be123b731ecf989edb62432d3c03531 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.11.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.o_proj_alpha/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.11.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..24c0c5d8441a09ba213f71ced0b05b4d1c84300e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0aaa4f7c342e3b56d95e339e4eb655326f3c4f425dd6feac6bea20447e0c29a2 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.11.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..079ebd369cb7e665ca090b8884c7299599da9588 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47c212d7f4dccaf036494d46b5b4a1c3496b7518bdd34ba5b59a65e15da38579 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.11.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..841f01da1a6a42590e50f1132dd647082c2c74c1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab447bde95484f361fc862e73905d7c9317227fdf88406eb5328825f0172d5f9 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.11.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf5569e7d74b2b7393bcf8b9b6428d2ad77da518 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3e8367382719deac9ac4a300e651d7d5d7091a49c2a344be83620c197ba74ae +size 8860 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b39189ac5e99f75c1400cb74edfe343405753bf1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f50bdd82a9a3f11b037abd0ab1437e3716f1c9e44ad9098d58cafebf408d500 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..15ce39b51ba3e97d180595310059178f450dda0f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d761e45a7a16f9bc2ab1cce2a5a5ce55fd4f9b587e3ea6b08aef15ce663a1ad +size 8781 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7809b4a4081b542a47f4d398585296c38bde0c1b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cefce237d7879cb5fc862cefc88bc5277feb6560c6202c122fdb6715ef4b3f91 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d812801503c8fbb1690300f2e8f41e821091102 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc4d1f51e4b4003b04db479a09e0166c02df122ff6a83298eba4c3da82e0c3b6 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f70f824a62a4a4e528018d2fe11786543b59776 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d9255839b53ca8c9df6e49f16871fe6cfbf4277dcca795e1c951aa2e029933 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.weight/step.pt 
b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..06bf03ebd2b327c60f92cf59461225e372f72829 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05a6a2daea25f15ecb7c1195d6def0dc5a541c9d8023a1b1eecb6e8be3e8ccd4 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a9de810138a5fd06c446850553f3967869ed4ef --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2668836fdd3834ee584c572c67baa872436be573f722a744f72e1bb0d0a5673e +size 1195 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..00b33baf5c574d5e5cb8e1d235439e8ca6fe15ce --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:885cbf81878989780075b96ea379978173a5be3e97abd5241f170a6ab1bbf0df +size 1165 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..564e539a0f0aca90ec76a5d75c17d5b71492031b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29007717a0a2ef6f93863525f34c65fa487698d33e09c5cb0896c3b4765fa2a7 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9af373cd399ae566e4fa49b3d30404f5fa357942 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1076605e097833b587c63fa837b321491511f5c0f18f03f9205f4136f636ddd0 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d075ed793096d214c96f20e7048e0327cfe1ebc9 --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a033ee38ec4d60a5b4009ffff181fda6c9b3c250a96802559129b99b4b80128 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9bf5da16576a89ed378f61f632e738856f65f5f8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5991c88c613c8561fad6a732106d49dc771edf8f7bee9592465cab0f4703aec +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b48b0f64f619a03ce717b4ab811026cc9a667ac --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6de7fac27ff441acef69d111caa373e625c6182825f7f8ae0e2926cfcf7821d +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd0c383e483c8ea4cd31c7c3c250354e864b30cd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d3baae5d7be739e756968a9edd01118112341b17d3c494a696a676f76100a7b +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d3990d142d81e3848b7dcd4797d36c582cd5e03 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b157a0febb303623dce596713c4a1afab40f7af3fee9c712346aac47680df53 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff2ed2bd83c03e1644ce92caec3a2eca9d77b08f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:dba1ab44960ea3aa74d0439d9165d68134f836f019d8385ea026fe0b5b90c287 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..019ecea56b15e0c6d326eb1ed34bf75e5c4af4b2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d40f610843bb7ebe0197f1a8366c9174873b054c7442936722dc8494f77c573 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.11.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.11.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.12.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.12.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2c15fb99ae0d6eaa7fd94ab8d1841d527dfa72d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af2072e9dc8e3fbd37ca7f80845d9914274ac3aa9207a807d66e25bd58fde368 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.12.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.12.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..924a24147e911d2e2e7a977608fd6461073c5f32 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.12.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9062bd56b44b264a9539387882700dda7be87757420be36fb1f9dc4df74211fe +size 1195 diff --git a/global_step243198_universal/zero/model.layers.12.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.12.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e97148478dd0eca8364814eee0bed3a23407165 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13719572e729d195506b01373a93e14645fcf6f884ef31773148e864937ab62d +size 1165 diff --git a/global_step243198_universal/zero/model.layers.12.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.12.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.12.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.12.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc108b784f4fed37607b6e46227b1689ade7306e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9777c23d311ff3edc6e3ccc2ec6db8ced0841a97b51d1d2659e22a014245e9ee +size 1180 diff --git a/global_step243198_universal/zero/model.layers.12.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.12.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..dcfb0d1dd412164b64bb3f9416d26ef5967da429 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d398e5330bd2a8a74d1ace366d8cd5b1acab5886db48bd79fee894406f715365 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.12.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.12.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f7ed30eccb38da61dbc32cd7c49bb57d454bb49d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6766c8d75e71cc0ce21ec2ff9ca823802fc61b596a098893e6bf5843c405b87 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.12.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.12.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.12.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.12.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c014a9add9ae304bc3ca734d33abb3dc67995141 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d12b1615b3377a45d52a97da924d820441b3ce8b0c270f3ea059b28fb7eedfa1 +size 8860 diff --git 
a/global_step243198_universal/zero/model.layers.12.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.12.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..40f1d75e533ee9f20439ebdef4ab87f953cbf353 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76961fa281f0e9c08a6113000c5e7d5ca67cbb91e850a4a968002c036d3f40a4 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.12.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.12.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c9b3997a186939c85f37a3ad7e79014850737d1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22d5b70b72fc33da360617d0cec60d5944da7d3bd8ea8df0f94b841f9543ddb8 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.12.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.12.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.12.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.12.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..230b2860ebbbd7da75ed587f45603f0056e55046 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.input_layernorm_alpha/exp_avg.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b06cd0205c2753b7b1b9ce1d5506ac6e5da998e5c23cf2997ed168ca67a37dc7 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.12.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.12.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d6a98864ede7e5b9156cc03dad9d38258654067d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b73974f69c20e6b7ba019a6219edc3235ec7b86a632fcd4c1a402ca6a4211b2c +size 1195 diff --git a/global_step243198_universal/zero/model.layers.12.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.12.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4be1c631b88357bd370bb56f035b2ed5672ae93d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1301e1ce766a2c2442b804cb2f4f08846ce682e19e43438adfa1b5ded9ac5768 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.12.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.12.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.12.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.12.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..8c973d0a835052b291623392e03743a356fd2e85 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4d2cf7d9972aa9b3d8e8085f4d98d61f8d4029d13a8bceaf647e095e0d08b92 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.12.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.12.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c257a94f0e81e63a3807806c87ddf64dad8f97ea --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d03ae089295e5daca1675c31d4e77404ca4d637bacd164484412edbc2adc5c2d +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.12.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.12.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9efb7e6f29e82f20b3969102df800b6dd20ae084 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1dd325766d7e817a1a5a714850147e5a68a4d5114a5e96744c3fefaea51b2ae +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.12.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.12.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.12.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.12.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba255d373209f01a012b85b65dc8a1c8413a6d2e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c384b9eb0fbf548d7b0d589f272e83a4325e560630ea23f5ee328698ae6c5df5 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.12.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.12.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1d1a612fedf4af1fb1b89e762d5880ab2c48c306 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f58ad0a43349da9e5d0e9fe776f542b0f8fa9d8d1e6ec5588b7f32fa3c3a700 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.12.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.12.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..77f63d1336e916854779df8d97283237145e9d5d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5124569e3f85ce68d91861fc03c876bdf10de1fa77468e0b05261315625cff19 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.12.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.12.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.mlp.gate_proj.weight/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.12.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.12.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8fb5c98de162d4889773393f76c707c0735d0e1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c2880c858f60f3caddae3d9d45011fa6d9177c3eb9a0a97e8b2e207341b7ed1 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.12.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.12.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..828c886e1f4ee06055f88eb9a9841d37851f443e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7596eb94338f10c0a14a8a8463c932e8f89152ae936ceb7085f1ee1c0143de2 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.12.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.12.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8436b959d61133dd4ea16f325cbb44ae110d79b9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2af0c7815fb0f18f39ef654db4a028f23849454bfda4e98c0d8f87b270d3650b +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.12.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.12.mlp.up_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.12.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.12.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a311788391e35373b0772cc471244a4483a91b10 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14d83fbc45b8dc9a224fedfc664c8685f5e051af6cf966e8a8bf5de49e774788 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.12.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.12.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..15d9d0d9e34f66af4f565baa1988aaf26718c1c3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3eb3fba7ce2f7e5a5f8466174f800cae44e048b96e90af3967c188af92cbe917 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.12.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.12.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9980f49b4ce02128fb155d4a8e48406b805f6191 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0f5de3961935e30d3c50864936e673330be21aef06f9ea19bc626e493af915d1 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.12.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.12.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.12.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.12.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9773d99b6a0ca4f7f9aa3f0d9271fab1dce5b469 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5056c0247151e24803a4a17e8186c80e266cfbda715fd790fd44a38cf6a1b783 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.12.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.12.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..52977572ddcb31199984e88c2a9ff115db997ea5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f70ab556bff1b9af1964d3ded78c0a1f0cb6bd8eeceea489de8db5218f00650 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.12.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.12.post_attention_layernorm_alpha/fp32.pt new file mode 100644 
index 0000000000000000000000000000000000000000..413d9a9c459434e49ea68775bea76e82cadd91f4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1748ec084753efc989b208b4a086f969afcd00ae36440ee62a4a529e3a366565 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.12.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.12.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f714b4466d88038d373968c0ee43904942e824c4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fca608e84ab7b32f4fc8bd5a8b647346fb10a888fffb7e29f8a10cf0a3091df6 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..384f6f07a02687b343c0654cef907ee9b564725a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ac1871d6e46c7babe7d0d759b271889a8e313058539b0b9a722c0b3c4dea9f3 +size 2731 diff --git 
a/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b7a2c36eb179ab077c956be8e38735a054e7113 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:680e5d40ccd7d35e2c28fb0257afca00a2ea9a608c34e9ed75bf98048fd42eee +size 2637 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..12904ce1fccdec11914382d955a9aae03367194c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbd57e8927a17d8593ae6a2d665942d496c9e1cc87379053729c1c7822837d4d +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..851bb05ddd2409a161ba8468fbb3f1f3dc45e0ee --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fd798ec080484bc7d33557c5dc98f43cee7b28622417373a54671bccd305639 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a59f2a6629d2867a72ecbe027b6c57b63f0a852 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0537dae1bb3909e9ec23edda317347ee6da4c8c0e38812b84a01777f6d0dc0a2 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd76378378aa128717befa45069114171b2fa9a9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60cc4c7b38f7948c16458d7ab83764d62c55d90941508bec050231861914c195 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.k_proj_alpha/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..813bb98a7f6d0b1c5a8fae0695720088389c8a92 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e3e0cd9f39bda2147a4b40164ca4823286dcfb6ac5ce67dfc16cdfb2888307a +size 1195 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d48c0cc3ee5856858523662a3be2a3c36f86f41 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1af8ce4dbd65ae53bea4c395ce1f4ecf3d8aef6983f77161d71d0188a2b73b16 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.12.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..63397e7f9636ce36202fb1424ebdaf52e109ec85 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b7d265a5a359c38aff4806a3dd917360d62a0c0aee5533b107a7c61449e3989a +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.12.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c5854feef66bb0f7bfbef76723a128f48ef3b11 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fea1fb872c031ed3f4f72bdbe4960dcea9d06877c1aba59865d6f5bafe6ba68 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.12.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad7f258fb223c59192c7f49a619c68ef50d613bb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2508f5ecd2e866433bea0de71acbed9fbb6dd1af84e24bfc9ddb2e30bcbad861 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.12.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.12.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..b319970a55cb02c6b1b1e11bcf9b175558f5d218 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a7e2e1e8ff48f4513cee39e6bd2244545168ee74fa20e1c811e7abe9fe69a5e +size 1180 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.12.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..235f3ba4c27600b28d6ffb7e4cba04b7a6ee894d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b93831ce6bdd3fee195b1d524f9df3e3a808e0642adab6a498dd08c8608943c2 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.12.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b97dd091599ead061d82828ea2a6c6561811549 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:041705713a01e1e86966bdf67597338ccf301bd8025a909327c0bb3c875fd1db +size 1165 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.12.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e2cea257d7971d1f47825f6e71f492379814aba --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f537b6c85510d82002c4381ef5bccc8e163cbd1d17c3c2aca9a13ac7a8a9381f +size 8860 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c1ceb44bf7e60b13f4b8855de918355a255d0f8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04ab106eb08541481723df8aaac2858765515e3168be64962425ff1f17d980bc +size 8875 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5abecf2816de02f4177ab466f134f25cb42c38e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f64b88656fddc5dc29c9cd4c105f07b986ab9f95f796a2fdb349af65e323c026 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.bias/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..530f7408225706de1466d733464447211711b4db --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdb2a0dbc8f460cbc257a3256b1a196ad333c703613e7517166bee54775e1b49 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..701d3b8df69aa7a7af29b53b95aa1b4a9d63d5f9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7882e02015db3e583e91b282ba9b9d1e6d39e4ff5482b85bee8fb4d572fe0dc +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab26a5f0a61203dbee1726742fe9b6e4ca21ba51 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20192abe8b0ee544df8dfb95eaa0ea1bae5d1e3b92ba65005025be63ed4b3fba +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fc1d23129b4bbd0e6b327046c005eb9c2da33658 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:552846fc6bbfef565eb79b42f8dfd8a8ec57d4bdaa1c53735df485eb135b6d68 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ccaafb2b15571b1ab2a911e788b007431d14061c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d41e2299937f31bc3039688612188a8de8d797545bbf1fb8ed3b187202d679e9 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a7c5f3c79a872c206df723df061d508d7ccedb6d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3c0f62bf19b80c702c7e895d245052df3520d2060136547cb7a05c67679d30b +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.12.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3661e3c59c0fc0b7d27fda253eb23148bf15b584 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a03fa2da4c3036e43a01dc11130d6162bd802128fea0f42e4c60da25ae09790 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..67bff9232ab4cc0e8ca4cb29bd4791b466d32438 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0281da640bf72429ab91a2cac1fb4e42595c26a625a8bd7e79cdab796d82129 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f8193507d18b36c2fed894c66ed5cf97a087159 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.bias/fp32.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3574fbe2b04b09e58e5b6bc0e4d0a5bdc4749924209d20b82cfe780a998ea56 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a6be1f48f5e43e27daefe7301bd82dd179661e4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd559801a917cb9741d7a203de7051a6aa3f7d2a872aa13affe29129a104ba9b +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a72a95ad8ea9adbd91918fa7bf727496c468075 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d9f167118eb96065f9e40ced5f5329ec2846090d7d4f443eba64a39a43b41ec +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..f2437855c474315e6102c01bf97453e576531f14 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aa49a84b623dae4cb218f893dc10baf8a5cba5e885adcecb71148d13b474405 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b55da1af57af06ca995a2a5cbf72d47eebb76a13 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c07966e6514cc4377219184532267827cabb73d9c90a86466fe5593c67bba947 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2368bbf3a492d601c9049ff137b21356efe027cb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b967e70f1e4d8f4bba54abfa0f1f7d6a446f07c9821cc0b30a0bd895f43ec8b4 +size 1195 diff --git 
a/global_step243198_universal/zero/model.layers.12.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7cabaaee61ead92985a5b593b611ca77eed6381c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c4834ed0cc4e08dc835d9cf5eb95ca74d1703de4537da4381163d77ef9cae7e +size 1165 diff --git a/global_step243198_universal/zero/model.layers.12.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.12.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.13.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.13.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2400496884488158cfbe4e277ab23caeee425c44 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75b15de17e3408cb7527deed126a32a39be1a6126fcc4ac689680298b6c6e2b7 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.13.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.13.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..81912a4619668e7c9d3d2704a0b4ac0fb9855f5c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:9ba242666d0627d119e89d86e984f4151d02e6524a69513b05b26c4515fd74f9 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.13.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.13.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..93c1683f930bbb1064fb8c14e87a4e2c2985d61c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6f01a604cdfbacc90f3eb8b5571a5a297be1dc6199a3ed1b98ea5a811ce8e5c +size 1165 diff --git a/global_step243198_universal/zero/model.layers.13.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.13.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.13.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.13.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fc823a657cd2dd1bed07d64e54fcd9e74353ac9e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:275623d6af541cefc089c7875cb206fd6ff0c58227e1228ff2db7213b56ef389 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.13.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.13.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..be00380e1cd32492462dd2a321aa9980c82de108 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.13.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38563236b3adddfe2f234f26f7772012e3c994c160718270b8d3fa49df2a8b5d +size 1195 diff --git a/global_step243198_universal/zero/model.layers.13.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.13.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c667f44c473787e35d0166521f8bd2d6ca06988a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d96674b31f94362a0d5c1d20ab473844c64dead27cae5624a7e58ea968c9f7e1 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.13.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.13.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.13.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.13.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa3c101aa63ba4bc5f4c00d5fc11cf925cfddfd1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30d2d7e081df23014eaceaec9eb6fd5cec90628e2cb0a27eba419e33306e8191 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.13.input_layernorm.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.13.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..28f7f81af1b14bd48bec7c8e9a3f3b50e307f69b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42cdd649a985be6e09097274b44c51216979bf68e0e12f6efbd9d6ff3cbbf172 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.13.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.13.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..30026670ec94afdde0bff8fada94ca8bf3fa8b94 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29cc34345ba4d67d9ca055db7540466d4ba1aa5fcb4cfbca3afdbd7f29f701d3 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.13.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.13.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.13.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.13.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6622f01fc2d6100bfa095f56d814de7e4bcd03bd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:16f4401864f7943118789a1cc5498b4b9b278a78920f47eec2f1baa77f905ff2 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.13.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.13.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8933619a822846f65e1680a16b4bc5be64f58fdd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:099ffebf6c556da8a2391469cf493311035838d59352c50c09d362f562a8ee97 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.13.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.13.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..64a72ac32b01f075ccb3f6b04b8aa2385ed4d266 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a72f2709c52c83a6c4f10439b6b910f6f70195dcd46c88abbdf9da75fed1df6 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.13.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.13.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.13.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.13.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..45088c506ecab86f2d9923846a335ac6e9031a92 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.13.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d347da52e9daba654e5408ba6743dd5d0bb6f42962e0ae5955f998c3d844a11 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.13.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.13.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1247d885d686c29d7176696688f0c602c856fedc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18300a622f2e184ec8f75d934a899cd12d17b3e44a93685e27a6eeeaf4a0bea9 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.13.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.13.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..491689f4919c3df8a3cdaef71324fbbdfbaa3519 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f8c7ccb6743c470dae2ceb306009a907cea3cb4f26623b928e33691994e928d +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.13.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.13.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.13.mlp.gate_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.13.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6fc1be85eac6588e5edb58036de885dff45cf400 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b51dd9713e647e7dcc971f6b76888aa9b86665d73a4886cb6e7bb262498db2f +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.13.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.13.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..eea36a53fdcbbc636e3a7186c63701a60d685d7a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b56b3a3c6b9c941a192c3a111ed0dcd8f04f8732475cabdbcab2095b4306a51a +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.13.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.13.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e416b753c96f3db739c86d00faa2cbee0ff2d52 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb4ec362fa3d80cd783b93bc68bc9cd0353bbbda6e0e6146a06a0043c8f7b3f8 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.13.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.13.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.13.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.13.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f120098185b713eec4bcafbc8a0f71e75990f7f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81040202ada9af4ebed3310d3c4b76493d5bf61a34400f8a17509972065eac46 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.13.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.13.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..046f9810cb8ae13857da99ee5406915f36e47a32 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2151daba838fd7177feee2c2375b947ba5e7586858cfbe55edef25d211d85cbc +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.13.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.13.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..44f56a460e7d1120b0a672c508e230fa1d3a1efb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4a6df8cd2bf416e0449ca31ac5b995ebf0eb7c538961ab385fa6abad6762d77 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.13.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.13.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.13.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.13.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.13.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..eecdd9347f8bc483eed615ec42a2449365a87dcf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0011e05daf93ee38c0ed679dcd65a2f0f1fcfded9f3569ca2bdd7944d8a5fe94 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.13.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.13.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c460ffcf4e44197a4449cd5015705c3f07b67f39 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fe37569d7416529fcfb37de1613d2f4fb0a348079c9d32a7463c2579093980a +size 8875 diff --git a/global_step243198_universal/zero/model.layers.13.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.13.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e4f796de953ccf563dc0d4c8314f636da93d28d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:037c8474163f682f0b0de344bb5ec330f3bcafded84af7469ca3eb3f09b4ab46 +size 8781 diff --git 
a/global_step243198_universal/zero/model.layers.13.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.13.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.13.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.13.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e259c3ec5cc7dab83c9ccf1658588f8be98e5fd5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa3b5509c1d1ae8bfb13a1402530b504d1187e741647798f47306a8ac9971e3a +size 1180 diff --git a/global_step243198_universal/zero/model.layers.13.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.13.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a27d22bc3f03d2a90ab09f1dd170ac6550addcc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f2202cc80afcaf598140146f51b3780d831592b6450481204d10534dbc823af +size 1195 diff --git a/global_step243198_universal/zero/model.layers.13.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.13.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a041315bc614daeb0ac69f0c3325bccd1098898 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.13.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:228c2965005a6854130e4ca16cf804219f576c3f91e6dc4e9e0935d378ef3488 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.13.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.13.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4df69ef83cda4596978884e64803e429ee23360b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89be5739ad94fc4cac72c15b998848dd5095f220d652df9208c536e84faf0217 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a82871603ba1934d3fa7f2b4b319102b0b8826da --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fa6d71cc01b3ecb45173489c3a7b905cb06cdcdda7bb300f8d33d7c993db9cc +size 2731 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.bias/fp32.pt 
b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ef90dfed8250b95021de172efdb369e796c3fc40 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e53e18bb483f3d0f7de2d0a9f255ed67085f815e58e1e195fd6d39c1f28921c8 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a4b00730baab05f030fe93b6f87fb38e00f8d12 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfc061610b74ab42f7fdb98cfbfbd448b9417ed3ebd8f0088df8d598bfd48c09 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..162fa60415db83c69e59887640ee2568136bf1ae --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0cdfa4a045208556c606d418adfa29c7308bfb00c90418f420703f20e369e901 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..073797bdc1ca34f17e7bb85c6fc28b07b941dba4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b951ae0228257506844d46063aa5924eac4b45895f8c695b89bd7374d75e4694 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..58d44a0b05496fb5fcde18c999384949aaed8ea5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6fd138d74776b97cef806300bcbd7ef5f23c9361c3d023d4727ff1ec7cda003 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..949a6f628990602275630992c7b9581baf86fc4e 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c910678f23a3ba268c0de52f7025d902a1866bffdbcad91db32c3af2807624e2 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4997a97eb52d78657021bbd3d2967d5a8acefd54 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72a7fa207db13c85a4ac37ebca635c25e46b5869285cf06f83f694993bc613d3 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.13.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6899d4f5cc9349b1e1efa592dce092930ec932c4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5713a06818352af17cd880f9da8d9d676ecd311e40ac36d6b8d0de24c10498f8 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.o_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.13.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c867b5f2882c8c4b4d79f539c575e413350e1c82 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41d65dfcbc7f7f63a9fca9bda7f42cdfaa287af27b725a7dfdef61c62f0f1807 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.13.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2664a7020d0ca7a80c88c7fa3df4e8bd3de8a5a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45c1f06a263686606b7e8526a2969373bec098a2e4afa1c0ddd277c5bfa793c8 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.13.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.13.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f821711d5072f6701652ecb4b70a08aad195d63 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:5b0343be8d54e0d36ee8e6916b740b9dc7d94fcfa2b012771a1af781095a63f9 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.13.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..28923a2314bea9e44f1840591bb3832cba586326 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8eca7163bd87ddf5c81c538dde95d875a9a674afdafdb91282cf0224040b3e5 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.13.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..231a33d38251c87dc6f36e7a72db5161844b6dc1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dd99f7fc9997fcc02299ba284dea9e4531ea4b3df43a1022a67cf882ef58132 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.13.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c18f6c1cab93b41f6bd9b62b27b21f90969a3216 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25d4617f9bc85bf5d522c3fe217f7ef816bd783ab1fdd24e1a8a2f6b243dc573 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..df7e5a39c61491ed16a8c2d3e52f3c04ba811fe1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd6792b30da13c9102574912934ec7dcbecf786f7c57c59522323a01b47251bf +size 8875 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ca9278a5eb738f25fdc7f7a06c0b75fe2df081d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a879f2bd56e706fb587ca117f04d9e925085f6893c7ddb8275b0364c3332db2 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c7732789894c48a0db1cf958501e9c6414b55a2c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41d376c84b8aa0f70df758e43521cd08075b05276ea54bf6e1f7e3d5f958e750 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7220a6e8dc6661df2002e57773d441550f731040 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fde0717687aaff724036f8eb4b02270fccfcff35d22918954b976e3f6d80c607 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf5ec9294cdd983634c46f7fad6f42a217e858da --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd78a6ce993aeb970e8c4e0b567900ed30b6d3c10e026d3b8379cd6cfc8168ac +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..993c436c4ae906ab87bcdf72e01a821574bc3d9c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53088245fec1c5e4157d9d66b2347292324d6d4065c8e07723804478f05cc77d +size 1180 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd7a5fd272776a5df34380e8e5da766d8e1f7362 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abda56e2b99b26f6d0c23a3a92c5c8b791b8b53c3f779dd3bf0c6644cda7ba74 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd8fcceab7363f12a4e1d8a234807464f60038bd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68e31bcfa535ea5ab80aa41c7cf9e20b702e3af18fc4a669b76306070b398a96 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2bbeb1e526379f4cccdc8d09d58360d565a69e2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b70f97df6f2eb3ab6e56e672620650e5fca0672b8a9c274a794d65e420d1c71 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a2813792da6d3828bc82926219ea1ac521dcc9c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f299ace19972a243cc83b32d6009710a60229c8845628d026007fdb48f47e6cb +size 2731 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..dbb384eff3a2accdb405e5c04ee129072a3ed41d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccedd1f19f9706f0949716c5f67a452387b1ca128d82c9f1518350affc4366a1 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.bias/step.pt 
b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..598536577020ac85561163e1a53b2c14739eb328 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce744898e6c0e458575130694e852a60e6ddce2f900df54f3445fc1e7b16a2e3 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c775ec33ed201418cd53d3c8e8606b4d5496850 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8595471d4bab5043a455fd0fff35bcb24b218addb3e8c80df94446d1d26f2981 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9535dc673fce6e27ac004025d6dafda27b7fe98c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a4c8f34856032caa0cc5eb903cd7eb743249446c67dd6eeee28055dd1d06b7e9 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..40f180ce1df1bda2c5c3e326c1bbc14761917194 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75f2599ba25593135110926ab2c0dc6d7e259ccfc870de86c639197a05fce367 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..752370dd5a46d081138536f672ba17de23b9e807 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03cded72d9ae12ed45bfffc412b29703ae2a7cd43aaf62e38f68afb90dd22d9b +size 1195 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..48cf4ce8d6b15224f1ba3705001cac00e08d8919 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d59ade62b8f0ff97fd6f3130c0f451c885e7d165d4e548ba57b3adc7d89a4a7 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.13.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.13.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.14.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.14.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a249137bc35d4a47ee5502b8b187b172230e9be7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03b7918b6d5880dd2c6c609dcce0ecc4dc06eda2af7502a46e0338633806a36e +size 1180 diff --git a/global_step243198_universal/zero/model.layers.14.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.14.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ecc075fc7def7d5ea81be893a954f72a210731c1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9eac2096b88344995f487026a9568ca14397344948a099e39333f4684547a37 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.14.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.14.down_proj_alpha/fp32.pt new 
file mode 100644 index 0000000000000000000000000000000000000000..357d80aa9fc9a61ad0970b0fe34c0fddd6f3492c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4688bc8a7dd0885e8172586c1aea4c3dc13c44bfc517a05311e40e6124cc7aa2 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.14.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.14.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.14.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.14.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a29f1ae10bddf525b552aac318d6f7c569490c0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60353939caf34ffee53cbf168af8bc464bac9447cbe92fc359f85bc4a954bfe8 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.14.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.14.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..81853a6f5024b92e24d598e9cbf92f8a7b2f2598 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a00872c06b24ca8a0220d1c53f5321256d6085d798a8bd4a7564e78ec9917719 +size 1195 diff --git 
a/global_step243198_universal/zero/model.layers.14.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.14.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..678debe086d17ae17a6581fcfa379751b8291efb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54ec5deb6b2ee44dba4bd0bc59af9f77573e4d610579ed81b22ba2642d95599d +size 1165 diff --git a/global_step243198_universal/zero/model.layers.14.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.14.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.14.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.14.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c41fa3e4143fbe40f5bbd50952c95d42a523a94e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b83a13babb5d8d18d718e2cddc42fc922afede33754521152c0f2ec6257e8478 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.14.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.14.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e75d0b0ec3ca033cb279a5d56c18da150fc662ce --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:a98f8919bc7b6ca5330bb06a4cc09d0113c444799082f9cd663afb15cd419d05 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.14.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.14.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad3a2519d83fb205cac78ef249532e42d3240c52 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7406564f85c7a3241c2e04faf6d106eaeaaf26ca874f19a67955718f70055b21 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.14.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.14.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.14.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.14.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..862564d627efb291a986b5917fe5e34064b8d489 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85c71a188e9480ed97e3a637acc997e1e96ee1f23d1aa8ed89f68c13c263681e +size 1180 diff --git a/global_step243198_universal/zero/model.layers.14.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.14.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..96668f17dbe4dc6aea8830921da8bbc4d3928e55 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94c05c6320494ae156f03a36c9ccc425e503956801b985215b5194ee2bfa5c2d +size 1195 diff --git a/global_step243198_universal/zero/model.layers.14.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.14.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d4f028a39040a1303984b80481346eb7c73c7b4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:854ea2f999b6c58df41eb7a6057f5db22fef3ffa11ff880b048e6a599731b933 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.14.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.14.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.14.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.14.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e5bf18a93f75e75ab263f5979934670901488be --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2af21e235cadbd66a2d4cb9d2111ad515ecc335ea919e65ce6e9b329b6d62fd2 +size 36865244 diff --git 
a/global_step243198_universal/zero/model.layers.14.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.14.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3963b696d24ae3a03632d5e99f3ac2d36942db9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28b04707af90fcd26907dd8bf0ae0bb27aab114336bbe810539bec77a3d60fb0 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.14.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.14.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d6ba59e51da7fae716596378bade03894e5504ba --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72b84565733a42f228e83686d08ef31b23d3230ffdabda6ad254f076d4007d32 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.14.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.14.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.14.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.14.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2253b26dc8643aa731bce9de8eeaefdd41321c07 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:2824fb081e3ddd99508218bd0b9778e3dea507b97dc06ce8ecfb793720d843fc +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.14.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.14.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f732e278ce38df41a83635e051133d8757b80c9e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e67984f4c15408f0cffb7451ba9c6723efb6f8f531944e46495d1bb28c2efec +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.14.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.14.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..13adbdcf0e1c1cdae65efbf0bbaf8084f5bad7c4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bd9cdbcf5ae6a714145fb9a961356c0e271971d2c7a9bf47d6a4eb813704e96 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.14.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.14.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.14.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.14.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..fea652fa268599337ee04f412fb83cf8e68fb84f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58f1bf9cfefd1b1f2b0de932b8a62f3372453e35c9df4daaa4359f9a4bb6131e +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.14.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.14.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a1dfae8bde6ce91395d113a25d2757e141816383 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e48b72df48ff6697cee8322030a26c330985e6c99fa924b37ed342427e01bef0 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.14.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.14.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f74dfe2312fa9bb6c901cca07dbf7cb3cdb09905 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50ef6578e7bdc5e6854eb5dc675c952cf7a68496b2b47630ba9255d4ad92257a +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.14.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.14.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.14.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.14.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c46297afc4a107ec9180c0697dfb09c16a33c249 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd6838903863271dedb4c0927acf8b641323328f70c5da71afc0851a3114f9da +size 8860 diff --git a/global_step243198_universal/zero/model.layers.14.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.14.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0894a6b5483e6cf210bb1ae158b358ad26ac7b6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cff0a3ffb5b5cb72414284fd103dba1dc46fd84121ae4766a5aaef5e42d9a88 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.14.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.14.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..97147730918e13a39f3bf9e058b25085773efb60 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:914fc7fc3e3ca3d49ddeb898c5205e56735008dadeb9d95ebb7650efb015e752 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.14.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.14.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.14.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.14.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e8bad2f08855032d5ff6b755428c9aed479fd0e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e36c215d52aa4e8f6610cc8ddf6c2a65d77c491d0b0b3d9a9d732760a340006d +size 1180 diff --git a/global_step243198_universal/zero/model.layers.14.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.14.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3198136c2d1b569a114825b3abb797eda2956471 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc20b9f1f3a33298c42fab8201cc7916a5548939c773aee02295f59ca3fa56db +size 1195 diff --git a/global_step243198_universal/zero/model.layers.14.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.14.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c6c754ecae1cccf9c2ea911a5c7ae817f8e7c2b5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6900a0a43046b4770d4889f973b12656cc9c8db27876cc5eb355bf975900bb9 +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.14.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.14.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5cac00d83853fc35722508a6ea35db23f9e4cb9f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:100677cfa415d6f3e6763a890a77861f504112ad54cf6be67dd3a9b803ffc90c +size 2716 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5697c4ff6b5ce5bb80c0d6b50ae278dd207e5a10 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7367fb3863e7d71d95d1c17fe20ed076c1f5a442da908ec779b588b16ef6bcf +size 2731 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..65bee6e67f23207b54cdd3b2cd5faa965f1ecc64 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c61847b47db620f293cb9469f43d00e5f78528e82032bffe8c2c6eed28a2b3e4 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f396284d2d753b0e22aeffd0e0dbe715449be9e1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:395a6ac9625a4478394711450f20fba2ce95d59035a3c75f1d51a3da88846e29 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e950c244873b6b640e676fdf2eb21c1601e3adfa --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2592dc53bd9c6701c063ad7f6ea350f9c1204ece2060395cd6d395bcb28c0487 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4011374c189d53aa6a0196153999b57f44739172 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de6663a4db79341e64e9bb650926ffed6a1640ce86374ee88abcbf755149f9cb +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f5979e919948d60bd9c33527cdbab7e602e04b8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e46c9d52130773e17999c30b9a5f448665e84a82ad230549d11ead1f1b6800a6 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f80acefaf2060189a5292c96701be57467f6237 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:4289ef6243ce5561b0eec4b17858b1b684d539df3124b06dc0ecf54d769bb591 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b5be3a02ded0b712cb1b740c3c8bea7a3967bfa --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8eb7b88746bd6307151c42deca2da0197e2f1610118abc24081f81461571e57 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.14.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..efb4ba366423bfdbbca0d7291dde366c970c5094 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bbb3077c6ac6583b2ac56015f40bda4127e9bde292143aafade68f933d1e949 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.14.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e6d0d422672a032da5098bcf12a15d70090e2d9d --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a38d47b7cc1b46cde0db34ef3934b218918f6dc3c58e8cbf7d0171758f76af21 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.14.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e21f4caa05cfd56c0c9cd37ffbeae54944e563ca --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0261ef3a036734adb9fbecd7483dee916bb8afa2f56165a39c00c77bdaf25e82 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.14.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.14.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c7a3c3172d2938987543393b99cf0adcebd659a7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b66182cc563a6925fd1b8388ec6a67bbe2b434f66ac00fc0a6d44cb16c376808 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.o_proj_alpha/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.14.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..be9655a60419bacedcfb84f8b98f7dee0a4d1dc6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0919ba52a3ec7b786c53047f54e12be19b4fb058853256f1ca385431133c2969 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.14.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e6cf5ba7721cdc5cc08e9c5faa0bdc7c5cd74ecb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b43fae9f0c15fdfe8bd987c36910fda9f3b4e66843371e4cf209f8d1bf30e49 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.14.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c6b859e05a245d58e892e64e42429898d392ce7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:6b678a1bb61e2790b3a4c1614f9676666799afac517a971d36ad71e89247a2dd +size 8860 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b6f7d0edca28a273b78bc6524d52c55849f870a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a32071ebf94f69a217b53890823d9b4c10b9347ffd99ffb5311db92b381fb04 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee627d4f9d064c7e22313900f362624bda8545ef --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe625ff1a16c22ce5b58bbcf0cbf85994152eefdd2f06645305aa17fa860e804 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a87e657b22be414a2ff8abe4623f3b183d309cc --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b650649fa4bcaeeb6d0eac4958ebac8f98188171efdbcfb1e67c075a777b6a21 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb3c8e3166f40fdb59b9498d32d61c92ea49d9dd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c53506c3eacccda2842d92da4e1fecc7c093b84146af9d6c42e6fd1bd2c9fff1 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7305ee485369ff24c320d1e54f40bfba4af37ead --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7b87b6eaaec2caba61930f35c62dbfeb9af135cf3e3f024c8ee184568261707 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.q_proj_alpha/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..04ed0b800553c1b14a8d674d40e43d586395142f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9bc821916d9f8e2ef5d3a55eb49e89397fdd46314c372e7cb48872cf9056e93 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ed7714261117a774f1af4c373e69669c9b18c07 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b48aa8709ec90c70a6f71f892c7b73da7eacac38c4371bab30b3fd3499d54c2a +size 1195 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c77add4f0008db227a620760ab77e635c0f368aa --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3a76b159bc684590cc2bae1678be53013ff873757fc7eb1210b6d163cfae405 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8481b2ce3a6bc56054750c3a0e30bad4679d0d89 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1195c926312cdb1fd550503471ed12b928beab91da07a7df1c391c3eb19a4afc +size 2716 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a88b32397cf8aa6bda88186e9a24a92ccb90ced7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df1084eee7b1bc63eb05f6e8f0d2b20249221ea1ae552ce82002e92754440ffa +size 2731 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4890605b81c17d8b2b466cb6e46031a3684d5346 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04bbb9a2f9f3858fac4b95ff1ad5d2f6d8ddf1cfc73acd1b79a6607cb13c5184 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c84497aa4d2a4f32b88eb752d52326f0b37693bd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf0cb745f7d74696508b52110983e557a08729aaedc51752504d1d2273d5449e +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a881bd33522276abd4566823c0dde3146e085bf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4a834b1a6ccc9402be32f924f9cdb2bae32c9eea4c18f2056d8666beadf06fc +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ffdedf172cd61ff21b419328a1ee6b21ad56271 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2335490d39da79fc8c34e277806fa7aac964d782d08d5fb853445329fb6195a +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.weight/step.pt 
b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e2fe97bf47eca50078aa1bb33266e61943ddad0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93cad801c098d88b9ac9f854ed45e196e059fd81be2b65403797a4d29a35f7c5 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..db2218e12db5deabcd44a2d4cf403a3e5665f562 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e92b5c3c33cdb191ace7f2fbded726b8d104489074e7935b186cecc599c113a +size 1195 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa0edadf9595b71ede1853336588e87b42bf993f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0e152509e81c8cbbd1305dd3c94bdfdd7b3b57dd63c9e0d9c98c4bf2700818e7 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.14.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.14.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.15.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.15.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0434d0e016f4560ad82ebb916424aa1a7fbefc5c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db6c3ab77f87421ed45f8403a329c0a024963aa796af21eab57978011758f34a +size 1180 diff --git a/global_step243198_universal/zero/model.layers.15.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.15.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c5b05de52c04fc9a0db7b42880d3daf84e6a16c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e40522db8b3cc2cc5698f953b611f8259e4ef51f7b7fabc240029b2bec9f6034 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.15.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.15.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b8f76e2aaeb0b87f8c54d1ecb34ff9516ad14d01 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.15.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16cec9215893c8cf5fd7f51d0ca24c8e411904985eb48026b66cff0b21b50947 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.15.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.15.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.15.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.15.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6460ec9767a4d139f770e098dc2c1b913fbb85e2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92517fd526e2a133b62565d05d42279afa95a5bfa11b2eb23e418e7039d38710 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.15.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.15.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8235ace785f14dcdc827d6dac2622eae7943702a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38f56653c6e30118ba0af3dec49b04d0111d552999d2654ffab9b6350d1a6f72 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.15.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.15.gate_up_proj_alpha/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..61ff31723b81076d94ef9083ac3f2e45ee9fe559 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fb22f88d7548736d84aa7730e30e4e286157dc7211a635eab6bf19e70246651 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.15.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.15.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.15.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.15.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9070a83f39a185728f226519102b92f9fbde22d4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20ee841db075d74383afe889a7fc09c61f3d19b9bc71d7666b801557ef522aa9 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.15.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.15.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..70cf7e3b3bde549ad7e11efc0eedc89462ca8dc1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b6d97cd0f98cda0bce2b2cf392d1648d91ccd6c9b274b078430415c50b519e9 +size 8875 diff --git 
a/global_step243198_universal/zero/model.layers.15.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.15.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4af9d2af8707ed0de76dc721e3e573ff0a36af0f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bdf4e8cd6b0d79368281faf78e654cd6660820d1970ebf91632fbebdf26487c +size 8781 diff --git a/global_step243198_universal/zero/model.layers.15.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.15.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.15.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.15.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..473fa38bcd9e51707cb2a6f7a70cac5f90ff6fce --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:070529f349def4aaf855cf185a26bb951891041ec5ed2bfc2a307131498d23c1 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.15.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.15.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..55796808db291009a6d8bc1ea7066d806f5925f4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.input_layernorm_alpha/exp_avg_sq.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8797dc3eb2ba10ae8cdfa6d04b2fa2e18ef4144496a28336e46fe82baffded3 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.15.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.15.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c71d4b3dd2d894926da5d5fce7248e04eb69cb5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6a5d4583da6ef56c4fb62ca45eac7532a1a6d53b73f89128aa2df06cca5361c +size 1165 diff --git a/global_step243198_universal/zero/model.layers.15.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.15.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.15.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.15.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f159061c6cd5ccbdb10b495b5f4aa3d1a1f58446 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35bef3567e3bfb16c1ede0ad5ca4da537857884fc68cb994ff41b3b75ea57195 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.15.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.15.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..1683f59ac99c35bc39797cbe1aa46a4706ae5f64 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:364b16012c2b79782dc70ee2bd54d9d71834c652513d7e94756a19e3010b0420 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.15.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.15.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..43c8aed471c4909cc304489202d02d41e35b5a69 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa29e29b302c46ac4245add26b39434ffcd3e16ee8f943ae7854037027b035af +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.15.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.15.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.15.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.15.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e723db26b3de64ea7401b852ef1c3ba1167aae6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5716ed74e0c0bdd782cdf8539a47efe113dff30c7b756480e6305c789c2e2183 +size 36865244 diff --git 
a/global_step243198_universal/zero/model.layers.15.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.15.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2cf024a4036be23909be94e4e2b1c4be4c424465 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52420feb622a1cd8d30579211289f19b95c35142c604974a63d5c054a85fd144 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.15.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.15.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..35a5b5d78cc4820bb4b0e687fe07acb8021dd51a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d05c290568f85f1bbfac055afef2ff7242ac32fca66eeb8440e4de6c5a7d929a +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.15.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.15.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.15.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.15.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9eb51a9c488bded2e857cab5787fea5277bb98a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:701bfc510323758bb574a2d8bd3839bdd785103a8831f691a501732ca254b8c0 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.15.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.15.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..623aa575899f2321d6a97cf27b40a6b761e3f786 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c1acb67240b007a513170a23b153974739a5bfecc7c87438f22d634cf09858f +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.15.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.15.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..26eb6db9d516845965f8b4015264aafeac738c07 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6ec9ca6f51521dc06420af8c919e5055b2ad8f5173f2683d523d060d46a1062 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.15.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.15.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.15.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.15.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..b16aca92bc2a0045fad5b8ef420147ee46b6044e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44155ad288e8e0af30ee72a6de2d95f483e584b7324be63483cfa36f369cc56b +size 8860 diff --git a/global_step243198_universal/zero/model.layers.15.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.15.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..74f7aa04816a4121909517c51948f04ebe977c6d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74cdadbefb042f56409a94dafa97741ff079fdad77016bcd07e44ed8b7fa7bc4 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.15.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.15.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0bcb9b1ec760087555dd43eae2a8d10d0eaafff7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6adb2775a782099ed6595d90b8752cc3309860fa7cc5c48047668cf96332f370 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.15.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.15.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.15.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.15.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ff6df03cd4875850c620c128553b891d20ba8f1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8665cff01e3b6e665998f695531d1040514b94e69a18678be0bec0bf24eeec4b +size 1180 diff --git a/global_step243198_universal/zero/model.layers.15.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.15.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5ee8fd47d83e89b5b7406801660d310f84c363c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bed117e30c459fb1c5171d99d5b9d1599f5464d51cfe57d2bbb8d5046bb5d173 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.15.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.15.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e3dd62638b21681a678181244aa69ac7640580f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ab7ad7a7bf82bf85f0c89f5c02e7ba70d80793ae34572fb3fe5c89308533b6b +size 1165 diff --git a/global_step243198_universal/zero/model.layers.15.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.15.post_attention_layernorm_alpha/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ca4a9af76219c403de7b3e5a81dab31a7315445 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f446a3d2de829c26e161d415e468c6a1edb294adbe7d6a6f19d19db59552d639 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc0b8a1ce4f1fb4aa54a3c92512d52b44def63ed --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31fa8113db7e13462ae3f18e30ee1adad98696ca46d468d53a9a393b944f37ab +size 2731 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1be9fa106d45f3dc9e54370fad831c84c6f15a10 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afe9d09f081dacb8746ab330fdb69565fdd30953ae724fba7897a69cda62ddf0 +size 2637 diff --git 
a/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce8dcb693383f074ccfd67e4a9f03bf10ab3f98b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:470e5c9b07f8a83152318855edd81b63a6984359875ed10b2256ad01e8a284a4 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c38e2144c4e266aa79465a7ff52f7b45d777207e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6fb57f24acd8828174c7d13c8dd56d98dd49a4e73a7243306aceeae54c35d6f +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0cb99584529aecdc46dc921e186a9ff23459961 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75f1eceaa404e5197bd173dd8ef51a55753b2155137fb8d00f602f8b09f4ae93 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..11fd5b94106f67be9f3fe7e5f82ee0be8aed25f4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b097e69bd394a05d475150419072fa4a3b00c5e248626a2ead6436b1bfb5b71b +size 1180 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd359ac37975c4b72c55194777a547ae85d4d0c5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86e8ceccd2273dd885a7248df58e3acc998f0cc07124e7f83e950d900d3bdb66 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.k_proj_alpha/fp32.pt 
b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ddac841d56b1c0e624a14b9324c8b26e92589bb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b282e56cfeb2a180edf376737a0f188d6ae7943a7d240f372b4ef4c2d93e0b0f +size 1165 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.15.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b249190befdf7557edd4104c340f04a2e7ecd00 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f5b2b3c1dd9adec228cda57c62a22ae63aac7af9a2ce6657b34093c5ab91260 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.15.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7ebddca8ef4bf63170805b5f01165e5e6186aceb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ee6570460053aa3c22391ef0c85710c622c286a9121fb68ba56c5416146ebdc4 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.15.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a1616c5d6997d5724c0527fe4f7f53717876597e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c8a915f53489ae7c7a52baeffb4f197a1848bf1d760d0a58f5f031370977f2a +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.15.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.15.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e67fe56bd62f60ab1fbca84500fc0aed3459db46 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35d870003e554ba01751f7e084db1e1ab48cb427cfbbdfc155603f5036d781a6 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.15.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..ab9c73efa4d1683ecdcf34b6815bdd252b8965ac --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:273a8754c904fae484e4c12ffcae7ce72eac9d6229e341a147eef13d80a93d49 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.15.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..77050d9451b3394b88ead09ed2bc45f4cd2dbb26 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5fd9326a09849dfd3c3972abb2c32ef0bd313bd4bb05b6d47ea5984d4cfed60 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.15.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ac7dbbf70f34d33ddd804fb6fa32f94e7c30d25 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dd82319f9c7a91ae0da55847c278c8ecbc0edcbaacb370ea4c1ee4f55af0e5d +size 8860 diff --git 
a/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..825bf2f7de8934d18daf08f7cf818bb053a68546 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e515e13af5b0e8f400b2f559359067f375a4cec0a7dca551289f47dd65c14bf3 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3119a95e2cddfbb213ee1b9cccdbb3cd4b3739a8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc8af473d8ac22e1055e3e5b72284904f1c4b4d128171b553deee3d62e2d08f7 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..844f8f063741dca8c7c1320ea1f913fa3b5dbf96 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.weight/exp_avg.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9639f8bc170b585f60e5071d9d5fd53002841ecc551fda28d6e07380847a2776 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae96c8f3cab7d9f7a91a4be432214154e1d5ffee --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a1bb0596d7223fa5133fd4ec10d7727227017ecbaa0a0a0a21be56a04eba6b7 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..51eb068177e32579c7d9973fea7fe5d7eccabddb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38e58eb26edb948640a39d74e1166f1ff16208a47a6e9b9781ff5fe83bdde732 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..484044b503e67ad909625d9044491fdeca36f7e4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e5fce75ad85e6325168329b5d994a9da6ad6980c0de232076ce2b01c597b769 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..815e888c49fb045ade2b1451af27065d5204da8c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb72722cde42bb88711132b5afc93ac3873f9dd47f9e57cc44edaa78e9892882 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2799390f4e0e7ce953b0d424bee19ab5a716a566 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c494ef5f2ae8696f1ffb49389008560aaa58365e41bca7f74d7c2c0187de0b48 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c401b98297c0c01654bf05c0d6a5c7a34857022 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9f75d6fd3a1817affe601868fa38c455968f45a75b2d969c9fb0ee531e679e1 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4661ae41394680afa7530155559ae2228866ce1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a14b9169e546748feff0a157a5688cf42bd94ca2df8bb7e045ec1165ecd5ca05 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..723f203a3647799e66964a8ab37617b080898509 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83da170071e62020cb8a0a2aa48891eebdbeaad33f8782744b302560c726813d +size 2637 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.bias/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5cc90329cd4966396b9061dac8caa338f0328ae --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b31b56ba4367ca753195d0a49b250b9579f778c7faa5d2dfa8b686bb373bbf7 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b7da038bf2f76115b65bfde8c55b5a5e4afa927 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7381782aa2b91b800d6a4cbc03e4f1d83785d5e1ac5079c10ea42586d8075604 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..95c3289e4f28d6429f48da4f09b9f77e8217e956 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b6de073d57a56c4431a07a156b271626194d69cd3265c5a7ccbba5ba58a2664 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..933823dbb4aff4fb75fb24aa1a89ac443ebb1167 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:931f738ca5b9434c23b3e89ad13eda14e2c5b392589a1e366858aebc14685b9b +size 1180 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1da00ff7a3ac250197b5a3fe634bb0dd49fd65d4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0252346430221cc09290decbc0eeae7ecd784aef00d7414a1cf5c5ec388d4386 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.15.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8cccb53418e4aeea9d70b2d337fb02ef41f90ca3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad8f2021f643141ee4f38c060c73a5a1c08c36958c6a8eaaa81ca2496fa9fb3e +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.15.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.15.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.16.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.16.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..98fd4d84a70fb1270640ad980b6386440ea2d437 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1506b72d8c44abbcbbdcadef43968718f63f9df105c4259997098c289d271b00 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.16.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.16.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a724a630fe707781a8bc53bf1cf9c4116b3c7821 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6c97a42658f3a61e2d8e866c825ae1f82ada52a22d11d5a52baca053d9a73b1 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.16.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.16.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3acb74cdfed60245eb2f711df41c1a8c3f66bfa4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:fc01c70eab2aa82e74570b6653a0b5a212cca77b81ba9f7e606e1c071ba4f0e4 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.16.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.16.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.16.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.16.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..215623f5f82c59f70a72000ec15221458513981b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:079a283afafee40f93a31522c197b6cbf2a4feae9cc026237ef560725d00947c +size 1180 diff --git a/global_step243198_universal/zero/model.layers.16.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.16.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0cc0c0c46ce10edfbe9d886df6a4315c696f3092 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e21287392ffd827847e4fd353148937259311ee208e3484408d55b2b58c6b29 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.16.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.16.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..64614d23b8b99002522f3994dc698e2a4fd8c749 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.16.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52557a0f8432ebc2b5fabc9466cdd23138edb1f1ff1deab46f9b6052a9e7ed9d +size 1165 diff --git a/global_step243198_universal/zero/model.layers.16.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.16.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.16.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.16.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..584a088e7d00ef116a9409ac5902d8a409180d3c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5757da1159e57f610d5eecdf661dc6114dbabc0a2c673880610bad108500870f +size 8860 diff --git a/global_step243198_universal/zero/model.layers.16.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.16.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..344c35ede34a6a0b52f6d000beacf46a9c621389 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55b491f7d2194c9d6375be36e96cc6225b393baecb95f0e55bce417beb6d9b45 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.16.input_layernorm.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.16.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..179bd1d9d3d287869f97c638018573e899fb3eac --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:726c640b9970b073d5ef7565767edf3e47d5a7e7f3fa6e413be923245e06c0ed +size 8781 diff --git a/global_step243198_universal/zero/model.layers.16.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.16.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.16.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.16.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e702aa15157964a5d4cf331d31c7f44dcfbb9ad4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:987b519ce3017072179dc4b372d8e5266918c527044bb4749a9563ec6ffc86cc +size 1180 diff --git a/global_step243198_universal/zero/model.layers.16.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.16.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1dd20d26ab3295d8003095883316aeda0c706247 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0fd84975dbab8f2e044ce3144fff0afee412d8bd18b5d0248168d21eb40c8e86 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.16.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.16.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4b0adaae65e79572ddab7794e2cec73d7030fa8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e79eced35d4453e44d20e9333ac56b7fcb186f27d651221b9713752ceb2ac6d0 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.16.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.16.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.16.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.16.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ea8944878920bc8bdeb10b1a1b50c241fc5d84c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09243f75d35273e984d74999a8d94c4152db6af2f30de5a3e1af9cc880e96e44 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.16.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.16.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..da214c807f5d5501ed740c7dc0f71fb6f3b286c3 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.16.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e47cf265d77faa7a8c512fa2650cf12374325e73a53d5461168bfd3629f5c20 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.16.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.16.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..120b92f7fa2db5d97c51540fb2d50f802aa60874 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f2c552314b65e66f4be79f91011c1ce8e63a4431b492d6901fbb1f8be4d7042 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.16.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.16.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.16.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.16.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c2ab27e89a66ae8b95b3fa580d13e297fc54a8b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d02943c711c293e11245d0a265fd92ba83b3152ca207ee9271560b3fff200dfb +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.16.mlp.gate_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.16.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..19510918e97a2f4febc68ba581fa360742babec2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7fb3fe6694bda4b6cddaf898c2b082572326627237206fe20f57429ad29149b +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.16.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.16.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8fc527f8aab7606df9d2b3d61148eec56189da07 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ed0d873c8d8fd8d7a28e1c68e6c890a4aee708a8a6dae2ee39061b5a384186d +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.16.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.16.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.16.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.16.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5fcbdfcc13e7bf664e3b3203867f01588b3f9d0b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:161c71f6de314474de9354f2328d370c8db25e47a327c2f8511f6f1421917402 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.16.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.16.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5192b30925f3fc6514b0790b1b7d1b1de54439e3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e30630898b530847e5305198ed0f838a73562585d87ab308ae6648559c561470 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.16.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.16.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..74fa9d82a6260e743501ef9f6a07eacd5690c863 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebb04c152dc29dd012073793be3770ed3e73e925cb310dd0994a0a506f4bd10f +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.16.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.16.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.16.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.16.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f4db34ee9300880c2ae64f78186af5a60b13cc12 --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.16.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da3e02a46ee3472ba910cef96531a4e56f92d9eef5cac4c2071b702652d92613 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.16.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.16.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..009911e4b2fb0c65f7f242461273aab2de7d646f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83211677150296e02ff73d4ac5de0e2caa3b56dee378d9236e563f577e4d4d66 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.16.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.16.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a219e0b43a8e3484a1dcc4cc6bc283c1e5b091cf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b900838468a4dc0ec0b2aef5010e4bf50cd21eed3219ea9e58b6f14ba94fe3b +size 8781 diff --git a/global_step243198_universal/zero/model.layers.16.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.16.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.16.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.16.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..05a45d0582fedff321dc16d64d7d249b925b8223 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ad3626776348c8bc5ff3cb8083d94361f1d20313456da2bb7bc40fb9eeeb691 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.16.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.16.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0edd9b77a1f82601852327185152cdf518d187e0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70b6c0ed13cc172814205bdc8db459fb6cc016df08f640c607cf7d53cfbd8126 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.16.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.16.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa910c15167c56ada1400937dec880b5ee0aa8cd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d41927a8099de0e02a3e234ea0dac2c05aedb63a99a38a77eff47e09c9658fc +size 1165 diff --git a/global_step243198_universal/zero/model.layers.16.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.16.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.16.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9737fef556b1defeaec444554c008adc7e44b077 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7af48ec2f47982566653a244df075f7f1f3a44205a2fc1d173a7d206946bdd1e +size 2716 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c34ba1cf6d1f6b4a9ade055353e9f5c5a21bd30 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42390bdb59ffa38f0d10bc7d4fb30e2a016178a508333891d5c0cf89d9fdcdd7 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..55d41564e6c8e1e1e6b342269970d11e345f8497 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c13672e84e70a25fa38c6920b6083c970106178d553fb59a2ba77788916a7048 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.bias/step.pt 
b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7cfba5d1b814d9158fd54d167f6441ba745e38c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ee5d4a3f97177eeed4fe2d6d618f110be6489b36aeabee60aaea6e1c8c6254c +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d04d7a61640a00e39c4480bd5f602042d46165c9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b3ec3d9fbff4edc2b32a8526b29bf0edfbf1fa3e8a6892e9573432f202046f5 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..50013277db2006281da54416b6500cc7ae0d998c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:cf98b0919e7eb73c53aa7bba8bc791a9cbc306a778d1edbaa26ea9d7dae142d8 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1b845c11cf647554370bca0609ae5aa2516209d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ba895e91de0f245990af36ca2fa0b6c8f78d9e646207b754b9672274401f7ef +size 1180 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..36f091607ff64b2038449ca4eb5fcb3dc17b9884 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f38ef9cc4349458b1586a8c70bd663cbca824543a418622c572c11663ca9edee +size 1195 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f05cee7e1805ef1f6f483ca513affc49add873cc 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05ab802e1ba41556028209b72cbbcee3e312f8a62fdbf59a7f3e55d7f6b556b0 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.16.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6812e32acd198a47c82a989e6b2f9e040b473827 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28f33c1bfc9fd65513b00ccf932047ceab7ae5a8224ac501d3499da104c73e6c +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.16.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..10589d91211089655567f2830256c485eadd1dab --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7a0b8176d885f9be047f6b129133f919b4d1dcfb5e6c44570b8c6839f178dc9 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.o_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.16.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ce9217c81c9cc29721eafd3e83514126a220acc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab5adb0896f312b5219a8da48ede44feef6e624d688252faafb7d00f5cbf230e +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.16.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.16.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..218af7e0bb7961e0abff2a1cc2f0b4fd2ad4e235 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7c3313c0967e2cbe5618d8b88ca3d481fa492ac2bb1295fe1e4b3be1c846697 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.16.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a894f982fce2bb6389cc5edb7c609cb00ae81c0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:798d4a02f4a079cd5c3627593bba85bc85eaf20846710875fcc4271ed85b998e +size 1195 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.16.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae8e52d09d1a5f99a8dfe0b11f1f8b9733264950 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6293b3e5b02b8af0b5ad3894c846a06687fa4742142f6a0ab585f302bc09be67 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.16.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba3e1b2f1db5d5e8ca4efd60260ab88ceeb15c24 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3435aae6482ee425791e544b858467debb239aeb692d89c79e1799e23a656932 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8bc0615a3d166b3e1e1ba1296448256d7410cbb6 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56fbbd269e77ae56203631d78243b554d3d521482b7d022e89bb4ce9c78cdd33 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b69c31b9f689b94455a33660cc2359a7aa522b40 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c086e93aefb2887b9a22c161c915edd58819f9d726699374c592c2851d34a22a +size 8781 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b84b773a2ba5df29a07e6b0c98cda8ed915f314b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76c84b9346f81ff604cd49951873de3b744c00ba4f4d571dd39d6fd75a19e17b +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad4e6c16a7842d92cc432b9bd0ab73b3d60bfb89 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8782a1447795a12371fddb64c9db180b9db5856922fd685a53a69f3ea674dd3a +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..378ea54af75e09fd64ff26a8507f7a8f2c169629 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39a673d26fd7b3c8f834626ed0f24cb0957a544c9a713a7fe1f28b0dc6082676 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..010d03939eb9d13b89f1ece5a3f9b2f43073595b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:4af68e60150fb49dd5c0b910346a7c4d4774acf3ca924f27e3fe43ead28715c4 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa925d53f1e91e7e8c9c129477c96131ba3c8f3b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc6875b8a2c7255005018ed0315d390734682fd9cb29757fc09ac99c59233077 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e8c7c68c297eba3dc0fbe9c7fe8113f9ce9a1f6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20f84e6d1d7e51a8a1a4f3f75727b8accce8b9e1e66b5bfaada8d1aa71bed227 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..044f13cec90854b453afcba030bdc08bcb1a9f1b --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4879c540305504b7dcf76823caff3fed6773c1afbd051d85ed19bd20b0721d00 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..146ad840de826580a227705fa7f9b47a0923eed9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97aca0907b1701281862306e5e24451c6074a2d9b0820bf5c3110f3420e4e593 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4a8184042b61d363bdd02e9167546e419f7b9ac --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60b7592c7288e9ee723806e4a180e09e0b37bd532f6d54d5ebf431bce91883e4 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..090af19b2332158eb3b4ee453d453679e88bd351 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa9eeef6c8d9849d66e7a49a8e5ba2d01e770236bbf8813b9dbb9e64accab6bf +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d91784b538d10add38fba9e56b6c7e8a092c451b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5ec1dfbf2d7929326d5416a79262393330cd1e59f22186e38686de03178f556 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6df687634e38d05f1905364452ce02b89e2852f3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5955691cd3efd7f3389f0b54ffce6cac29ffd6bd8d5236344c621afba6dc6e5e +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c76c990990806d57e4da9e5b46337595e54b00c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05b0cadca1c61fd153407fe1e9eea27ce7d947b4ef6f62472b1cfdff40040110 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..dbfe2dfa05bdcb2f8f49a46e141ef6596007aa2f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:432303166fa61a48c69e279065cb0babc29d1e6908a576e55fbbae84c35dc23d +size 1195 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1614564b8049ba5c0bb7a2328750f22226572026 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5073daa4449a9c6de3463f199b7a2fa3713e400ace86dfb92a44d8c4b63202a +size 1165 diff --git a/global_step243198_universal/zero/model.layers.16.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.16.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.17.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.17.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..706b1efd9ac80b00f6a7071d780ec8477a5ccdc6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b956ead58a1ffb9a399085c74cad934266a962425fd4e0f825a24cfd921576d +size 1180 diff --git a/global_step243198_universal/zero/model.layers.17.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.17.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..482e02ce34ba2bc417a525cc07a12e35928631f1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:724568a9c8c4d27dfee40250a8370d73cd75503b5ba937423f0a996e7f2e29af +size 1195 diff --git a/global_step243198_universal/zero/model.layers.17.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.17.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c7d1752bf18bfe62ab12cbc00ed66f79cb865d41 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d56d1ac91c991c3bab73c90c48374a7ec62d5327abfbe19998e1074bc15bdaa6 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.17.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.17.down_proj_alpha/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.17.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.17.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6758f5df3e254a7de577e84d3f6fbd05401a26a6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bd6acf5a66ba4bdc906c6373b5d68b254e48065baa0ec1deb39da30b9e5df8c +size 1180 diff --git a/global_step243198_universal/zero/model.layers.17.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.17.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e1d04812bcb1fb02a79ed86d41047ee92d4aa2bd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac3fa3f4e0d07f524b5faa5a53e24faf68a00c18f19709662205807d91ad1294 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.17.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.17.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b818bdce8e3e28517f5d71566aa6af3219c4855 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92fc026af32f27211d15f8088d6323e163dce6c83cb13916a19567807fdb3622 +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.17.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.17.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.17.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.17.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b2879a96460036fdf810ed7a43315f79b5d83a1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e84b9b8e04589c26af5805166fd8e43099efd04398968a7364038be8b592a9d +size 8860 diff --git a/global_step243198_universal/zero/model.layers.17.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.17.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a49710d27959020c9c21abe72323de4e2764561 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d0de16a045d68c6a2b4b18179db2742afd7f4a5731521a3ccc3dfe1d27181aa +size 8875 diff --git a/global_step243198_universal/zero/model.layers.17.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.17.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3271cd6b13e66b52394dd4e09b21554ea6b96057 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e163432e36f10f47994f7c84dd1f69e91ba7de400daed56f73dd09dba0cc9b4 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.17.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.17.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.17.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.17.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..00b908644d7dd101ff7a596db5b6d3e7a9c78c4b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cdacf52a3bd232493cb05421e89882696ae29fe3fd2a83c75e0fe85072e370c +size 1180 diff --git a/global_step243198_universal/zero/model.layers.17.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.17.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..28761e351340164e1c65c90999b5c6c2f0b843b5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d912464fd384baf3097850d559233c4f4fe3fd79f16ea233700789a6b6f5c12f +size 1195 diff --git a/global_step243198_universal/zero/model.layers.17.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.17.input_layernorm_alpha/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..dd3faa67aaf8198e32acc42f2bebc0511350c0f8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5badf439583b02d27c80f0c90c093e82542dfa8bf8a43f1850b807f02f16765f +size 1165 diff --git a/global_step243198_universal/zero/model.layers.17.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.17.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.17.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.17.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4aa7e52079361f406d78c052c29867b0ce7f75ca --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94d6eefce104ccaebbc5146bcfd9faf518ab9d332ebeaa4ae88252d680308df7 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.17.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.17.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..24a20265abb4ccd301ce0a5e7d8ea50ff2fd8d47 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:828f297e732a7b19b609af00e4e136b27f17349ee0e74407afc5da7ac93ad37c +size 36865259 diff --git 
a/global_step243198_universal/zero/model.layers.17.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.17.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..81d26cc9accab9dce694cea3466396e186f55f31 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16d63b6e3b7c587bd44b5aedb674b8b8f80db2d9421d010607cba64adc577402 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.17.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.17.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.17.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.17.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f96350c11a144f2b5aa299e48e7592d015e6ac0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:498804ca66c5670548d4040b801dbd07fa157e708b1fc7fb4579c82b6cafa810 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.17.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.17.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..49e927c981345563a9ca463d63ab2240fb9d74a9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:41e719d97ff28bef7ffdd07bb491f513773fe2d84397bb4339d77918cbbbe6f6 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.17.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.17.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b7442e4c1297c66465c9630a3917e87ff5da4f92 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fc30fadcef086e19b77a5b921ee5428cb3d836ec4c0b91888868aabd0ba4856 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.17.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.17.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.17.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.17.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0919374f2941e755860260185ff1a76428a6e25 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0125ea3ad826c7fb9cd6577e463e5af33de008b4e682470a00cc0f7fe8f33e4c +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.17.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.17.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..1898407efef28869fd3e8a281531bb4bfd6aabf8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caabe9a4a30b8f2c88fb1c9e6c505590b801ec83dfa717f4d600a5035c898b0a +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.17.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.17.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f54c22ec1a3c71f0362bf147aa01b5fc647abf6b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfdbe21e56a8a2122d48c7498c09bf46165c8511ff50da81b1079a2b6b70c972 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.17.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.17.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.17.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.17.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f5bc00e6b7bf70db7398a3857dee6d5fd415a2e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ff8ee8b8db4e7f61172148ff07ee7997df4e1426d10ff10d3441ac41d25bf65 +size 8860 diff --git 
a/global_step243198_universal/zero/model.layers.17.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.17.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7675f382f4e378fbb12dccef7d03fcf08ff7c3b7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04836a2d443ab5098a377fe1e2663461f04261f35190ec45f8653c9228f63702 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.17.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.17.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..de87dbed8692a686e8ebdb09d732a5aee710be00 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed8448c8913d1b414d64cd2d0d363e22d446d5b25035da6246df431be9d462c8 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.17.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.17.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.17.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.17.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..aeeff5cae3b9e10e86906a8bca40248077ea9e3f --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.17.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e5179af121db724701ba917eb859ba2c3d42ae8ffd3bd6853e5328c7cffcae1 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.17.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.17.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8245bb63bd1797580f120bfdd4fe58e918bcf940 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6823469ce4fda569af9363f16842871bc12900d5e45db0383d06103179d66a93 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.17.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.17.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b637291a7862fd9e03e1bd15b1a518247947e30 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96d130a6e31607cbea09ecc05134cff2a6fc9c441133645637ca00191e17f6d8 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.17.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.17.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a68476b559b37bbac3222d833ccc551d9c26b6a2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc0c1209437ee3203738f073e860986a5c2c470b1358af04c3d45832b0b32be +size 2716 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5389d9d66da765342d0ff470a96952c2a77813a8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5a470ffd0444b9c63c9aaa7abcd8d1b778ce1f30da18be15ae78b1e78e1e1ff +size 2731 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c7bc745dc31e9a91b5fed56c6e733a5333a3f6a2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a4e37faa0a461c58f65bdf397da10d966e0de5678bf2a08728d9c412bdafee8 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.bias/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6746b8ea2f5495361f5a5af0de25a2a4c21671d8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ada98900cfb5c32a6f0468b3f57d799ff00d3851a1cc308684b1e41b479f1b45 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c7063b9ca6a96b05b68eaa49969ca46c433b886a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eef7b7901a065eb7199be464a49e39d0d336809ac326640466d5a4af731d22e +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c13a31af4007ecdb80af9eab7189da47239d85ea --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c18202e98b7bcfc946d5487744fd298af346a88f8f3768468a8f95fe5e569724 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..36db1ec876b7c6ceaa6047216c600a12b1949e0e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac366987455c08d94a57b80cecc36a1293b8258e720583b4021a3c1feed04219 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..300ac3fed69184ffa52cc8c359b9117532f102c2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:826414221304006b2ba1a19bdf1300c93146f6d182c5dc6982133bc97d124f8e +size 1195 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..32b15a6217730e33d12fd404a9819291b8c09bdf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb6a5a81b8d02bd6e80456a6fe21b1a5b7cf44151487377ac02881430a51f85a +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.17.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.17.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..86f55acd2efdfd7404c790989c78ea1a05432ee9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:162ffbe4a98759770d6cbe57a7e2b08dc2654571a642574b20f054c59192a059 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.17.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..444c31e8b73129c0d7ba98475f4d2091d5710d5c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c98dd67bd3408639f7c50e861fd2085b00d16a2a4208548710875d804d1f9d2 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.17.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c4b44271f84c3aff45376682f32e9c227a67b6a --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.17.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54c6891f2e3d1d2cb80b285733779e0104f851bf015edf1edcee82e18e5b6a79 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.17.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.17.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0902f4141775014b7f66febc15d5f741ed4e577e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37fdb2fdd34a5e276652b58bc6282fb0d3c9b9172c3aa36f6deb475792ece120 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.17.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..981373445750fab56d1d876ca54de03d84f550d9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2eea2c55e323ae8be77f18a741ee1d417ad6939cd0ac71b3447c21763ea88d3 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.o_proj_alpha/fp32.pt 
b/global_step243198_universal/zero/model.layers.17.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac97217d8336db9437a02a0fee845bbe1edf9df0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7be4ddf03a2087da64042719d735dbdf74a06643be17cdb5affe0461e23833d0 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.17.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d6dc28a531c98c4c2ae95873f1d13d212ba0f37d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2fa0d756f8abb9fd840d097ec69e0ba62b53f62caeaf4fe237614c186ce0e6f +size 8860 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a91bef4d9be1a9f657a10337ef07d15f9065817 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c880ab2872b30e0a5dd99612f65896416314d0a5eb37c24d0bac652c744a3ac9 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0441c61e9834e9c0b03904708d0fae06dc8b4ed --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca8d6451de2af1edb85d49955797accf13bb59f01714e374cc64fd9ff32681ed +size 8781 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb0a28ecdb27824929ef9aae669499873cba6f50 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95c275e89b32cf97ab473d83127f442365464b9db91cdec031a14ceffcb56d9b +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad4d6f9c5c90fc4451e0462aa997119d7b163bfd --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07fd56085fccef2b495e953a87eb029fbdd2309cb6c4307cdde86650339c0336 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..41a1150247d17bec96dc15bcb089f1b95e6acde7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75cba81477dd73a600aff9525989cc3d19a4360dbcaf05eab2f87a2c7d114a3b +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d978537ab5b2135e385c476c786b1f3c0cf037b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd99d0388532262268685428bbc0774d46d930e37c377afdc8f74e348213892d +size 1180 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.q_proj_alpha/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e2cd9d23ebf140d89843b6710dfb07f56a178965 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d976fd3dd3a46f245d02275160082dc5c0b4f14f126d00aceb001599c1fc18f +size 1195 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f8238672ef8050d51e46b844a51be3bc309e3cf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb5095d03173575a2d4ada3459a8b713f536ddd0a78229ee413e02b0787b8c8a +size 1165 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f86e8eb3a816a32a758d69d49427b1c9fc43f0e2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3e9c5f97943d11d76c643c9000e2db02c886fbd5df232e7da06947901d322c19 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..59c1c8bae9c4bc506fe5052dd0d92de68236dabf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efce5217a016aabe47d9a245fb7c7f3304cf16da4cff41f463d3d2cd5f439f89 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..72f9908ef2c4758e2b45cdcf627cef160e8c399c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e37095c57d1afb45fc37d908c74f886398d0a1c2aa52779533b3bcbbaa3f6706 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..062affe7531bb4744ae63a7f968e2cf50143f5d0 --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:596ab20873016c049c86aa6f5f241a910a02dc04ee159bd080a7234f781af747 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7933b0e2a3b646d11bea778b56c1f36354019f3c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:585f4d7c3dabe09f94d6ed7c19e2ba44630cd2a7a11ddcace1dfbc12f90e90c2 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f72c96554077aa5b15024ec8971aee79b282b060 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdfef061966cba29f210604942342c4007efe45309dc2100b892e71e670fdeb5 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.v_proj_alpha/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..33bfdcbd35cd52451ad61c949a2301a0bf106e7e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a48917dcd058d7cd421b784b6cad0a8ac9407c80df56adb138a7dcdda9f328a +size 1180 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..747cc7e4be97cdf8fc467fb09938b252a9fecbf7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a18a8ecc7d3f1acaf0f4b45d4eabe609891ffdf4ffa51a496ecca1698f219392 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1655d1da363c035a4f58bc6c159e136c256ec360 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9633894e8e8f0c797cd7d015a0ae88a91cdcef03eada15bf95ef1f0bc9df541 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.17.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.17.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.18.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.18.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..86157a26a4216479371b4f361f564ba1320a7f33 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:369956b2bdf8ff785d29d0ac698904032bfa3462f122a87f810b671dd5c3e4dd +size 1180 diff --git a/global_step243198_universal/zero/model.layers.18.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.18.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5f217583185626ac7d99fd851d5b7052039a50c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e295b092249b69ce97f3d98e04c116ef43c811ca894a1879495ca7ecb4341124 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.18.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.18.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c27b62f697cd97618e80dc8dc461e4de837c0445 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a79c08c7c6e5e4f790fb3471f04c99cb2b43dcfcfe46a3304858a7da79187b94 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.18.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.18.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.18.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.18.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.18.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3a08282eea65bb9898f86fa45b059586411e1e4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e8ec5c21dc0fb07e95c2f898a38e0cec53127df4d0d98a53f94e8f75e5cc7f4 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.18.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.18.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd8439379302015672de08b0ec455251a589e534 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a4e8daf3cadd0a2a91cfb985c5d038505b5919b7daea5724387fe393949f6ab +size 1195 diff --git a/global_step243198_universal/zero/model.layers.18.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.18.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..542a8807ee7aa681e685b89fdb8160cdc28c983b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ffa945dbdad0d939ee523bd46dc10e6b3c05dd4c5b0d5b71931efa35166236e +size 1165 diff --git a/global_step243198_universal/zero/model.layers.18.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.18.gate_up_proj_alpha/step.pt new file mode 100644 
index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.18.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.18.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1fdf9cc3311da92fd424d663b1d0481838cfd4a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4748883683ae70d7e22681b580c28c77b881d5f431abf0c3fc1811aa793549e0 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.18.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.18.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa1a10d7aa5a21431bcf1f654f193d104e82e6e2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0de365a91d1af0c334b287b3b426a1338d74324401775075348f93d490c48459 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.18.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.18.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..964cc8498723662f00c37e7fcdaf3f7b8a003282 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f03267ee4aebc5aa047aa1754383f6f69d0dc8a0b43972ff66f669516f3422be +size 8781 diff --git 
a/global_step243198_universal/zero/model.layers.18.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.18.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.18.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.18.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..81a690c74494d8a8c4b2ca57eb05215d55b0907a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87cef7a432386b7896165d8331d2d35914721e11a66fb4dace7c950003638ffe +size 1180 diff --git a/global_step243198_universal/zero/model.layers.18.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.18.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7922f813b60f16fffe6f0bd2d3d5fab6300c57f5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dade13818ffef3353b650abb5b5ae149bcd5f6c58429b96dc7656b484bbced9 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.18.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.18.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4ed88d1c516abc72ed5a157a27d3ffb80db8ca1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.input_layernorm_alpha/fp32.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:265d35b51310fb394a70ac10bdfbcf5fc0992f99aaae41f0aa835f11e47a9dae +size 1165 diff --git a/global_step243198_universal/zero/model.layers.18.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.18.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.18.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.18.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0fd04f807b90da1c5bee7c1926045c53067289d5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa07fa247cf81d224475cb99ba8f54f8c19e0690c0c2ed6237ec88da2672d625 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.18.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.18.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d949b617060381b0fd971668e15898d645335cc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04aaaa60cda90b6e109e10b417e2c5b09c6a2b9e8d2181b9530a10c89da058de +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.18.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.18.mlp.down_proj.weight/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..2644ce49a7e5f022ef800fd1149bae42db08ffd0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9737a1f15b5a7505d3ca0172919e0cc5876f364360ac1d865a8de2f654fbf5c3 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.18.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.18.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.18.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.18.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5cdf2c5b748ef2006617987e60c707d95d47631d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0ad5c60ceebcc785b4aba179966907e12def8a4ef1481385f2c2c023d5e2967 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.18.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.18.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d0385a460e19752ff2051ae67f481a9fe5ec2d1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2017ea22b81ba2c21d9a68cdbf696529e10eeeb71bff7e1caa0e29a1021543fd +size 36865259 diff --git 
a/global_step243198_universal/zero/model.layers.18.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.18.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f00ba520b389e804ce2932fb329508a1a9daf9f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457632c8b3201d8e72ee07dc61d5f5459dda0e7162a9e3debe553396e8f1b065 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.18.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.18.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.18.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.18.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..afbf3ded502a55cb5354fd52de138090ac2b111d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c203bce0906d4d761549171f0e60d5f10d6001300cdf13361ea108340548fd7 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.18.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.18.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d209fc32b9650870fdea09fd3be073323e687eef --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:38495e2c92297bbb935cc5250e48efd262f6b5bd9ab41c977d633ec6f4edc2de +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.18.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.18.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e97b89d995bcee888b5d1d548bb7bee171cf5823 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9784540964f5cb3eaeb7819119949f2f4d46ed8e25aedbd25c56795d116e4bd5 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.18.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.18.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.18.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.18.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d55ca098bf08a15fd754e87a246e9054d8290a66 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4f9f1328c168f74c3362362f27e5944685a073349683f1c5f3b52827158090b +size 8860 diff --git a/global_step243198_universal/zero/model.layers.18.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.18.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..0127c48679ec5c106abcd9252fea018aa2af0b76 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2abe1719b698f76e46342c754dcf0fd49a97335308c65e513a1297cf0dc3eb6a +size 8875 diff --git a/global_step243198_universal/zero/model.layers.18.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.18.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..92667016c4e6944f9f4dc9cbfc3973bbb704e798 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae7e1dd3a4a6cf426607e4327e09f7bf1a9f8f5a310067df4340df7e020d4a0d +size 8781 diff --git a/global_step243198_universal/zero/model.layers.18.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.18.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.18.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.18.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0e2fdb3ff761d4a1490fdd65568ac8dceac053d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:4ed042162d0248bb6cd3da19d3f65b9f856a9e90f45dc4ab42c2278d0df0f79a +size 1180 diff --git a/global_step243198_universal/zero/model.layers.18.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.18.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3a822cc54a9be4f94581bc2a9f2877af2091a52 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bf79b13da9c609c4c8ad8f0aa88f00cee08f9f98d86212b36d4c2f4da60f5fc +size 1195 diff --git a/global_step243198_universal/zero/model.layers.18.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.18.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3c575870848516c006e577fd7b98918408b30b0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e45cf232b4c0f2da56398cbdbd7d3e19184e2ecf4d3832ea852cad2c1a4c6a70 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.18.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.18.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..335cfcbc5500d64ad2d81461949c9557a6d19bce --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:165d0a8627bb801629529816ff05e14bdb31c55eb80fbe69c07d46f26e618c01 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3611fc200471dbfd3b6ff06dc1e1813beb0a13d2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73402fef8a28e77bf05e70e026a4a37a14f893b8c0d93d62860852966a66d781 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..02ed734b2edcbd4a9a329843724e47817e9b9ee7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f06f0f4a2c9c86dd69af517828a4535d7d1d3a43597624f6ec62c0d3a4b7275 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a36120c1ccdcc7692f99a9c26df417d20230abca --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a91a7aff680cb296922bf488a66881d5439a4add4212038b8c11971b8035a173 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7377471b4f440196aa3548e43044b83ea1a6a036 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bc5610a02949bf510020803601d934b5e4dcb44434d1f2e1a25d603868e2ac4 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2f68b00112cef106dd3f71606c9dbd18d1de981 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a431f604873b3b60a9fb4cc77283abd884582556cda93e2d831f290c79d05989 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8516db893b391b1e3157d2f3a5811f9d22a236cd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beb59f69d8c024d10e08d012bb94044c1236e76dcc99b09dcfa02e84a0fd4947 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c1ced329507ca6b7deaf14f42dc996dad0a1577 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88adb6f7beef4e4a642d2f6ebd4aacc125330bfb52eca5b9693007906d106014 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..90d42dae4d576074728ddce40795b1a83f7fd46d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6c28f3ab55c972737bec42756b9b94b4143e8bb8572c94ad1c43cc52eef5fad +size 1165 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.k_proj_alpha/step.pt 
b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.18.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..09db91dc4aa6ac0822dfc93e05d2ab877601e99e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ae69bc61a3eeec61b980963036ea008d98954285982d6e789b8ebd6cc99dad0 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.18.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0db1d2582a82ce90906a51734b95f68b8df5b68 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3886c3554d9cb410d0c458ef6b687a9c91eb660016b781ef8190a794bb30fc8f +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.18.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c29efbacd1fe43f58ec7f502ebf6b1b2a6ebedf5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:6657814bb841a4b1c2098666cf74664d9e3b3a50c44d1b1a53d7e11a20b5ffa0 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.18.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.18.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..380fa9aac8bae79a96e8dabb60a8c01702bc65ec --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32f6eb54241e471e7a07fe008c9089dbd0774cf22098bb58a393195a0ecc1620 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.18.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..03d1188ec47b4e81d61fe581ae9b952e41e8ae12 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e23d57ae5a787063810c0ddcae4181c4991ed520db4beee39c36c9576378550 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.18.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d0460e298464d81bb0ca21dbc8e2304e35f8a35 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a66fb1eeed7a54ae897e6debe23ca8de8f3b9276d8ad8151a1e837178022f094 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.18.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3fe0d42ce0f70e68d34baae2a799b3f3656012e2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9d532fec8434e657a10efeddff06b1820da4835930adb3ab61393ca5a49ba1f +size 8860 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e821a021ea10461ed86720fd8b9950394ab7813 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a10c66eeb76330713faef486b167a9205ad1c38f6f92414ac6b731913907c726 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.bias/fp32.pt 
b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4b44d05c8b1b49aa00fe56e639be5c434b8d944 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc45aa2ba2f81192a767b092fed815ed696652524846566f4a04d75099c85ff5 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9333b1178083854eb8ccd0e96b9067c81bedbbd9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a69093282ebf7647adde261fd691e6df21f4a7dfbe32720051fa4f72e1fd068 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..30ee6ed2647813d2401a7bb6868b84f24873e882 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:59e303de463b861cf882dfdb5c35a4144c938b84e6f8cba27162d7b9de16a47b +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d523e27a0c0a9778066ea400ca79710898700640 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fa7999aef4e3e9a24e9c3b3f76f20d94ad97a4aa27988a3edb24939be1433ad +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..341bcb65b08c9d67f7f0185f22ab9cdba3281c76 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb6efcdd3ba803ee17d27bc30abc07e2378db6e67c0cd13981cc4780e938545b +size 1180 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..66db986553b04ffefb615de4765a6a9072a089d4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfe9c26e77d71f1d8956d8c7d5317d86127007295885bc71b80dc4d19d41aba8 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b54aa0db666dbf9a906e05760664c85fdb79efa --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:863b8d66d2ab1b0bc182d3a3e5b4a500948d8f20ec536ccc746612cd92bcba23 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a45adc86173fe414994af936d9007438281fc2a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1395ceee5d67d7f5a9e9d957c0458ed158d8bba0ee10b9ce4de1f84f2fb68d74 +size 2716 diff --git 
a/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6879a4be4a6d235158cae578d2c00a3643126414 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22a9a3781c960983998d19f4f6f681d9ec6dd6e9f9c685105848da63305cda48 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..15b7a44108e30d70e4fae7874104cc8840d8b06e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6103ef06772302b9a80de295099cb02a6e542839d4d4d7b1eb7697be8b5a7c09 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e282d691f0e13886a71507e9113f68559278f3dc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.weight/exp_avg.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:033f12fcff47441c4e3044040bf06553f6e18994a69a01ead6f6ce8d292850f0 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ec6f0b2587eeff1aaaaf0ad72d3f18eb0ba9c24 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a039253d66f31f480e640ca024f9508937751fcacd8aea00cbbf47b5151389cc +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b8274b59077a1f4e2a5d42d026b2d1847d4fcdf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b16aad1f221445d3e3cf9df76879d791a7916fd4ca032cd6a569bf4d0130407c +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..5330faa18e8a853be113c4c05e559e0126e18ce5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79815e81a91ebee8e69ab1c2a959095cdc4b2ad1ddbe83329671ffb9aa047a62 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6910b98745e37dcfdef8eaa486e211d09d85df8f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0e64c91ab1ffe550c3f2acac121a4d90f3a4fa3318d1e320fb61619fbd3fadb +size 1195 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd2a5d3a43682f236999528725e4613a1be01406 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9bb853b0528e5eaeaa5dfef36894bc493f0a65d0c53cb10cb8f05a5af97df4c +size 1165 diff --git a/global_step243198_universal/zero/model.layers.18.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.18.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.19.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.19.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b648f472e302476d56abb0e32b48dd2e8da4340 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bb6618e292791d839c2ba68e13542bc8966de2ab9e7b9a5a36aa5de2fb3689c +size 1180 diff --git a/global_step243198_universal/zero/model.layers.19.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.19.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6797f79cc4796757f0a5ee28eb081c8492f3621d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d82a9c9723e59d150b16b072eaa013ba1497a857e0806c75fe2daa11c608a5fc +size 1195 diff --git a/global_step243198_universal/zero/model.layers.19.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.19.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4dee611f277898ba17bbafb0c95f83874b5f027b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2872cd045e224759a325d676a7408e9d5467229efd34b064a4f87de29967d3c0 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.19.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.19.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.19.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.19.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d1010e82dc50ff89d93e612c5b6b700b3e9b7d5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbd5b6ff090efc81454a0026cb778f1adb0d3c4f4f60ec0ed636baa4ce5b99e5 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.19.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.19.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0249212e20cde2722b95131e8c83cf22047cf40c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40905727f3675424ee2ad9a5b461ef6af3be51d961429a40a1dc1ec23b762131 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.19.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.19.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8f72cfe267f14844a000179e69f97496598e6ca --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0b57c1cfde2da5eea92885dfb07e22e0c388beb70f468e0d3a09d6ce5abf510 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.19.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.19.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.19.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.19.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.19.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..433385def476219e85f7e4f593377e5a80da517a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3a0d2a8bb1d579b83f6666204f3970d21db569eccbd8a148a2b7a05523694f7 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.19.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.19.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd36b0ad7290d081be88e0d5873320b6dc682ad1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:595ce5e57e88c11df8470458b26f8a7e4ecf66e57eea14ed4ca8229ce8013664 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.19.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.19.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c87b8032689663cd6a6cfccc880ad8ead589450d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:680f609f6ada0bbe6341f88c061d0cb00fb5d594f355e7d8c973d96cad67e895 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.19.input_layernorm.weight/step.pt 
b/global_step243198_universal/zero/model.layers.19.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.19.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.19.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..139e8993d9bb819579d52c977bc1fed2651d3cc4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66cc798dc633031616fb9b04b30d3280edfa27331b194d59846d054d59d25cef +size 1180 diff --git a/global_step243198_universal/zero/model.layers.19.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.19.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1ed9302c42bd3f9a855e3b9a7a7c829d448dd34f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b0a7cb03b2874c8308f3cf8f77b725dbc2ee42831764686b8260250ed95de75 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.19.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.19.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f417f2b30fb830d48e45651aae0d459bcabeb7f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d9c28cf11c23005e5068e49d67af401b9a7cb4ae655fcf44544d13c2704bbdd0 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.19.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.19.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.19.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.19.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..33ec1f938a4ff2043f8f70ef2d11bd3c90aa04e5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd4e07dd0e2a357f65ca6a0360de3aaf69625b8701026ed8cce9086a9cc75b0a +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.19.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.19.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a1c07b9a86a7d2ae1a0155f8aedc90456e9fc28 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf9989f562b7cd6dff903a1376be613c6b08746e8d3b4e0f90f82e69a6273291 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.19.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.19.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f848d2759972561ee67ee4eb747d4d631255441 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.19.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb61cf966ef984a40b557ccc91e8b589cccb99967ea9dfc2e98dfe02f55c347b +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.19.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.19.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.19.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.19.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a4d8d45454253922b8585fac3fcebc62710fabe --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f946dcaeec75583b5ce78c875eeee9a7bdd163df335d2e473332b52533289c81 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.19.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.19.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..791f82114d62e903d540e2bbfbdf3fcf70a463b2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f80156ad2c9983e5461a177f6d65559e77d98e410465ee9862b0816cd641cacb +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.19.mlp.gate_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.19.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a554d1b3573fce29da038e1c9b60e1ed3394d7f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2bb2d4bd31449346016e1d226149e7a18e7de5ea19c28b94086afcb18dc027b +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.19.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.19.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.19.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.19.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9124f8375f8d5d8465bf0949d3dc44556bac63f6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e16077f526d14546e73c6a2a2952d7ca0bbb93b28376873510402b5635cee37 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.19.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.19.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b0cc635550b5833ac215c0186a4f6df04571d1b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:9165ad60ddbb68d438d93d49bc1626ad138caad808dd16614cc7916cb3c0807e +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.19.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.19.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5678558582ee49e516f7402852a4cdf43d7daa9d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8462393c164b23f9a777de1373ed1522732edac6090a961be2aa11955a692a5 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.19.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.19.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.19.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.19.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..88738915713399eab259c5ea642252b4c97c77e4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd9199d1ec3bfd29b71810b77d5318697f8265723515261879caed2010183206 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.19.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.19.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..bac22d4ece9d984a068bb57e6bd42ddceb524cce --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2004ae985458dc28f73b148a0cc908d7b8086a1af7ff1b866aee9bcf7476baf +size 8875 diff --git a/global_step243198_universal/zero/model.layers.19.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.19.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1bc247e6aad93d2681107999a109aacd4548f3f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:173027c6817ad8506d17ad64df03bfc9f3a96af57acc242f1994741b9ac300c2 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.19.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.19.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.19.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.19.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..43d172ffa78cb360a3e69e6958c7d30c4200d89d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:5f0297bdb2e7cd161bf3d0c3345e91410bfbafb52eea54900aa03e6f04223df5 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.19.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.19.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d29ae9276dfebe1f9c3c319b857e8ff36b50116b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9589aaa8d8cc0baef7eff7a89b230e5f08a4de4fb64c929f708a026487eacda +size 1195 diff --git a/global_step243198_universal/zero/model.layers.19.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.19.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..615e3a70cbb6986680690fde5a0a01987745865d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90c8f49996960d7ad4a06ad9dbae3383d242b065e1653ac8d47f2a9674d10e34 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.19.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.19.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..6c94a96266b6d4cea60c92d22eed8407cc12cbf4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6154e2af0205b61c146016dccd00f3f3c8e682b8dbc022034cbd4d98866ef51e +size 2716 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..937fab4de898e4e7e292569fc9ddb7d2509e1e7c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f47d662f9da09042146af1d48c1a4a60d290ccdc72d6efba472b6bda5ed4d0e1 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..db0849ed7588f1f423b0ab01ac40790352854c10 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96a40c5ca201f26984a601144f9c37d28f281c259e10e7c736b5de5309e4c22b +size 2637 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5dc99ae321d7fc780b0b5d884c968d88bdd406fb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cd7a9a5eeb1b81d4b967eecb576e8cd676d25d91f123b6ba10cfbecb0ec2ed8 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..41d1ac251de2b76f69ad9e1eb6bbb631b9566609 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb7e6cc38e4a036a8c364e48945bf817fc5e45aa15678bfa3966baf74aa91235 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3110630c5e617533f8c522003a13862a58d09ba5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a03ff9de6156767e7fc6b0d695ab91c5ddef3078d720eecc2d0a89f27383855 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e1044fdbd5d19f9a6c56d19a1138b5eab404ac2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1fcab03f97db9511c15a5b0ae666f844e6f0d715c18803103b2871d525f568b +size 1180 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ad01154e07b3407c19af9f57126def0cd23f310 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1db0f7d463b91b8f7aba2d09f2c2763eee08de561729e050015c3512ff72159a +size 1195 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa5f0ab3627f894e3afb7e98970d34f22cd84a46 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ab5a1c8dfbbfe8a67f8e5126d7015994dcd353629f11e69a476de15d45fe449 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.k_proj_alpha/step.pt 
b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.19.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5bc1f5916e6bd0eb175e3ebcbe5fa6d0adb5e87a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f8f3bbfee3a236d18c39b3a70efb2e6d8a041c4fd66e39df2e72e1aba7e6b14 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.19.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b937a9839998aa977ea7e0fb4abd253d95e3492 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a4f5e8389ee0b1d87553d4ca4b252bc38828a6bf1cd5bf5130e440e6a9daa11 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.19.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f19e6f7b30384e612663e7b65676b0b8edd8de8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:83f5ae316779f8eb24851e5604e272a5220cc3cdc4db4c1ef11ae29e7ab7ae1f +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.19.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.19.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5abd1e361f096631c74adc1abd43b4528962514c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65628fe6297efe038932bece97760d8161972e8f7991b65c5c41ed301b3f2a80 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.19.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e52e64ad2acc97124b175885bfca88b4584260b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78f91999c2a7522ac99cb38127a0d429d89ecf5eab215dc06e556760a56af04d +size 1195 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.19.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e304f9972fdebc8680d89495ff34f3f7ebacbb4 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ece79afe914e65830af94c463bccf63c57f7e2643fe6a6713385ca0b7ab32be7 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.19.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..45385c0b1fcb4db052c2b33ec4b70510d31bbb99 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef0d928aed4e5888703bc8f93adfc7f5c677a07fca1fe0ea57af164ff647b4f8 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d8c1ae068e3326abd667c58e4d9cc570ba8a456 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6caea5ba938323a0038ed029f2b05fea1b38688a3ecbab2cdb4167f05ea3b08e +size 8875 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.bias/fp32.pt 
b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8004e2dfcfc770a78b4ae94da96a6170bc066382 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9ca0c4e808c7236a4b28bea8aa4c5f66a91296977a0ea54d61ccabc554bdde8 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d8dae9244aa654978d1d6321ed76727b67cb2a6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:337ce22fffb8cf0c82fdd9a61838c2162267d3c3bc95f87e38aac42c15f79716 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a676aac058c9503757c24866bc53e0ea946dfe42 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0605f44800741df82378e5b6bdd18f66801cbe38001a48ff5dc13d3dfe004e9f +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3737a8664cc2f8e0349abf676e23dc722c01aef6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27a1112808ac1e587a304600ee9ad160f8e20df151cb95d2f15126f913b08c9c +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..031e1534820feab7868d66dc40f79cd9a117939f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:467c6883c1b3be49a87c0ffa4cbeae8f9d50f58947c6356fac55c8993eea2d0f +size 1180 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..aa7c909acb2e1be58478a96e9c3ee2c842b233fa --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06836a3c7108af16cfe40e44529b075165c82afe7cc850dc234544dfea9727f0 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..77d797b56f1ace5fcf4a46e654777dbb8eb81cd6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcc5f26d4a9d4c98efc37579671b142939bb0007d0b858c6de3ba8433e377815 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb35a1c897e17493bccb265244197abde217ecdc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e384c2d022c78a34ef725e0a846c8828ef3bcecfe38981ed892bed5c717cdf67 +size 2716 diff --git 
a/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8a5282493ee5f3874c0227e35cf2c6bbd5883dc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73a3f6f6ecb5aad394940d18102729c6002dd20f2f11703316e1145641728e75 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8fe117f835e300b5cb4e45bc72082e3d2b0d5e86 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b4d3362ca75d39e59f6df783c5d004f78360a85b1d70e34d185921e91711069 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ca2a7760347d5ca8ab13e53e29bc43d7e1196d9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.weight/exp_avg.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ed0bef8cc96bb0fb812921b1fecd4c5efbd4e34765549ad6cb89e0a83996788 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9dc865df3413cbec1c5f9102f4b81f3c34f4d9cb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d907f5e2aff5b76ec80715f8f192917559866fdadde6027079fd4f55aabccb50 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8012f6c1e956e5437731d032b74df7f52f5be938 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:615abef2d79e3e51e181059cf6ccaa4a205b743a1cd28e58b6d21e0886fc529c +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..c0418dec6d1248c3379e74b1a6a3a478f9ee6910 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32d8a322428b58579c62bdffa15afe072c835a93c87a6ba888c7b5fc75ce1d13 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8683c78154c1e5a0a7d7c49bc0a2e406f13a164b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a183fa0aac5f04e881e529db22937f2a30e03753ba5ed0c6a20e8edd1ed7859 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..dfadccd1fab35038b11fafc710402d3f2f0e0dbc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b291182a73215fb24a862ba6c858ce57a621150d05c075658919e7e2362b020 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.19.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.19.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.2.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.2.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..db8832c07fa8370cf39ebebd419d86bd63e1194a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe15761b61a93c377f77117606a1682a9557e7302ebdfba5e03a3733c26ee171 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.2.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.2.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..81a3967351c7dcd18c6bdeb9749b5062a8d8d685 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78c3f61e88e6de48e4385b31b8ac29c197e1aac221871d0a6b4b3436cddbd1b9 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.2.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.2.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4519ec8f5cb78c3b1a43f716fd2fd9e87538c4d3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca9b758706cb5a4853b387e16d395acc3edb986c81d50870333457ac580d3f4d +size 1165 diff --git a/global_step243198_universal/zero/model.layers.2.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.2.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.2.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.2.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ec80296652a6a075fd67f0dab9dcc2d2ffb7ec8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc0cdfe3b210c849c259cc7e3c5cb18c2ad1df8faf512fed73cb253207afd67 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.2.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.2.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..37904506c1129aedacb2cfc902fdd60dfca8224b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61cf63cfc95e678da6d47deb8a440efe78a818f448c4a824ba9681f8537f7e82 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.2.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.2.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..05d7663d8308be21a2dc9981dc529f64fe62e167 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b231dabb875487a9b5e599b0e69cf042d8f00b86e4ec586a353c81d4c60a3e90 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.2.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.2.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.2.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.2.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.2.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8137e437e4d38d3661eaf73114f504052f031baa --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb73b42e9b06b0da7117730bb6f90671aa3f556765b4fa2f007753917d3e5af4 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.2.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.2.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7d6668de9dd20469fd638124b8c0131f1a125d7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8faa51312ae17d62319391c37c7707236c400a92d90a6260832d368c25841ab7 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.2.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.2.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0abd9f584d9ddd62023de78d233ee61dda599cc4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9afb0ef402ca6c4e7e7383658baf16ffb8d63d8b11b6ce8f0d759177bde6bac +size 8781 diff --git a/global_step243198_universal/zero/model.layers.2.input_layernorm.weight/step.pt 
b/global_step243198_universal/zero/model.layers.2.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.2.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.2.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a807d1864c11bac4efaf857d868ab084b849c21f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9b1ce9cfe73c0ae76c18777ca39bdb52f386dda98e26b94b5f980962d5dfad6 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.2.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.2.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5fea73ba3a3b87ba34f1df3a65de811b0ae0840b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cbe3667f936a92aaa9599c43b81662309512f099e220928b65f264add24878a +size 1195 diff --git a/global_step243198_universal/zero/model.layers.2.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.2.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..99bb57983d079e02a46a2d9b2159d7400ab8a33d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:205dc9662d480d0e24db1a12ec7dca58adf284983cc523504c1b42a8e431d2c2 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.2.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.2.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.2.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.2.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..58c5c2dbe358e76fafdb7586a76d910cc6080074 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fba0d2b17e6458bffc81fb995e4aa3895d2a5c4f1d7f63f043db6335f1b475d +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.2.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.2.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b2efdc351e6d3860b43174102918e11ee73ad60 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b0a9edcabab3c5bdeaf4e2fe6c2a88bc775357561ef6433e685669e7ea50575 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.2.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.2.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf8344e7a75ff19e7cdd95c2ce888a3f5f558031 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.2.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51b531ce7f25dbf6260ae384504100fb99d44ce73b42877f25607089166c0f52 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.2.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.2.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.2.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.2.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2986e38b56d07ec04676c9d7c0ad61a7856dc744 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6085d65648620d63ce4f7fab8a29233612f2970961259de21073ff957d8e6079 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.2.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.2.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..16aafa3f4135c847cce2743f24b7b9b34d3ee328 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7204f870f4e685c03aa228160965960f399714b00bcaee9bfd3ce20ff0340acb +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.2.mlp.gate_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.2.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4cb0c8b9878e44ec35294b4dcd6718cfb9d50199 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e587773dc0d8c0231e20fa77db62755b1c5d690aa4eaf97b462d26a19c6c486f +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.2.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.2.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.2.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.2.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..76e044bf26e6fdb6ac8c88f02c49388fcd5082ee --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:208dfbafe5c31e8ff175fb47a532825dfe7b7293d0fae2d8f2765372b917d188 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.2.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.2.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0c26d6ca3d3f4f8d76ac8cd98de3097a75d01c7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:23fc03e4f85f25890634e52dd5a8a2d6f9db6edd999b84b6665865b3654f1fb2 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.2.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.2.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1350fb837858e6dbbce8fca69e81a44f0918dfd0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ff5b10b941965aaf3003073460126eb23b9b3c94057af87d84637d6fed91c6c +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.2.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.2.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.2.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.2.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7af86ac0b9b147559b469e39c1bb39b3bf819775 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b8d8eb6379d8daefa43d17acc34441bcbf9579d0c106830d1c56536e9b6e07e +size 8860 diff --git a/global_step243198_universal/zero/model.layers.2.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.2.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..e5d38ddb328b4be497fa347ed85671ccc45d0447 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15db43fba8837a4b88e795c72204341b8a027949b0c7bedc639cb3004a93acf5 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.2.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.2.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6fc953364fdb78035f2879e26a1768f2d9c8a539 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3f106f9868ab2038e07ee23473e7c97140943bd587d34af738f16091ac8b6e9 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.2.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.2.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.2.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.2.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b1da4d2da2f0add63a733b6ef4802689c4b4330 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:96678e300c9bce42b3d72d096b4e7ed81ddeeba978a621b040a4c90a0fbc5c27 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.2.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.2.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4877e3ba741d51baeb00c2ae27f87a360edff98 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe3c349f7009c696d12eb5891ee35b72d06bf3ef45d56357a3e312bdcd1ace4a +size 1195 diff --git a/global_step243198_universal/zero/model.layers.2.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.2.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad10a98b98efa835a43b140e084372c9cdb3d39f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1f7c9e0f9121f1d04bfdfc21702072059798312d6198d74b3dca5934967b948 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.2.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.2.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..6da8ccf6d2b22ca096c24e33b5f58a34e8ded244 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6edf42283290c27de3ce3a10ea93ae7cb47b652a7c6c927d100ed6c80bf136b +size 2716 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a62f6ef26cfe9b1f57cff3b81ad09f957c009e43 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d41a79c601009ad2d8a2ec72f5feb76319f3679ead01b8aa01a8ef73f9a69c16 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f656790226669532879aec026c3567961681a8e4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb247745e5ed71439b444ef0f53cafbce26614f9682f615850c798479ce359e7 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a5b50438da31c85d4ad01a62562df36fc2dbaa8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:544c749dc892d2f6c272c4000d3d54ce645cf425d00d90d2becddfc351199d9f +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..842c4d5c4524603103b3d7036ef20f1f08214e6c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c54da41336432ab4a5b325e1e5a4d84bd895359ce99fccf3919da1a1847ca3aa +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..31fa28e9b04204829a9ef17a4eb45a769077e615 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df141b9da803bc0b98c452cb2081cfbd941f4a5a0ea5d1b3a909808e2ed9bab9 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a2c04b80382aa6686ea94d6b99cca8a3ede5822 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:313b05fca4be85a9831b74c2f3ad3aca91e7375134b28c2b2b5f45bb30c9d39e +size 1180 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6899bccc1628151535f3a093ed859ff22483c38b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf7fa98ec5cadcaa28d8e15626483fe025e0e1d6ed1e5ee52c48a51c88c2c602 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b35703bea526de9b80728e46dbd4bcc31fdc6b88 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7075985c8cda4eef5d2f74953ba0fe370ad466f1e7e977aba80dc5424ffe2ca +size 1165 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.k_proj_alpha/step.pt 
b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.2.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2183b277b374526e766e013f3095c77d0389c71 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8014dcf05d33ab9179b276d211af5f5cdf7e143b1ebef87c307cce23330af2ed +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.2.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..59a641a96d8b397da26d4f9defa184c039757961 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:637e1d3daebc1be49a35175c3e258bdc2fb673bb954428593a7a6f3f420dd734 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.2.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1033eb81c8363e4ad8c1e03a52f6b6f4627169f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b6241afe0e61d9d1c090ba7534dd6ee57bf2a70f2106c8de26201c5bf63bb685 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.2.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.2.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab99bb899ed53aeaa8e54405fb8b6755b76ff0d3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75677ff9b077f388bf3c325aeb0f8bda6ef448b36b015fc4a35eb8eee12cb8b6 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.2.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d5fa2ea69faa7d9407514d672cdeaf9c91277dc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e768e4cd100a2f8124195966329cc40c345919ae6f939ddef13ae9ea8e24d9b0 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.2.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0093e544383116407d58f7831a7d4c55c369c72 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d5b970c8f1a8ce3b89054cae37a10eddb581ea4d40e21c760e826985c8713e2 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.2.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a4eaae4cec292ffc4557c4ca40671e74a5e479a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e863da632ee64e3490537f64d04d647d64ffa360c70d989f9583b8bb91858bb +size 8860 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f737bfd90b57ea62181c68d20bb4ad7649fee15 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb5c73d0c6048fc0d19d0d1ca28769375204e692c1ec4773348b2322ce9a5bce +size 8875 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.bias/fp32.pt 
b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc8349df16c8945e17879754ed61b51779c84e04 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cb34675112ba8cbe47f66118aee95aca19b12db73ad864a3491a47a759935eb +size 8781 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e660ffb904f312044b9da25e863f76a662ce7309 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e1842aeef63c4b95868e4d2d3a24111bf594aab2eaeed01e83192daacacb01b +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c02573494bafa31e7ddf03ecba082dd97445e30 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d22599fb1ff642cb78017897bc149aee7fc2a4976ddecbd3983c48dd28c5754d +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc513141314232a6be57bf4db32c965c652fc190 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74142649461354dbb1f2aaa3fbd578a26aa6fec73280dd59cda9e72d3848b0e0 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..914817a17ae210b2d66c0feac3c142c7f25820d8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d6fc9974d74bb8a97406e2410505762678fee53bd3575e37d31d68f8c4f30d8 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f868ea4b4a62573c2bfa3a3caea3917fec11ada --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e11f9b3b8da07da7cf77f217e8420da32b5dc789955f2e97ede0ff76a150750 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7f1abf89cf18053a20faa90fc75db82bcbd7272 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ec74d7f66b2b9dbf0e0dc3ef89ed27b07acb41f6767f5284f1d064b555a7ded +size 1165 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ec5e2375a15ddf571bd896993ff8347aea0d4fb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:062939ce6c59db369cc3d5426357c44c5d119f076422e6fafeab87b79ee95662 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.bias/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..754a4ac35b5998dc7a1df238ed37debe409f3920 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1c8ba9cfa6120b51a1e418ede392291dc3e42d0a921cf124a667ddc49629042 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..43123cfaa34e60c002fbf60af19f8cc063c0ef77 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ff4e439009e601770fdcc3a180333733ffdd5ba16174eb74fe3bcef60e8533d +size 2637 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..efe7332a4701fd1f9b309cf60e6e419f83b4eff2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:94817573be6c0f9b8293882d94c1ab4db34d098683bb06f52e1861ad3d1e2fd6 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..bdba58a5db6c99aaf518bfe8ed0bfa1327dded57 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eb13b6a5b71853ae13a3ae1a0e1a43b750c7bc1c5940236ed64877a8e1ff4d3 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9cbe1ae38b6da5d7ccc5b2d46d544483c2f9b52 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:148bbdff409acedca9caf4470bf0f589cbfe4b3c0f5015c528e47594b143fe7d +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c5651b22777b8b470a6954fc0af0bd42ba0332f 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d04e7ce8ec80acca71d34e38464b306d8e1a4549913f1b98d7541bebf6f22e20 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e058b4a9f841d223b86dae90ee97b7d07da127d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a426bf7e1ab4607268d3922bc8fdd5a3567d89690bc97f3443a24d78653552d +size 1195 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b16e8a5271a379b72b6ed4886fbb601026cf8d90 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30fa40b3f4d439fb203b4e472b369167f2f2f9326be4f68189fa7091d1a5d1ee +size 1165 diff --git a/global_step243198_universal/zero/model.layers.2.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.2.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.20.down_proj_alpha/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.20.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d7a465286ac6cf55c7f5c2f76f5d7a31b1ef66d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f52564c294c208c8f9df589fe9a632a73510a77e917c08eea2512372a9b13ee +size 1180 diff --git a/global_step243198_universal/zero/model.layers.20.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.20.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..476334f08fd5e23942dcd39207a733a87031ff18 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d596d049c87d50aec54b02c6cd9f2b49c4a9df48ef662161101a612ef17bc0 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.20.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.20.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b33c58a2524b71729cac68a8910bc7a99dc8996 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bca2fba9eecf79dfa80742d26f94e74ad9e9115f0e60b347e3e3b08680752a54 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.20.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.20.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.20.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.20.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fea6c8cd8a322053f1cd7135065248bf87c80f56 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5b36b32e8699e43e0a218269b636b33dc960c2b05e52b84cdbbd0bba3a4c217 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.20.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.20.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..158b3b49779e863f6eb5dec8e8940160f257500e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:631f54b93a61593183d54bdc4a4f25afc481db4333d2b7421cf7095ddf4da8d7 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.20.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.20.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b6323740a2750311fc89917c7106f10a1a100bf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbbb56967a31d169645136278dbb5d2b52c5d636d47c9703eb558cd53f3d62ab +size 1165 diff --git a/global_step243198_universal/zero/model.layers.20.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.20.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.20.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.20.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb7383d498a0a87b6a3eafacfca17c3390e2ce8e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:352c9dc02cce0e824b4e899de0c4684e09fa6a75e442f3276e330d763b7bdf53 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.20.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.20.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f26bb6be035517cf2635956ba964cfb7ff01d7d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca7b76320dc3cacd3c47081cdc0e62ec029b6f16060ca77bff3fa3d914d73711 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.20.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.20.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..da1a8b93865befe7452ae939d8ff0a21a782ea6c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1826aca33b25cf66782fb584b7e2d3430a90dc0a8e96ffffd342aa55d387184 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.20.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.20.input_layernorm.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.20.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.20.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..57d7a147a078f0672056169d5a942852718597ac --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d1573f05363a4567cf7591c9f5eedc8c088fb1abd8fddc3c5b6cab99569cb56 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.20.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.20.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8aa861ef391637658add0139cfe4e84a8159eaf5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:745991a25693dbfac88600b84e4a84c10ac5b2331965d34bc334af1506545e26 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.20.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.20.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1063cf0f7aa48e920c04216460a425892ac72798 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1419a492c331c8887c5233f93e6e351785cbee9a9cbda79f4ac7b50c651c920a +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.20.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.20.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.20.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.20.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..baf72e34e49f82900c9e8d8f964acd66108d86f6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eb5f88c7612a673f13d570177597f5d8d23119f30275f4b9f84a0845a093ebe +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.20.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.20.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d0cd32f5bac44de22a776ca2ea8b5d963ad45f4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44f98bc393249eaa9fb96c5bfd8731f318484731faef723116cd0a7c6660dfaf +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.20.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.20.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..36667199c31ccbbb78dcff8b9867e857e2d7475a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f6cca2fc0ce07c7098ae31cb9eac870a872f743b5df2fde172923b94e97f2f7 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.20.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.20.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.20.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.20.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..82029b7e8cc7f62188a9dcd8c83ada63dd2b650a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d989ba90e534bc86f0a2a63c03489131c75aa0d72a7686d7b826e7f8b559f9a3 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.20.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.20.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..601366a6a9024c50e0da6a6ca29bd84f62bac4cd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf4ac0433877fde033b313cd007f4dee2259134880ce0d6101076bb33b1e5c08 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.20.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.20.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..b67a9ac89fbec649718d8297eb2966518e2f0e31 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:578226206cd1a7be2ae8d3210fff5d29f6fe7575e8b0222b114be3021d5886eb +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.20.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.20.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.20.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.20.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..93918be8bb562fef3fa84c0db5ad15151e03c846 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e30526b75a530fbe0c032edeab19ec1374ab8b960a2afc363bce3540b495447 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.20.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.20.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..71ed01ffd9d83b6079b7bde6a4c407c7dd722b7c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0704d0bd226f0269c5f70e36e9246f0def002af9b47294eec57bbc6722a56da +size 36865259 diff --git 
a/global_step243198_universal/zero/model.layers.20.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.20.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f160a017831ae405978f2fc9ae52a7497811ca69 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85496a1b23965e9e3c880ae23ea1aae1fd88a00afc5b83cda1d096342f488fd9 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.20.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.20.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.20.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.20.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..93e339bb7a3ba109d8ce5b842943690004dc30be --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cc9ae9439abdf90cf7030b27c7723b0cd8f60ea921c5a95a60b2429b887fa4d +size 8860 diff --git a/global_step243198_universal/zero/model.layers.20.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.20.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3957c4bac84ec848e8b47400c016a0c04d77b04 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.20.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2a94459715d42a3e02269230405a98f597853a9cea5484b5e769f85bb714a9e +size 8875 diff --git a/global_step243198_universal/zero/model.layers.20.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.20.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e12a812f2a7ec4b8acc8e50d77095180eccbf55 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:466bb99e7a4eaa85c2a8673a5622445f759c174948f4c75a5c29fd3b0d0e7fb2 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.20.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.20.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.20.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.20.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d209f2156791294e9cbc43a2496239a6de2de85 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c865d2db855b594d06196db86e4273a3c6c5a35aee63c355eef0e11eee6ede70 +size 1180 diff --git 
a/global_step243198_universal/zero/model.layers.20.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.20.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d902bc1c317d3126c36664e1fe1390e470c24e3f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3d7c393ff36dc26d55b94931de31c3d047236ebfb2014c4ff04a55ac298fc61 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.20.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.20.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0720d8a402f401cf44e7e1b81c7a71c0a7c5d8f6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5049895b7c7849d36d248b0e2628630686e4403017be7ff824fd218e2dcbee7 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.20.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.20.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..df4ab3beef46dad01da402fca2bf8ee112d7fee3 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17068ff222ba3a76005f1b838dedcab28a985a0812ad0af61488a0e6a4e17e18 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..efcbafe30f50521947725b6543313b0b86817fcf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b42cc051c5314b7c30f18585bbcd9deb0eae567055ac729f95d970f1381257ea +size 2731 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5630ee16dce0f743d99bdfaaf7494a0e6cc694b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:317864c5a0002eb15ed283eed23158c1e78472873cdf61c366bbec30f4d99418 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0563719271a2eedb0ce5b62f064e3d9c6084a99e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c51f96589f6d09ad17bcca814e2ee6ef8349e8a9e25b6bcbc51ccef02c547914 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..74657610944ef6ad2653d39bc1b9a82f45c70a31 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd01eb1f93e1c231d2d07c78c0fcb2318a4b382c5ba9a32126b463c3b11538a1 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4f881cab9806e2b1fb6c7db33b25e9e0616ee7c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cf114449fd6e1efc05e3f732bf45aa5164f3594fe42a9b3e65131070c0f05f7 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7147e8247df210f9546c99874f7e952479bdb73a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7320326472fac44f1d4b00a178d6d64d0baa1f1692ac59a19f1c58081deff130 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..219cb0f8f5fea6cd3509feefa00ccc6acff117c9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c31b8473b583bba4fe211bf8e4c36f3236ec874ec25580b0ea55a4563b5040d0 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa34124982d0520bae2f4fb40be918693a616625 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afd6128cb7cf5e9563f74ec30ab6348f11e189e6b14c20fb5b23a5d04b30398b +size 1165 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.20.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..419ffd9c2a6e10f79f12ff4304bb8036806b00f5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80862bd47b31eccca8cc0edc4a6c99fe7f63c98533b32b955d0d15dcdebf7199 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.20.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b03468efd9f16e217529254808ae1c97ce1ac5ac --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7c397e68fa371feddc9f4bf3780c1977b20600640f30d78cfb1135898706973 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.20.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..21b9e01739f44a48d406c5c4825a26ca06e3ef10 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4611cf719373d66d3465098358bcbdc64fc4db74ddbbd15f1b71830c759b0914 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.o_proj.weight/step.pt 
b/global_step243198_universal/zero/model.layers.20.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.20.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa7a299e187fb6bcdfefc2e8f7e691798c67d769 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99d9ed41a1dfc5128c83c0e7a4d129f2dff2323bdedbba38bd5420648889f921 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.20.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4172889b5e37bb3aaf0417c0bffaf3e86bb345f9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5d49e57e8b1ba00b7b8c82bfe95b2e577f26354b4bf24e5ce0cdb60e80ee31e +size 1195 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.20.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..22556a392ddaa5f4375c1c9afba098bca1a32d89 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:fd3b0e08769d79ff1ef7473e16f9bdfd6149e7438eaa8e029803944f93a4fecf +size 1165 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.20.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..23ea71f3052f5e6fdab1ed0525578dffa4bdb518 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82c8da846015449273694e4cb1067040a34ab20c8232f7f29073122cefbf595d +size 8860 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4563133bb1707ffabac63e9c6bbc7a396dd173f5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ccb7ff24bf181c791dc735253c81cfbf166eb372658b2dfb339f19e636abfa2 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9f60a59a4546c7d49585a8cd3385e07beb2834c --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a854923ec25be155e8d6fbdb518f2bb4881cddcf106495ba32c0fabdfcf1e393 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a710b3872d194a2f387e16feb2ce44afd81cf3f8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d124a08738c17562e7b3d701d411f5af50293f71a8a6be1d6f2bcd6018c3229 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..efaf24790bc86a6ed1317677619ef0b66d8c3dff --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ec4e7cd7b13acc4c8c19c3df54fdb06f98101f6f7585423ee3c443f93da9238 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e8eba0e70a84fe00c00a11cd89b8b11a7abaec8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:041049d564a263a12765ecc8a1db82ed35440d2ee955a541b16fb03b46b64a15 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e120f6e4067be1b725e87913a26b5867e961bce --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbdf3f0c1c31c8a22819c48e413bd9bd3974af5a51bd62350d9ad14f27eccbbf +size 1180 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..388ca050e1f6e3a9d67ce6b6aa1acea257144921 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0a653c88d00e1c0b74fcc678e8f41abe29feb9a45b85f8c5e7326644a71acf63 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6cb99312f4550028202a7608c70bed8a7b416385 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:179492f01304bce05b6bf0fe48303e33038162f7c3766aa9c4d254d3f63cc1ce +size 1165 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0694600953f7167dc51a04ac76c0dcbcd459b7de --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55c401d309a94cc3b2c675637baa8ed75cdfb9ff8fbb330508d1dba740896db6 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f4f897fe81303f0d067dc68070d3a3f8d8258e74 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35b38e98faa705aa3b758e5a4d80d4bd4b51bedd7e33a7ece71c28f26723872b +size 2731 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..74b97388e040a3941d3f31e66a065881f054ca14 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f40fbf229f5a6ac9e4b65d40a9c55d643adfc70adc82300e9006d32d7543b124 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..dde053524ce0951c873f5153db46aac2fdbf005a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef072407b22006a9c56635c6471564b36b74b7f587870579f3638c404a162dbb +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..12ff5b5b70b5372811c3346a84228f5ca402bb0b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf7fdf73bc3f635959a93a15986f07462add1514c7cc1ca1ac6390df83f221a2 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..da0d6a46b3987794e1a99c159428058b1d03283a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42290344b9a217c7c902b172dabb911bfea777702d93a39c7a684622dac01862 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..03ec0e409713dc704d6e6af6f49b3bb67c155186 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:4a7bcdf734fb1d243fe773de2940cf8cd3dd491ac6e6c9beef995ceaa95da4a1 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3d7cf72840f9d7976bbcff604046ce82fd8f951 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:474d7071848ce703d38adb1e742e6bf5e97e915f2b973c40ea44d88af884ec5d +size 1195 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a8585ef23d02c0faf6d6c90ca7a789c0e45f5805 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32c9400165303bf93c1f8e43883bd9d42525d2e1467f647743b21cac9c9f81f3 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.20.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.20.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.21.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.21.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..af7da9cf9f7538a1bb2692af4aec23bc9f158a60 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.21.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e9a73d0a96b2410688b8dc7780e870a8891e668f696581bf55c6a51e7f2af79 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.21.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.21.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..10053e1405956ae217024af9fe4024ada7dc50f5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f563fb2a6552e0997260ca7d2af042b1bef0f36e3722f982c86bcf3a4f99f4c2 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.21.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.21.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e190ca8c3c6578476bba4d95b13af7f8b3627843 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7ec9b775952eb97dd87a7ba69dff93721500a597b06607a8b7225639f091754 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.21.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.21.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.21.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.21.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..051154cd524f27d5f23094b7c6bebba8d847de18 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10a9d03115c1af07759e91705fc995fd75b611abad3687b1ea4dfbc23cf94913 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.21.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.21.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..62ea7e938618ac56fc2626aafdcc6435e28ee53c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b27428912b75811d5e4a9560589723d694f393862cdc283df8ae16233d234dd4 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.21.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.21.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..94dfc4aacca13d76b189d2be9f08e3df06a34d03 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0182fa50e094a79ce533d2a3edc2fd531d4f47d7dc025f966fab7c77b924c318 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.21.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.21.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.21.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.21.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f477a16dcc4b53f2fe92561cb8eb62b829081d0f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a39a17f7acb5ac0edd1bd3674e2e5225c2215f5ffc8fddf32ca512a6746508c4 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.21.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.21.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..85f526d72b157bd6c23eb8f2a61773283fba87dc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:456d56de1f3c97b96ef1093877824bbc20b14730291a77d019acc5e5004cce25 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.21.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.21.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f8351df53da4c30524340dc04c3d93dd039c05d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90a1590f27224f3ed80f42334041c52ce5407b10616ed3dd06947486bc2601c5 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.21.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.21.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.input_layernorm.weight/step.pt 
@@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.21.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.21.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa970355932687b49ac3f86f70d6e4d4965f23ba --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30c1aef630f66b1f1a5c8c40d5a2698ff457549f3e3ea0424430c74176faa2d4 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.21.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.21.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ca788cd38dbc31ca654456d00a68320157ed3939 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6fedc5dfada8b84174b3ad7184b3563ec947fd8b4e2b94db3a4074d9ddc2fdb +size 1195 diff --git a/global_step243198_universal/zero/model.layers.21.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.21.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed8d91936b9b0309f15800e9165b8f6bd9bf20ac --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e678320f2b6ea2a95093c4af59937d107453e6e4779e06ab9c7b335df3fd3d41 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.21.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.21.input_layernorm_alpha/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.21.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.21.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..507ead9f25c64bb084267113f864fc04a13457ab --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f36b342e9e287ec7923b8253c7430051c542c7621070f3d8b8947ca380cb963 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.21.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.21.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..57fa6ac32d5a39110953947bc4abb436de2a867b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a52f8219f4857cbb65e24bbf60d9d0265f62a2a1adb94f7cc700bf7364ae8ba +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.21.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.21.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b685e0d673f1223920259323f778bc8878f55a96 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a414661956b7ed782f1698f7dd501576bd772a0454c4f3b11fed5b461db2b6b +size 36865165 diff --git 
a/global_step243198_universal/zero/model.layers.21.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.21.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.21.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.21.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b7f5436de535f89e6565d897fa2e6fc8df93f731 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e06c9b0ad851acd883903b92d26cc50f8418724c348a8739da0e1386ab77103a +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.21.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.21.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..105c53847991a7b0b7c5e1c98f893e4e2989dde5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7eb00e7cb8f4b03606771273e77d667993691b5dd9a1462ce8df7df9d8607239 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.21.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.21.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..16106a262b5b1de8cd849bfae0145ba27ea6414e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:7a8e41dce79f474f98c8e536c67a9d49e4bd37a7fffa70ac5b4b6b2c7e1d52d9 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.21.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.21.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.21.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.21.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d13af44320f22f5febee3b6aea31fc8497eefff8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:036a1982dd1790bea70b679deb1721179001f2c79d2f3e520cab50efc4805967 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.21.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.21.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6840db272a14d4e33b728c81177dcc996402fea4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b3657ebab4d39d1abb21178109396564d6a6ac0ba80c214043ceac0d06b1c4e +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.21.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.21.mlp.up_proj.weight/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..9980b77b6cbc740be9e6db3e6014873dfffd546a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48fcb315809d6e36a0b923ea31102fa36b8b97c517f581505e156b6666c9d917 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.21.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.21.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.21.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.21.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..323e19f2f9002100876eb5e9ec166b7849af8deb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c8f0343e8fe2eb9ea5b1a26e7d2e1001c09c662fb322d31fb2bb623f2db7dd4 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.21.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.21.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c60b61ffed28c46a35f369ca8a1062d761809a11 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7e0957b42add32a688570b76fe474af1a325b1ed012398708505ced1df934b2 +size 8875 
diff --git a/global_step243198_universal/zero/model.layers.21.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.21.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a9b7399e30d44bcec7d1c5f4805a2f7850a7d67 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6152cbd740d4610ef974a8af2c41e98faeda40bc625dc9b43e18b711529bfd +size 8781 diff --git a/global_step243198_universal/zero/model.layers.21.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.21.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.21.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.21.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..416b81346ca9bd4d8bde2806f08bd9d029ce20a4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8845f16e81d4bd75c31a7fd545c5971f12df15380a600b0afa2fb946cbfb3bfc +size 1180 diff --git a/global_step243198_universal/zero/model.layers.21.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.21.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1a77a787e6c2fe9c3e2b919ee604f72d37668f6 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21c0439183b3d02cb36d3a1502d7600ce68706c1ce6c0b489e2ba8ba3d759c28 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.21.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.21.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..30fb75e3a48a424243ee0a74ce02befa35de2ba5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43e9e2c18f99d2ed794d27c9d4231dd120ab22de654c2a60121a8cd49d05751a +size 1165 diff --git a/global_step243198_universal/zero/model.layers.21.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.21.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..39df9dc91280b407192156922e422e9505fd4b4d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e9c73c5acf05f5f9eacaaf5805cb6036bd5a8edc1d0ce013939c12373a9072f +size 2716 diff --git 
a/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..36b195f92a58fac5b46ffb804256628872f4528d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8afcfc09ac62ec5df4362578b45b3467f83498f17f1724c6e40c7db301d64d2 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4338718d45f7e097cd126d31c6cc2610aeb627a8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d79b759a1769891fcb5069911b1ea21aa01012ab20f1bf1d562aa6f853cea1d3 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..922bd6b79922a93f6e0aa547a916d77347453713 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.weight/exp_avg.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7328498322c862f82f3922be9c01575dc0af3aa424548ccf97db41993ece291d +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..286848e18f16ae196f1e6d256c2ea593d9225c77 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:449e15ce6d07c08f635f04a9b861772623eb0d90fc4a1c8d442b05f818922a64 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..64f2027c22bd3da3c5ee66e0bf2a0ea67092d573 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc6cecd5b2920a1393da4c2b63b7c98baed3ff2eee806cc9d2ccb82a1812bf35 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..5b4e52936e93b3fdc1feddce571546c546a0fcf7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f4a4a0f3a7a5f52e957bb3e28cc0ccd1cda733d3df1369402de465c35f1b2b2 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..52d333e92c686abbe42b7f58d22b611e9bafe5b6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:066d79515f78820a5f4aebafd9134c60c567183bb772e92705ca03472874b472 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ea13bb73b5cacc8ef8dcc5cd8b761c57e701f3c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84cf6bd3b640f4cf75896c217e5622037a06afc2abc35acc89c6a6c6319ccac1 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.21.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.21.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2214f8f78d6a87618aa35c3b63a0ce99302018c6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9bbc7b4b8bfca44d1a9e54036961039c73769fe4f554ca5c92ac74b9fe52b5b +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.21.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa47f604da6a7677d552b1bb9c961517576f2a88 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d58e1dd017a2cbc9de19c7fcf171a722b702bfe65f8b1b959d9085101c21c76 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.21.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3cf7086c1a8c50ab5541b8ba9b5b993b3f1c2b3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e81ffeff557e1fed6eec10904983f902be802e1d7cbada688c50dc8f441a1b50 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.21.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.21.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.21.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5cf936d4850e77e38caead3be101bec735b0a6b2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d256d3c8984c98a5662e04589c7ae6e9856d815a4e74aa49b871ea3a8a0d2c2 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.21.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..13735830223db73ed43d839db330de736a0e7dd4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:928605093c0f7a5eaee34e0695fd40ef63e74e34200b9d26bb5e3c548401a614 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.21.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..626579af0d651651af196b3750da69e2739c11a3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eac0911712a51f6f0fc1d95de997814ef8d66bc76f166b9d7f8e4fc010e94ea4 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.o_proj_alpha/step.pt 
b/global_step243198_universal/zero/model.layers.21.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b67a8563dcf390c16473a880c704b236edb022d5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:919992fe6c348e5e76eb1890a41a3290f3fccd0c7a90ad61fd1519cfbc2e4192 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..554f031d84f8061b5fa07782ee1b7a6cda922b53 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:812c3ce963ebe9257258b4c19abb7eda355a470463b01006f0df89f8ece7c458 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f60f61b9da6376c62b3cd51d7ca96071d42f9cf0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:73a0050056d54e20abf93593406a5561272e7b8b4d17504f7e516c8178fbe883 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e16dcf75f1e4c240a470a7cd563cc1252d91bce --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a894965b768b0061e324e7e06a378202eb531f87828fbae94fe1b7b02018ab9 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..67da5a2815168ad326ec33fbbe4b4e37dbb63523 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bba636f3c08a9d7979d26c2c6590d21593985aef77dfbebbc482f1b3f7fab4c2 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..0f87d900761b1b269126b5e1c5214ae20284c5fb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27998f8f12f0889f92cb98dfff108c21131609fa8d55857245de889451891173 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..881547c1dc5cdbb2938e2e169d37c7ee171bd8ec --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:450217482be9b21f46f5fea1b52f61aea0aab212b4b4b8f2a61f285728c51834 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab080443c8dd959630a84cadfb839af1f8d5520b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89e4dc175e208c4f03ea558fec1a78be6844fd8dcb487bb18c2f49755d6af1f6 +size 1195 diff --git 
a/global_step243198_universal/zero/model.layers.21.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7bd35ab8deeaf3a7a98bd9264ce95aea3c75b0d8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a5cb9dd78a5b5a12bc49d79382700fa088281fb09cc7eb50076e18297b201d4 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee0d8cfb6b3c1a64bc7fe5a078ffebf5654a57e8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3aabe6dd0f41140e9d7bd15517883d2eda1e0e6ddb62c1f57facc82e52240402 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..36319466f6eb0fe21d84cad0bb55355699017b23 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.bias/exp_avg_sq.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd58b31c31763ff12c86869297ebbfe5bb0ad0d2f388eb8730beee0414ddb77f +size 2731 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..310c6ebfbb57544cfa2f5982ec0ec87c1c15302c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b79823835bfc39174fbebc524ebf254a09f4ed8183ebfe5359aeff6ee1618c4 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1aca1337e2957cc7789993239445c3fe4c862059 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfcb54616a9d7263849dbce472736457f21a98e887f0ee08476b4780c23acb89 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..3bbbb811dc5488d4931f48476b3fe56e68e32ad8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ebda8cca78e58abd73d30e0fc56e5c3de0ab01e2af94002da7bcef5eaceae9c +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1eb9b862ef025860bb0d0da31a0f464f02f9513d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ffce562181c2c6972ed5f1606e1ec78d02d849645d00cb37a9c0abc32032f54 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a54031d4640a160383ec2f5e21b107bf827b144 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5088baed4363eceeeaf5bd3168adf821857e4bb1db84e15a9917856b2d05218 +size 1180 diff --git 
a/global_step243198_universal/zero/model.layers.21.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..195c619523da04ab31e78a18b5c6bdef8ed48e55 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99ac8446b19fad649bcc37f04846e74427005c028a6e52f939fe0eddf0c7519f +size 1195 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0323c1a60d55231e6941092489aacc4913c74db --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7951493712c425946f57dbc24226530af18252a04efd07196cc059eac53c017b +size 1165 diff --git a/global_step243198_universal/zero/model.layers.21.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.21.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.22.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.22.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..142b1e66c4b6e9db719582cbe23dfb1fefef5bba --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:7427ce387224bb9e0230601cb49e67e2553fbcb69e4819c0e46e6b1ec151ceb4 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.22.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.22.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..03ae15e45d54efedb5e25571dc252c4199e457ee --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41f3de3c509fac7c364cc14ee04271de6346cb15db6a2ea08d3ef2e15481aae2 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.22.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.22.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c5b2baf1fbc73381390cd1a5cf0783e466491bf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:293f2385efa3934fc6a3a3703f1b5612494fb6bfa4ad83633eb886065eed2988 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.22.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.22.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.22.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.22.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d89b8456560401d9a9090f31607626f6e56baacc --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.22.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5be4d404ab434858f1017494229a97850a87af2bc1c591624b6fa01c46a85c28 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.22.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.22.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d0880bf1e940769871efe0e9827970a1d6de615 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfaf625d7fc39203936a777524a2fccd04647718669e7935c4daba6a106c0c57 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.22.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.22.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7cb734e148cc763516045e297eb73c41cf53f5c1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25d7523166634d32da507b9c1a67dd8bbce64d61ec893173de2726802f94e3e5 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.22.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.22.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.22.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.22.input_layernorm.weight/exp_avg.pt new file 
mode 100644 index 0000000000000000000000000000000000000000..2cca2cecdd6e893d3cffe427db0d185d6edb8dc0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acba3f896b147cc5679cbfb0fd02bc79aeefffa52b6df77050982ab3faf6bac5 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.22.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.22.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b971bd58b6353a878e26f3316f3b623c0277600b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:368cdf0967e61573b2c810d17372aa8ea1d0bac5137cd52ccd6eee7eac883343 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.22.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.22.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a34959b778bbe186e9532d2510661eb43f4bcd82 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cce0562a80ee0c4f128bc1d22dd406dc031a344742e741721cd9d908a5b1f92a +size 8781 diff --git a/global_step243198_universal/zero/model.layers.22.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.22.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.22.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.22.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d86362f2c50cfa3ebd850cc99b3e1412c342650 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78db7de25b66f20fdae7bf09884de9e2a12dc7da66b74261e22f9d66211de5d7 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.22.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.22.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..573c2b5492f8f98a2da5d5daca49459eef6f8a85 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3372603a1497d9482cd7d37713718bf2811de2d7b9049d626a65c29af5e2aa71 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.22.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.22.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ed30f0319322606d9d75438aae5cea4d16b00a2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f579c232e52810c4140e8a3f90e59a68aebedc1bbc8f3e6df91d9fde3c4eb32a +size 1165 diff --git a/global_step243198_universal/zero/model.layers.22.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.22.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.input_layernorm_alpha/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.22.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.22.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..17000041f6ade48c1169ef7f9866de4f496b6fad --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99cf5b0989fc2362c9b86db408f56ebf72d5c85c467707eccc21e617763de192 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.22.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.22.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..457f29e89be5be365a85ed3dd52aa81ad26207f6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:910acc7035860e2d8d921da9293f29ecc813e807667907ed53fa9eb92b52050b +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.22.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.22.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b31ff77036c79b99f4b42d8390ae10dd6332b873 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ca914061d62c9e3f422ed8463f498f953a9df510b77b00f8f0003ed6d54d177 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.22.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.22.mlp.down_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.22.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.22.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e5a7eb1f45c5273879636959ccf23bcf13c220b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4cee2c06fb8de874075760c6d175ab5d5f1fe351f0a6fe756b0531aa23a6cab +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.22.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.22.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..809e39aba8608a554e93794824d49af6683c22c0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bb12eeecdad963f453c14ec9851ce87bef67a993c0b949c5e0530967ca4774c +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.22.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.22.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f9607058fdc47b98fb4e28fa64a4b8340b17e969 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc3e4fe5d54d9ae5faa5f52c1154faeebe46f93ee473c23f5971382885cfc705 +size 36865165 diff --git 
a/global_step243198_universal/zero/model.layers.22.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.22.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.22.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.22.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a2bb4c20cebe26f74d97e7e0e03cf0f81f9710f6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22c71f2177572fe7cec314770051cda3238502333a5f893f276bde10bae88cc6 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.22.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.22.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7ef6a0d222335aad2aa43b2807c63a91dbaabfd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d526bf8ad0dc0f0a751068138d9608f81552d20122523179befa3628b5ffabe +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.22.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.22.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3c4d0c41b399d2e61fdd3905962b48b6afd8752 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:b4028492d6ed639c49eb82aa2cb88f8682d6ae8d837333f71c8a326657910ce7 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.22.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.22.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.22.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.22.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb61faf5aa7c0c9fe5798de21ab50c12e570935f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cb839d6c8e538e384cb67e3637febf8c97299ec78aa683e1c66235ab8a31673 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.22.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.22.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..128d8f599b638c58e7935058c229b80397f88db5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab84c944dae4691e7fc7d061bbe2c18abca91b370e79cbc3e87023f2773a726a +size 8875 diff --git a/global_step243198_universal/zero/model.layers.22.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.22.post_attention_layernorm.weight/fp32.pt new file 
mode 100644 index 0000000000000000000000000000000000000000..a36191b15ed24aeed26d967db93e7fd012e025e5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b672562eebee4e3fcea7b5bd25251fafd010556e3610895c73477f38f44f5864 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.22.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.22.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.22.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.22.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d35a8826e0056b1e12304996d478f8ffd6b510d0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e7a5f7b0d90a632079ac204a144e370a1f584f14f39d95316de8d83358fd7e0 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.22.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.22.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1847ed1c9e9ccb02bc3ee12e19275f335ee9e287 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a9767f72e9499f29c1d8c4db2dd7b5e601b5c20146737bdbc25d12426012af45 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.22.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.22.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a97940213fcfdb87e65e12b825f9827ce88b056 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c745293752cc48764288e6b8f1b21fbe1a764b4d18206775b341ce2b5d26de15 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.22.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.22.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5cbec234867d2ce74eff2a5a7065e7ab66bce9f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ddf4b119ef106135fcdd3028a0bec211c9dd1b672e4a49b796b63fde5999b03 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..37805e021499328c6035600322f1faaed9ca36cd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56cec2937931a3b20ba3f876f3dc4a20c36d8d9f84df703a156cfb6878a34d75 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d00be710b038d2f7d3026e6c554461d3c4970b2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f19da3eba00f838e1063c4a4284746bb5c3175d825a32081b0e1838263fb63c +size 2637 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5430fb3dfe73ad96935ddf181084462e4ed5248 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7936258e9a86f3ffc3123d105a664e1d2569478dd17b936aa4cc22f1c3cc765e +size 2950364 diff --git 
a/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf40bd2f13f7b955221dadfa8b3af9143e00bdc1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef291123a7eb2b04188d6f173e6184b1eda9f7d7082bd31034815a5e010958bd +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..412b0407929d08529a2ee8ab43de49333a3d87d6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1797fe6f2a66d015d9cce987326dd4d50a04736dbb87d0d2d8529d8f29dea8ab +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..81877ecdae545c7a573513ce8b79eef24cf153fb --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b76694dbc992e06088102a334eb0a36c4f94eeed2ea698d887a3b90b5e58748c +size 1180 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c17d2a79595ac1798fb543fcdde7aa4bdfc67207 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7198621cc26391c99856ca184813e67af0261c247d916c06d41884bfb061ad63 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab637e9818548d8d88bc723e951c923de4c16c50 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a09473988f1c6407c05c57112df9e4d31b115e0857335244f342d1a17786ebe +size 1165 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.o_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.22.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..01a211d03552d7662f687091967911282a9c9042 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65690534d7205acbe88c3e4f2702a3de38cf350895a0f2c4a0273db47d71bcaa +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.22.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a8f1abe3e29e650f1e7718743d89a8b0faa49a51 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5e5ef0e98587f68fd05158e4c769731c8d48838a1ce9872b9becafba9ac10f1 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.22.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c734b3056535b20d89cbecd9a83943c02eb998f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3900379e77ca0ecb6fe1a06bd5c81da6e19c48e0e1bd2acaa10d244a620ee358 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.22.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.22.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a2a9bcd958973d5375849caf71f84c98e52802f0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0cac60b16d17a4c9ffaeeaac827cbee91577e0218687141d75f21715d8dba26 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.22.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..26c2b012f7d4433bdc5af0019df56c9feb9fe9f2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aee840b4c1c049ef56748434e37267446eb4703fb287a34e34d48de9fe6d2029 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.22.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..06a061116ad36a7f895034e5e06cebb4ecd92cd6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:496c89db0dd5b98b0ac71c65b4b392f32d4e5e20dcb143147157be79e63c5d07 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.22.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fc18478c2d0e747b5949879049644e95f2424886 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca83f115453a538ce05830e1af2febb4404f5befd66bd1522c84332b6a2679fd +size 8860 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4f3272425baa9677efefb703a21e885136373e6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:571a9fd3bb37f3e1e0463e0d5a864226ae66aabbfab560bfff49893cfaa7d98a +size 8875 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8f07e63fd4b45e287e2d29c416642109c3e4035 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27dae8f93963e40fad3f7c78e4caca5454bd3a1503b744c5352498a7c8d32dab +size 8781 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.bias/step.pt 
b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0bba0ddd02ea2a2ededea434c13c87240817325 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e3dce6f542882b28b99612fb65515299808bc744eb83ff93957818c5747e139 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..371c295e05eb3358f7e0875cb14bda1e727fa8b0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0046d1794d8109f9f98c74da46e6e78245acc5884961acfdfeb4b940a29dbea2 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf6be093b2191b0400345d149fecad05f3a3f46d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:05a4debc0fde5ea5941792604ac88d35e26a923101853e60d36e889d65f0d3ad +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b998ff098f4e6a3e35dd0969d686ef4dfe60d601 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8cc7dfff6827f61180cbc57685201f8a348ed154d4f8194b971c1a2fd1a0465 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc2e4a6a6aaa1ba761843a0491b9e080d534c3c2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32670716bd3bd3bb2c4e950996a36a00d77204dd8fda4c185e0be7490e2b7620 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e6edf601843c9eba52dc29e779e6c57fba4a60f 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f4e4f256049b17d43cf8aa8f4cad96aa54ecf0d7ef5021fa695e5d6eef1a1b8 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6aaedc3274a610c4aa85a9518e8252aaca1fd9a0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71b3a8f5d45460909d5351820240ac00b36169ed1f6012c526b1454a01fe5450 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4043482f323a8aa2c4c4208dc6cbbd566199a5be --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d857a757df55d124d004edfcfe78283fdc7448d1a570d9fd39f2f2ed12fc0e60 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.bias/fp32.pt 
b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c6e1615036d2ca8f66aafb2f51c91c017c1e3465 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a98bf0b1460ff8ef616f713460d43c9565e4f3366c5496dc9f45aacc0c2a247a +size 2637 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a4faf5229952463b82ace50429603b6e8b63300 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f8be8f300938eecaec2ca98073133d0914f9976b99d13c4d74e4a45dbc33c66 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee1f934130ca4199f5334b6dc39951a4d2f586b4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:9528f0cf9e1e38f5b8926d345cd2f03359c479ce2794fe814f8733dfbc66ea87 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d7f78bf43ae6fc613a43ae7b6a1e6063f60792e3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ecde6f88141a1023fced48d7fb3756048a5907ae3a5ac0d0eed523ceb0d8274 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..46b9041686fff290f628584bcce21aabac0c6ce1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6651333fe7bd367f13fb3ede3600d3d44632bb410c50a6d3a82d0465152e6428 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2991ff68dc8980aa61397e816e3dda8a2b84d1ff 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96dccd05d0a5ffa6bb6a8db66ae744ffb39ad6cd5c2190325d3e599861c7700e +size 1195 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1514ebfffa4451cd3a3bd4128fa08f082951b05 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e74d145a302c441c4ad90a9405c20ec27215b5f99e521ca816830732acbc67c +size 1165 diff --git a/global_step243198_universal/zero/model.layers.22.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.22.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.23.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.23.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..16e235ab0405444e9bed2a92575eae1c4e217747 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d15075a1ffeecaa1194e67ec163f527b3e8b378373cbb29e0bc1825f7af1de8 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.23.down_proj_alpha/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.23.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..14603ee120064cabec8bb24c28aae108d345f1f4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9570a3b04e0e483fa325a426aa5c8c8057068348ab2fd5ae5155697bdd79198b +size 1195 diff --git a/global_step243198_universal/zero/model.layers.23.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.23.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..856f3bbf9f4ed7e7aa2afadf7a9c54aa36605197 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:987d204e76691cdb9d468b1f716c2d0a8ff8d4f0aa8360f6f2674ae9e4219893 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.23.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.23.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.23.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.23.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0b8a57e19dd6d57f3f141380c7b16d8be3629e1e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69d5cdaa12146abd8dae0a283d932de63edb60b42a20ed8fe4bab153f01bf72b +size 1180 diff 
--git a/global_step243198_universal/zero/model.layers.23.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.23.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..54b1512cc9916055c158ff59faeab3e50a63b11a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1d4cc866ad5f22aa669489eb99588240dbee7c9463f2083b88276168df9cdd8 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.23.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.23.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8fb16c69789706bdb1dd9c0895c4f0889fcece0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d05cd50c34e18c9068e969418c3311bc947e71ac006bd73d286d977dced32636 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.23.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.23.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.23.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.23.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f00119345b285254d54e270c65cd4f4c04d6fd8f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:091b0b7b7bd67fad60ebc54f1b7f96e8b8d5e80f6600fbdf258128db7a8095b5 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.23.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.23.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3cf5032777f94cb1836a1aa7d6397ec9c046fffb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e00725214bdf3f9e3618e7b9f3b442ab763a5028b02be0508c4c281946e3b210 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.23.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.23.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0d20104fa470cadf14823c62cced4f168be9359 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b6ffc2d02cce4cbb5edbc52bd2ee693656c2de9e14d920943705f1aa1a7e789 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.23.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.23.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.23.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.23.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..e7c7a5981a42fba82e961a06f9cdf84e85532f46 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbc06e2c675b7aaa28e4a0ff6ddaa968660c73f5072e91721767aba930768683 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.23.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.23.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c8fb60ca94fbda5002a0d9d0daf367650723950 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85ccd08f88b8ae31d274144c51cb64ef387cd7cf3fe693ec510a6648be88ebae +size 1195 diff --git a/global_step243198_universal/zero/model.layers.23.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.23.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9f14ee2c8a1a65d1fd36182bdb81c12dde64c8b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c44a4220690640dadfba37927c709abc071d47245b9c0f883066f0ad7b949887 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.23.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.23.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.23.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.23.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b6e44fcd0e8562545d1509c5d06fd09cba9638f2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e723a73cbabc87e43b953d609cbebef19ed5d9c6e99f6c4866d18dad0077e6d6 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.23.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.23.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..bbd7b7a54bb2357d82b3a5d86744f5cd78c0959a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c86f37cb15fa1b4c70769dbcc4ce416ea1fc54f601931e1b2d099294a137fe29 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.23.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.23.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d47f8b3083a02206aca0652be57e035044bcb19a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6334e276cf6a5b4b48d9d91d2deaa2008058774e13c14160c02a6a8447912f29 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.23.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.23.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.mlp.down_proj.weight/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.23.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.23.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b692cbd353e1f771e8e0742d9d2307c473a79bd7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bc1303c4aba230a5504d15916842bbf678efeaffea07e98f75475e375bf1f56 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.23.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.23.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..017e7dffccc691752196c85994e280d75f257f53 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3f7b1afa5966432a0822a3b4fbeeef6c965067a34ffb81b72f5aca313c5b4de +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.23.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.23.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..542e49d8b2a6543129eab31495665e0be7d90f1c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3552ada67256104fd17b3ca52e5666175bd14ca3126fd13a59b19b6a278b022a +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.23.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.23.mlp.gate_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.23.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.23.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3ecbe65f27a8006975b4ea0b81f270cafe96806 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c2c97ea1aa4c652292cc7936268be49957ffe62fe5fded39f1172dbe4a115a2 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.23.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.23.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0cb557c3941eb635a83988a6efd17994c9bc0143 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd5dd12bd97ced84220f79da4f6f3eff04b1e11f64147006640f3b0dd862a604 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.23.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.23.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..732abd386003680e6bc8a3fb69891395b0ee86dd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff25a5acd2ae3728dd11af8ae16699f84bcec9bfdc0d3b5e99f45acd1fbf99b7 +size 36865165 diff --git 
a/global_step243198_universal/zero/model.layers.23.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.23.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.23.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.23.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e52eb00fba7f1598e4fb6042afb40efc0a5069de --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a0f4194f2e30ce7aa895225f725838d10e1cbea81905e2b63ccb00b50523dc8 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.23.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.23.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..30d908b31a6ebb906ce4c5f1042e5da5d0d5a2ea --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61540c517cd14cb28d6e427d29106e81df3c9775201ad9ee26ba07fc004c75fc +size 8875 diff --git a/global_step243198_universal/zero/model.layers.23.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.23.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e335f8710072e14ff7ab5a0d0dceb66343e2dc5b --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.23.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c03c9d2d9c1baee9efee4c9c992ce5507552606ccacdf375111c3a8917bd8940 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.23.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.23.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.23.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.23.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..893ca4f037ce87df021d757d4ce078863cd26999 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cf15a801ac37486b86f87778848094bf4f0e51556d129aacc1a6e426ff890c6 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.23.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.23.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed34f860559ff73aec89f65373fcdaf7e5b2cce0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25486f828017b0d83d502fb79b8839a19674fa695070a43972ff9ebba20d573b +size 1195 diff --git 
a/global_step243198_universal/zero/model.layers.23.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.23.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..003ff41f98b83db5d945d2ec2bfbedcfdfb4bbdd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a7483d24250144541aff8338009f72e1203fb180707d55218f7b1ea93dd9d8b +size 1165 diff --git a/global_step243198_universal/zero/model.layers.23.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.23.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..879f1ac4fcc5a8c401bd31ff6f37e3bdd1550646 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c436ba5a99d34662de705ffc946a5327fac0f23d80e15c890750cfabe3ade76e +size 2716 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..aaddfcb036e7ea78977b740207451d3fa8b526cb --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62620a6205ab4d6dd763d53286c185fd89e980f7d9b01609e4e898b3cbcd0d49 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6456e2d7ebfba447fe758cc890cbc7296b51451b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea6dae294a97b87e91c6488cc2ffb359ce9bfafc75fade8cced14caa4cdf6d3b +size 2637 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5cd4d486ce14d6517077728413950eb2d2a85f3d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a42f9768b5c44c9a75518b301a9d4e43889c02ea47df788616a41420ee39478a +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..90fee66df0aae06046bd23c82ea00e8f63d6b233 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9e3371d14dce11e09087e198a0b5fe294d1bcac993071910e92bd26418ead4b +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4be6358159f5d225c1f3c26de69ae4c0dfa20ce --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2435858ee35946e6bd6971160c746a456f2c8ddd11c28dbdc1407cc252c3edd2 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a6e8d6c06eef719a004fe8b110bd24b6b584f21 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1b3d5b0dbacbf0c76a34a6a2be22d08615238981415e7b63d35a83abc1cf1e2e +size 1180 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ae7daf2991b961e96c0c48950b4bfecac0bb030 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24722b7fc8cffc0eee34644e8528f64b64fd036dea45d416f826544e3aeef72f +size 1195 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5c28cdb352cf6f4ada04d2616a886426679486e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ae8df94ba6bf1aedbe03c6e18d925ba9dd91ab996998afca45c5f4027c8ee5b +size 1165 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.23.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..62d6fc52f41f7979f9a6209328d00b42051c9c92 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1693df2926400356858d6dd8297def36d68fc4b1f041643d734e97a9d72151bd +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.23.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5fc8a530422fecf4f8cdc7b062e4f7f0a4e355f7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc06a4c4c27eac5c62edd3a7c4de035285071e7f9fae87f05f159d89d93a7f43 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.23.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8346f0c3191099730c37473bf4550d76c6e751d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a20ce881cd74fad592cd350f5bb0de862c314fce880840aa1f4adc269454048 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.23.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.o_proj_alpha/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.23.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c075de48f56e9077d53e3453a65ac391cd1f56d7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f377cd4749c9ba648e3316d7fb2fc4f5ecef67e7ea9f30aef446ddc99f059f4 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.23.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..fdf0f5d2da75e1158fd6bcc9be2a7da083cf3537 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ed5e362281cae4662f752461a06a40ccc11f7614315108f494fe0ab67dba5d0 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.23.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c1801b5c331c6f93b4b6bfe5654b4b9786871f8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7a9471aa23483d8b9cba762131a04ff7d2f3c69a2790cf7dbae24f7fc219f58 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.23.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d545be05cdaf8465a729117d5ecce76333d8ae6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89a2a3dbb05d2f969b68cf2564e80065ce6034d1c5586c527a5338086cb3da96 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e578718307bd127973c9ab47512db08a392e5d03 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7895c40aa6cc9cc65ba4ae74f8dcacf426dd02787905160e69fee5b8bbbed1c3 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e2322654028f9c7ab969bc0d6b57da579b0ddad5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32da30aeca4d54338fd3dc2d3f10aa8aa6bbf04f05ee2ef9aad726788195840f +size 8781 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..30d4a0291ec513ca4be3104fab255eed391eab6e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a414cea2a68caedbdbd74b328eff2e35fb6c1855769bd75f1ba7836244f445d9 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5e329e93f4d7449e40160bdcc8399b308646357 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6dab1bdc9da8095e9b7ac5419b674a9a771df6e3d5df767790e324752e511dd3 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b7e12f5cdb1f829d28a8cab39928eed61a4baee --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4b7989293c7eea702d4b73dd42778948b52bcc78a41e1460aec424e5f704592 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.weight/step.pt 
b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c25097ac499aeb17acc6b4a9f0b4d15f98b88f8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dd5ae5f521baff09c8137638514bd31f6b61e834553c374a5968328e2cb195a +size 1180 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..66d409fa15fefe187217a1b29cb494312234ce62 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22ebfc9e2495fdf3057ce6f37d4ccaa8f7b682e2495c2c648afe8f4f86d1e021 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ab918574b4bbed6abd6bc1f4a2b1a1d82c30f13 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:2d84e90ecd61db745d07d7ee8a06d297bf293b4e2594c5d74f60046d4900f935 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f066c3a2d7a0e9131f555bdf584e63861f667e12 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9161ca7f999ee2dcab5ce8ac09033d7ae6eb942778120356563954b87f71b46 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5ec7c917426c7e563622e8e32f21695a840c2b6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ecc9ea931f94cf96fcb19262ceace1145abe8a26d43fed8752c8567ef7140fb +size 2731 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4ecf80675e47f404af5134bf11276c07794ead7 --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:347b4a9dc0add4997cf6458be11cab4b1436e27a64ccfd33b61d8687ed8750aa +size 2637 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..60ef3e7c7e134670fad65e9cbceed0cc35144280 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70a1bdaf8158acfd491c3ac0e360889143f1abbcd2c4a2da6ce65a75ea030d21 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6387fb64755a449514691d5643d45f7fcf62a80e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f471117d277d4dc37ce492f9765809330cf38e49fe6ab78fd39b32a1bb392d5 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2757bb7f15187a4a4ad3d7d5fee3b41860eca05c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06b164d92f9d1b4cfe1ebab33e70449fe7f99d81184d1d0be35033db47b88e5d +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb5a3700e35f394f46668d7cc4c00e705a2d20fd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e77201b41a09f230fd4192c4ca635f95feb20a2c0e736f1c478a916e811df7a2 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0737b85aa1437f278c816b21b889e603e1b1704 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:2b3c9f365ef68ac57aaa319627911857fdb8ae3a214be183d6002f6719c6dc58 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..27745f6f88c60167b93db4e41b8722fad540f0c3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcf83b59732e2c82fe1126203bdf940c1c2c0e0b8d0b45be5a6449457ec03add +size 1165 diff --git a/global_step243198_universal/zero/model.layers.23.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.23.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.24.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.24.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f9592b94c89e2c14dd5cdac6b24882af62e3528 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79f08cac95956ad0ae5163783a11019506d2e0eb5a8f6b45f5f18ea9b079d145 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.24.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.24.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..458edcbc60cf5ff136121c65a74452bc01e6f867 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.24.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6bde93bc334115fd34224fdcf99d2525a010d44de8f96f402ab0af5633bb15a +size 1195 diff --git a/global_step243198_universal/zero/model.layers.24.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.24.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..941427819b59f52d99d01325f77859e603dd4060 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d967b5568f0f9067d43311fc412272a7b143bf1bb018d6f3c8170b43e23897e8 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.24.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.24.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.24.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.24.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..21ec88d01af9b04f1311d2e733bcd60f35cbcd18 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94b005e4445d789ad2b126cb7305f12d5cbe83f199aff3bd9dee7458f37dd221 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.24.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.24.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..3a3cd8bcb21c2d6e30ea5a61ccfbc1c2c41a1cff --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c55641b430f107cb8d6f0c01a839f6f8425f4f0e4429d60a6d9ef2d62dfda833 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.24.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.24.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b04f8d1b71b1b55ca45dfbab31cd3787b3b142e5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e82049e8eb213fdfe945d7da51e73fa3dfc49a04b5e4bca5feaf1107e6f22b1c +size 1165 diff --git a/global_step243198_universal/zero/model.layers.24.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.24.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.24.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.24.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b27259d68c1a1efa3ef4246f6efe6b1118703d2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81a88e3ada820d03ff3299f20eb33fa6abaecfe26f810b714af64a7ea9d2a2dd +size 8860 diff --git 
a/global_step243198_universal/zero/model.layers.24.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.24.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..13ff8b17c19792817502f53ab7fd0bffa956dee6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4fd00a4ccf32ce65441b2a88347b66ec03dd8c217092270a1cb96eedc715e55 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.24.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.24.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f78c7a1be2ed17f770d03f91ef0404687946816 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:447ca8cf87276dee995c969382eea84dadfb437d8a44bbd3906c08549ad9fa5e +size 8781 diff --git a/global_step243198_universal/zero/model.layers.24.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.24.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.24.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.24.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec1a9bbf4f46e4c32397555051668f4ef921e880 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.input_layernorm_alpha/exp_avg.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06c46681a8553cc43e02ebb1ec07999b21dd5ca85ad730295c7633364db55d95 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.24.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.24.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b3525c7827d75f7993132986fa0a77aaa1aa69c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a7ae1d8a305cfdc733fa2c58dd8425325cd341983cfacff30b7e911c4d8eb88 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.24.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.24.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2c5b8dc29c1b81203c8fa6066c8a543d3b65825 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4d608a7707b7b654f38007c763980cd44b5ae739e10c38d7ae6df61eb0fc660 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.24.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.24.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.24.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.24.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..0e8206e7119da5febc89eb8c733467b81ed9fe38 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc824641bf8728395adb5eedeffc2918feedd57966adfaca486006deae0eff51 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.24.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.24.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..aed6e1ca42ffa225bf43dfd69d444ed748681ec1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:592440406c31c3956f0c522ea2aa4059679622985f86929294dadf8ce42d5226 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.24.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.24.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec365a3ac4b7aa5557a52eedeee7d5ef8c3901fc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee1857b04b4d83d8cc0cf80190f9b7102f4073782598751443d8b99a121543fc +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.24.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.24.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.24.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.24.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3456a254cd9c3b5678d5bd1b9a84c15f1bf0698 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:744f8f30aaf17ddd6423570b4c3d9a4431bcb14b03c5835673e2791ab019844c +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.24.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.24.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f33045852dea31800b8bf6bc73bb5aae1138b18 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d73a9cc8d5a572870ff938bc4736c684d18d4ccc5940121237816b57fcd0e6b +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.24.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.24.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..57e8d2c9af0812e3193c03cd9d73b509eb8536b5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc25e7e20dc3459e4b5081b6b77cec9a8e1687f5344668fb3ca3a8ba1ca06caa +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.24.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.24.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.mlp.gate_proj.weight/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.24.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.24.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d0c095b55527b4455c868ec46ecc16503f33db8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6d5796dc4a614a5fb08e6ecec7aff7ddc9cf4f47c1741a0f7b490832c33f1dc +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.24.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.24.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f62e3a0837b3ba49611c627469df811d772dff1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53cbb40f3100aec3ef0013ed88aff6ce0e382474706545d3f758f4de0d1eca32 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.24.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.24.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c2232d665fc338b43dd4fb0fc15cf0d43dd1fe0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd18e75b6646c356d1178a7b1215bc216118011bacdae715f947347e11a8e02a +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.24.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.24.mlp.up_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.24.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.24.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..70514a1fc50110777ddffd36014d72d1cc6b8b9f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8fab9f3ad90ecc036a036a691a60b7083758094c0773369845dff8474fba758 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.24.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.24.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d0f79f48f8d9aea9725f8fc45bfd62a9ada08f1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a82003077d04f2830d3813f0bd0ac86e5a86a73b411647db9a586de122ebf43f +size 8875 diff --git a/global_step243198_universal/zero/model.layers.24.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.24.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..78698f92b85f8c2963a6b432c3e211a61f2e0e9e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a5b0bdbc22966b73666d00fa0b27adae26f5d2da1d8faeebd2ad09b337a3c8ad +size 8781 diff --git a/global_step243198_universal/zero/model.layers.24.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.24.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.24.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.24.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..693bd8d18cb966b0fd49102c5ce588dbddfe77ab --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efb7dfd2103e19db828f5c654cec415a8151d3f5da30d0222eea0a85f9fb7989 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.24.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.24.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..eea8960c04b179f969b4a0db2c836b1f1db1a3cc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cb5cbc51478000b4a96318e9085fecdcaa3def83b756984d17631a29a7b9b2f +size 1195 diff --git a/global_step243198_universal/zero/model.layers.24.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.24.post_attention_layernorm_alpha/fp32.pt new file mode 100644 
index 0000000000000000000000000000000000000000..f5e1f31fcde0deec5c5ddfbcfd59909ddaed4b2f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c518b699f62845500baeb52fc620f24c140683243e809011b56857d4fde71ab +size 1165 diff --git a/global_step243198_universal/zero/model.layers.24.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.24.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0b1a610d69ef7c3b54a1d6d46046f1ee4d01b51 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc95def07ca1f1013399c7d45ed9f5bb3c9f061db94b3191260182c92ed79e71 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a4fb34bffd0552edeecdcd00919b136641da049 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74435391c24431f2b4c98e31574118166b4573c2533806790a894c51ad275e4d +size 2731 diff --git 
a/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed011a96363929304cf18a06f309b5facfe017a9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90e6603d537fc54efc2e5c8f75a9c02bd62da94c8fb8bef4201fef5c0af083ef +size 2637 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d39542025cff3bc98221c3488b38c68ac7a00ecc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6026c70ba92293f690ad4652c1f6d52f8311ff4aeb5f7693ae17889c8654729e +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..af79b2e96b8c0a669f4f54495490c9cfa08cd54b --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69d5c6a973b1ad7d0a8f1b63e4994fc0ac7278389bcbae3b3c43ab57dcf7081b +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a34817bffe709c9fb6d29539a22ad0099096220 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b999bfd65f1dc761206ee4418c6fd17d8a0216c129584fd0400caf04bd41e9fe +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..22e1ab34baa99e2a7a39c4ae523c7a2803dbce70 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1b856f6df748f2741b43fa5ee5c631c23f1eff1cfdf50f299095e4d9642f718 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.k_proj_alpha/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d92552cf1e9c46864315b0121864693c460b22d0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb529117307669e2e90cb5a274e6d5b0239a30464fbaf36fc3790db2c2e27e19 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..389bdcdc65a529ab2654dcd05400d99f6ee45775 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27e4c8ff163867468c0d460820c4a0fdfc3d4544e31b66ad1adfe7d5e43a3769 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.24.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..427ca9e65d171072a4a4aeb9ba014062b4644445 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:28864db7399a62ce49f3a1f5318e854ae24a8b4014c9cbc05cf933225bbdeccc +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.24.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9dc546f8480c6e64b2f828c12d9fb2173849421a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10d9cbe96f5a31380113664e968a5f962fb1d85e21b14396bde8892425775095 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.24.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9305981965e8520ab36329aef6d9f0f7e353f445 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0ef68ac10271d0659fabb37c8f2db786ffc6b4224242666cb063eb611afa879 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.24.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.24.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..18c989581c21837905d3cdff13f5c644b83d698b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9be450f8ef92c1296a5b958bb80962cf6ff35189c8f1df60b446fbb15d70fcb3 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.24.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a44c729d6f81e1ab234a43f417bd30fd9517def --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5122d3c947a30db82a757d0bbb316cbb1d2cb7866b6994283f56f5019d918293 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.24.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c15e24072e77e5f3d51d9043407c43edfd5375e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81fde9318cc10fb08302b162d1acef2be87d9ee0dc0cbd009773886b19ea8c7c +size 1165 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.24.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..48e5ce0f42be406f4b8d1dc6fce73de6e1b6e31a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55fae8cbdb994abfbc392deb2adfa729e6e0d596612be3ba26ba993cf834260c +size 8860 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d34a24833573c8ecaf32f5f35dba8aa18fde4108 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f0d74857c3a0ffb9ca5f33f7df1d7e373325a6adceab3895a0ca232881c5b4e +size 8875 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..272c281c430277b59ee27e4ae9bd49d52569c513 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e0ee83fbeb218cda437345af8d161021a219de7752b67beed4d358392574928 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.bias/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d137f6aedbb2225892606243d229607e2d25dc89 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08066923870e78ce7fc48dc12a30f76fb1ea905c0f0b76554303a917330d302a +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..86395bc324995cc66df48cb894660b26cf937b91 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0849da589ad2650f2f7fa5a99017b2c65c1f30e704f8fcab08c8ccd710ec2763 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb2625ac7b1cdd256cd9fae523674c394eb56a1f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:256cdecfb407716b93860e1d3c11170b12d7468254a26928d50332464c2322d3 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fee29661ad06b062f6d6cfea2ca8993e5f380ef2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc042f7641df6473c45f046aa51122197f2e69e4930dbe0d3f0c8033048c47ea +size 1180 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..fecf304be75ee7be31c50d37033a8df1bc9be767 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aef2ff682332dfb89288f551267df6773966aa83ef4c6f31a372d817615919c4 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..216daad95436a0b7b47fe4c847b816b5c0c6c42f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd7e8914b94adbe889fee41afdea48d9551648a2f4f32b2cbe05969f3dd2833f +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.24.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d52f1909d0d89367bd960530b48ec95b4ad1dd6f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9831ae6f91e556ff93b422715c85630283d7c72e0fda7d155db249d8c50fafc +size 2716 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c92b26e64f36d0165aed4b1df643c6cb83d2fe31 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc997dd17b963208d60a4354c2780bbca3284d2e3353f2c0d301cfd8a2d78cb4 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..31f1d756ed48dad28f09f9d0c01a1724c840c694 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.bias/fp32.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aaaf5b2bfbcab27c6067dba5a11820f98f5c872db7493787b2535959d73f544 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..00a5f5167c78ba9bf453f8ca68360c728dcb5a75 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ed3d429f7ddf8cbc9a7da9c146dae00b3f58088ae17cdd7dd4db079e0f36d26 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..58abcaa0eb78978414d5546c3ea00c959a0790c3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4786557f4d39149db1fedc8f04d39eae8a440cb2f00672980b0c24c49c06337 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..c14f3c257941bf127dfdeccdf85b8b4b50b3ea42 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b8f2c90b469ac54863061ab3a249b6fa10e0bd8a4462d56be1f155a0198f24c +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..300c8c623a72374b9e33ad926682963c982af14a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d04bd90882aebbefa131700eccf7d57f779d7641463a3a3acc629004295ff6f +size 1180 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2cf366b4413271e4258c03a8a58d248b8e3f9171 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:077cd4d607c8778560a3417fbe53feb32bf17d1d3456ff81c2ab569d62cf39e0 +size 1195 diff --git 
a/global_step243198_universal/zero/model.layers.24.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5810c1a0d945474ba0aef5836ba960c1fb4fb6d0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc770e565698bb93e03118c65abddc7d833395840276f8f121d7b3f4d6420fa8 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.24.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.24.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.25.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.25.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..117400ce493d61d459f5bce924f7b70e80122ce1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f707319381a09ee565b3648cc508e7469e24eeb6a9ad679822a83050c5616672 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.25.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.25.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2cea06271d1e174187aee49ddea5df55cc1b905 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:8c3475f1d9eb9372a452db0226c43699935288cd95c462e72139bd93b5da4c9b +size 1195 diff --git a/global_step243198_universal/zero/model.layers.25.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.25.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c96ef491531d8532bc2c762423eab1d75f405bfc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cd1f4648fdd87d7e0359c3bc45fc1e557a290f9003db4b1986a503b58b796a0 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.25.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.25.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.25.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.25.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e390326ca92afec1651d32985eb40d37bc5eeeb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f59ed16e3c22371d492ed154824742d878cfdea01f0ef4b8e6f99d8cbdab3e8 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.25.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.25.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..20263edb7e76ca04a51a1b62ab340c25ce122d94 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.25.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90342a411c0d92261a8d58fd323d90de451f7981dbf33a0217b9a2b90cec9c87 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.25.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.25.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..424f7f9d995dfea7d557b67deea5899bf18b56f6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac4d150abb6cb14724543378c5c294ad849dd8926007a84071a8b0b41b7463a1 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.25.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.25.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.25.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.25.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0c9555e29a5aab30273e7c03c4d6fca05198030 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79154f9084c48d938d25f77294ac4c5ba741fecdeac53fedad51f2541a3609a9 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.25.input_layernorm.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.25.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..657957a6a6f2872653523cc2042f6d5369856c2b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d14c46d77dea6641213ca97739a15d126b5d4f6fc6c1dea3a651badff1a58778 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.25.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.25.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..53393cd18fafd4e428b686384397ed7d2bec993b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e80ab4479fc2250f37311ef5f2b4e8c3a5d241690f4e234056dbd4d3668dff70 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.25.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.25.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.25.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.25.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b610e9870a52c8cf60af4290115b70ff4d5592d9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:51edf3bad57b7116d22caf2b081922af6fd781e1b2b4e3f5ad7f473e7cab07b9 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.25.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.25.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b671db30287c8b886212677cd94edf06c776684e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e32dd7a64ef1de50634dc9f18f98aa48d1b55fc243e3c79c96dd4b12e28835bb +size 1195 diff --git a/global_step243198_universal/zero/model.layers.25.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.25.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..55400936c789c053ad91e23f1099454212086882 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a1fdd23327b46406c3ac0615b39d440dff23669427b75f58ecb57f7aa3e4668 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.25.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.25.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.25.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.25.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a731b1419bfead420900a734e033006c2955224 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.25.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bafd4fbb3612d4607daafbf2963b681188f07e8422e9d453146a2d56c6daa6a9 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.25.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.25.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..63dd353701757ab1b19cbd803ba56b503e33d684 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c0848da454803b189034aa634e6d15fc99597c53fae6dd47f7d507940534e21 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.25.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.25.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ef95228485d16547b14f3047ddd85c90a4d1603b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67bf1cdfbfdbdc0dbc5a81b9145533590c4e578e4d38a0566e1c55180df07a87 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.25.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.25.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.25.mlp.gate_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.25.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5405f65e418931927c4ffc797d6086622a95900c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a76e2659f4006b00a3e1538e37b938328e198464625400b8cb6e864de5a1559d +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.25.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.25.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..368b97eebdbaa3c99ce0572d78e8ccd89ae7bbf0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8488e005e57aa0f805742bd1a40ed8956b08d15263b2a419c0fd48edff59f659 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.25.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.25.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..09876c294a1a83e4606a64fee7ca40feb27dbb6e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36ff616ca09fa6cba4ef98326e102a8d6123cc59e51ea3c30c38fdbe31afad2c +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.25.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.25.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.25.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.25.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d9a302316fb20638e1d96cdd73596056e2a20f7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1dbc3144577c652a81119e2af17e19eb780beffcc9cafbf4441ca111593eaf6 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.25.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.25.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0cf4c88bf990b048dcc89f674e66f3c9a2f7564c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3c73fe4f116a3d78bf48e7f916f4a2ff6161f29d18bbad84225fa74eb9ff7b0 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.25.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.25.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..163a089f37da010bbda1fd9533e387a61a98caa3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f63948e0c889fb5d3c76f4fd3b682de78594b6afcb46c1f21c00e767a90418cb +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.25.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.25.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.25.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.25.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.25.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0fc1e46a684dbc5c4c544757c98fdfd1f87659b3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25a4748ecd3091e2a434f45c7db62ed735cf0be111b324da7bce1635f801c72a +size 8860 diff --git a/global_step243198_universal/zero/model.layers.25.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.25.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ad43fc21fbdab783cc9f3b5a443bdae0b2636dd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64a4f47920791fef2b986e9410b45288768924cb9f56882b18d8ccb7f1698913 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.25.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.25.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f33007877fdbee0dffa87a513004133667750fb3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dd2107cffabccc2be96dafedcb15649eb1608880c31d807d55fc81dc4721ee0 +size 8781 diff --git 
a/global_step243198_universal/zero/model.layers.25.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.25.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.25.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.25.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..50d4feb3d218f85a81b6b146830eae6637005c65 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11d6ff9f7d5a8648e361db7ebc14fccbe8d802fc82bb30968cb97869d7f677c6 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.25.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.25.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a024910a45a3c10e2d1d7829f304da8a04399169 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:803da344e0ef38681f5ba849c50069cdf26e75324a340f98b172aef90cf520fb +size 1195 diff --git a/global_step243198_universal/zero/model.layers.25.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.25.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4fe7f132ebad1e55b529c522d0da2598457450ec --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.25.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e75c3dd346f7c1043446e5ddd6c283074d14988395e5d045bc45ceb3fb93dfde +size 1165 diff --git a/global_step243198_universal/zero/model.layers.25.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.25.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae23c7f50b21a26fa0b55ffddd950ebbda893b8e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e054e32f7dcd55ee4c047e9734fdbe1cf0c734776bae4bb2d987a569592fd09 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6207bcf3055aa1b7fa60cb437cdfa68ef89bf527 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ced1edf92fd1ff6de5af23f3c015569d0ba8d474a8dba976f19fab780152a640 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.bias/fp32.pt 
b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c42bfe7030e6cff2755d13786e2595b74cce3247 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:921957914d85ff5fa1e5c12e6c3ba429cf46bc92e72585feaf61ac86a1251339 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb41c5f092aaf12c3cb304ef57a5b34a3dc0f222 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:056fab76aa7dd4550ffad0eaf04e5bcefed5f1701586b537888ccc68f5708505 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0146cfaf25a48f577f1ad8cfed960d7d27f695ad --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:04527500222dc50f673244aa7ac6dfbec504f446db2fe6fcb6c568e16d1a0318 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d34859d55091b3602c52a61888d049d9ae71327 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:790ca1a1709b1891a74bb7bf560a2fd049115981647c7e4fe15c82c3cde7b5fc +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d89ba40c169343b9609c86082d9380bbc522a5e7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:051922685fce2261e118be21b5a73c6a36c8a4d135cdccfa4fc2d1b77d641acd +size 1180 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3556d8f8e37ff4f1a4c2a2716bd2c7241096c367 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f327a9f885534e8b4cf0add0c67a3353d4353f9925678587b9d25f417001b81f +size 1195 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9682588f704e4f4cb91038f45cbcacc1a3edead4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4953dad1b6029882cd080026b8891ab2047e3d0026df7e856ec586a4427d2ba6 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.25.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4dcca9ab9ef69069e387009eb6fdf9d2c6500782 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87bfe106190e36199fbc51762e6ec69873444f24ae5057d55a676cf4d6e77423 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.o_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.25.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d051093a20dd6c5483de5f5c96811895d53ff72 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f625d24036ccde0d6a0cdd31938961751e219cc8617ba2f973df8ac068d60ce6 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.25.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f71c5e62d0353967b30c2b4e18ed2f39b22dc0d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd81c96ad80d5ed3868cd6c8df476dac98606fb4261fde25021fbde9d0bd3703 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.25.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.25.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7641a75b198052dc86b05cd95af2416b4e1632eb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c732a728cd56252d33ca76612b89088db8bf14f974c321a4ff64ce8de9795291 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.25.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..de2a64bfecf2277f64ae486d07e838aa022fed20 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e48c8e681b897f96c66fdb1ca421c56a6d2c5d9e922cd14a2edf3a0d25896d55 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.25.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9edf57f171904e803a66356ac7058d46938ce66b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:735618102e6664ca4b6eb867fd5fd2c951a0f83e92364915522c590a5f02772b +size 1165 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.25.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa563593661af9813720390e3b68d8a129f788ea --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02f39f98325f9073ecf3582e01ba4b1ed035f00b18e3ab1f2194b8cb87883d5f +size 8860 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3902d701857027bcfd8b1aed9d6b8225333cc556 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b538fdc8169e52f8c3de3efcd599ed60eb3903aa592c937d8370790941914cea +size 8875 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b8c15f34f8ef87160a6532d28b355808c2288bf0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df8206750b53783bad2e83e55fabd67ec296d8c42030a0730fa4fb0d08d80379 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f51bc654f26cf09bf12388be875f55ae54a2144b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78524ea09eb75b9e5aa5b0701aeb9ee278b245afee800a3b8eaeb708de5b0fa5 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..540effcdbb40d900a97b2016f41a729ab7215873 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03df00b6c69f6b1af2b8f9508cef1cdfa407e0542f321e6f471ce5815ee55906 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4482286703ec12936f05c9768bef7751fdd8c6a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:655f77391f85f4bbc9f2e4facb557e2f74d7ed9cac4f15f82c856abac3c549a0 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..32c933247eeae5a6345ac3f5b2bf0f71acf4b99b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5db93b933dfba2d8549a314ddf44871c2f0e3acf90753699a6ee935e5afb7c6 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e6ed0dcc19390818c9eb73eee6105b5b1e84910 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8885601def3b91c97f534c5362da3382c6817dab00101ff1a2572ea88c1740b9 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ff334233cb5c6777a160d736cc907a7ff9756d2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aa36dd9ce547d8fe9e6427ee0a648d939c0fdb2cfbe672d107fa22a4ceb4082 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..96b09239a6565d859b545b1aa5e093b779f85969 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c74ab28f7d12818ddb395d09d31a4d4d20a7f7e6d77895736e0f82e13575daf +size 2716 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9de91d54f44c5168ab22573ac431610057e3dd20 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0d37f9a408f702f98f68303b5b6cd9c0eb73fd6dade50cd0227b8ee9170b740 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a17ae00513eada09a887eb4e9fa9a9f507b02a7d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aecf893dad6324b8aebd32d7943856edfdb104bf25f9339df9dc4556276b04a6 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.bias/step.pt 
b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3fa83a402464e3bb024a9af6fb12285400d2693 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf97c35d784aa4ff102aa868bc226bd7be3d1c36544566d3b0104294dfa53638 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a08c446db523284050f77a5a38d903d63e4d231 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95761484a2f324c8ab493b860d057b135f829eace96325508270bc57f463ee3e +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f73f8397b83b7981e7db6820b5b394f64d7536e8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:5e2940c01abb2b620b435f208e0f788cf7b89ef362fe041cb30427a8f927620d +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c37938b503670e55640c76855a16e580aa94dbe --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:786c34543536dab1198e15861781e554d031ba069015b7cd9e5deeb0ddff1e4c +size 1180 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..062073ee96ff7221cf6a8eb9a8287cd936076ab6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0cf529e60f62a70c741c278ca8b65147e074b1a8b679f4840c27a0e36b998b3 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d053558baf21985aee58508530d11afe3777bb7d 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fd684d837331b5c7b7a2d155cb03104c7ba8cb50073de99061142929fec8413 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.25.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.25.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.26.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.26.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b888201e0018bd43b09b53add31f22b719a44b3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:839e385a8032a06bd916c241a2d86dd1ced2409ec6f28bcad3375ddb110c3490 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.26.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.26.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..54b6a1da8d091fabfbe654170b2701dd55eb93f9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65a645191a709c339a19f88bebef2bd36f49fd0acd81a39127f7ef1d96330f16 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.26.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.26.down_proj_alpha/fp32.pt new 
file mode 100644 index 0000000000000000000000000000000000000000..d69cb816a9f37eeee9b5d44f44b218ed70bc11de --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d83b6352ca03935408f4adced0d3f8f546d4b9170154d92f33a1652d805f710 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.26.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.26.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.26.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.26.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7844f674368c38c04642c220459f569a6d3793d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e52d6a41158639b4e9297939e0c38b0e01cfc627590da2ad8beb1d6207a24a57 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.26.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.26.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..80d0e40493784b1c8fad0f19b5d20ba18443d57a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9891af972a060b1b1d52418f68838d813e6ba4e75b385f311206278834fa7df +size 1195 diff --git 
a/global_step243198_universal/zero/model.layers.26.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.26.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e482c16fbd2972c670134f31252ceb6f03570588 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84af6cf0b349b127cc2bc18489385156126ae55f937c9c3c916882485828a316 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.26.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.26.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.26.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.26.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..570f1324eb59bc5bd7bcebde975413bf1e546d10 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0460f92fbcf5076f77ae3a8ddcfdf30c3e9ea0e9ef8db2ff37e47dc1bd681f4 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.26.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.26.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7176b74e0315d4ed563acd969e1d73cfe6798901 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:35aa2942721a22760dd004dbc6cfba35fa2b2c399e009780d0035d222d383839 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.26.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.26.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb15877ccb6e4b7ce6c300567e1dd87067d951a5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9095cf4520a5233f51fb23cd5ddc13bdc2fa742dff0a6a077a372e08163e5738 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.26.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.26.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.26.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.26.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ecd52b486569879e1aff54efb025dba98ff3d12c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:907ad913c8c475c0ff239a37c48f03f6b2d1f1dc895483211d3f57b9a195b656 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.26.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.26.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..b21c3ec54d33eba5d974cc0d029ccddf78dc83c0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d0859b0d1098953d6ca42d01ddc1ca8cf83456b1e83f23aa176e586f97131ca +size 1195 diff --git a/global_step243198_universal/zero/model.layers.26.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.26.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..27a818628ff005885a392e0cbb0f6d1fba1b36a1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b45f71a44f079ac383c8878e7023682f667bf07467136dfdc07db0697a76a055 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.26.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.26.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.26.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.26.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9106e732efa9b7215342e320ea41310fa810f583 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e3276f4239fa288f9fca13e9686c7f6155c9fcb836227d3f4f68aea699404af +size 36865244 diff --git 
a/global_step243198_universal/zero/model.layers.26.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.26.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..167488de84b60799e8c44026dd6ac9c837a0c359 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acd245bc9cae4db355b249e5831972c4e9fe8a429f0296690fd784533a07db17 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.26.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.26.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d321c77f5f79563a89cb770cb84048f8fc0881b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04a83563e280b1174dc003beab0b5a64c834f9f5cd5260b6525eed55d65cea29 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.26.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.26.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.26.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.26.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d53a27b1cad23d2dc29c01f3997589297e796c28 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:db54c7bcfe36478eb0c25793e6a488f7d7a009696acdcb3e541192c9ba3df066 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.26.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.26.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1df9fa9f1c01d2d403f72d0b167585dbbb614578 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2781aa8f2630963b829be086320d2cfc775727ddf5bd96a0c8c5b657d5c5a3b +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.26.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.26.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ecfe7e5cc46ff15bc7e46898190bb25d3e6b1de2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10487fbd18939940e80775d57c89714a9f9fccc8e556f984755afd2408574af4 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.26.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.26.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.26.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.26.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..92d3315685f40485cd7bdec5ffc58e1f8daf8160 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d0f5d362b018c716dd3a32ca15c163644471506cb1fa30ac2640f9b0957c8e6 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.26.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.26.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd08731a56adb744f654f0402373085415277fc1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38286e7ad30f511a17f67f3a0e5bccf10d4245290e0f4e13cf9884149594b73c +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.26.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.26.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..50fdfe71a7050b8bb913244fb10c2122036d90f2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be8cc2411eec3e9f7313b5e5c4b221551aedc360b944b426a5e3ddbb4e523120 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.26.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.26.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.26.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.26.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f7161ff8d716dfea636056f7664bd0eef410d558 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3ffd99dca51a491d695aff645d3c48b5316c87ca70c3effb3b01c8b82d805e0 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.26.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.26.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e962392b57b8a49c97cfc2a135fdf0614457f5f5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cd51606b1f9a9823a0b4458e6d24f523b7048497ecd160ce2a07d432ba990f6 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.26.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.26.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b7cae06ebdf55601a3d657e743d5cb466bbb010 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a744bfa5d03668e7376117ae640d37fec7cb57ad255492e558aa81761bd07ad2 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.26.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.26.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.26.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.26.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fefe4d0ad37f4dded8bc41bfa89339dfabca163b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2548a426263ff988839e71191ba7bd2823eecddcb62a2d45cd2fbf0af8ac3dbd +size 1180 diff --git a/global_step243198_universal/zero/model.layers.26.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.26.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5aa0589924b2f0525f77aff568fcbc0fb99930a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df33a543b6bdf420a03cc17def9d500d43d5d59cae979881974f74a558bc36d6 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.26.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.26.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5840989d877d47fc1ef562c7835f2c05cf6b5d88 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04150b85dbfe2c12ab1064c0f94add39becc9fc5a504bbabdb50749cd155a38b +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.26.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.26.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c6efd6bcb12fab07d153b159657794075826b96 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e21b6dd5beb7cb00014bb03e40dd35beee32ca26c9f54ea87fde4f8b4be327c5 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a23e0c7547fc7d5354db07e2777c648bfc276f90 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7101d99421d04f5e6ef6491c156de7010d90c724cdfecbfb78b8fa1b594d83a7 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a13d29d5926b9e1b79850bc5d9ead7cecfd58e0a --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbb734b5c766ba7868cdccec68cda704de779eb142b0c5ea33040f92f3be1039 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f2eb3a5208fa8a4857e7b5c048b39705dfe0c07 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f807dd8777f27be7250478df13a043441a082c82cfc190f58ad019f5f1249d40 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..671431f346a0fb46ac8700f83a90fae168165b3b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f1723d75b93ddc42dcc181dd8bf3ca0576067c9be22995ce5720a7632ba9d6f +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..69df233cbe2cd08fc8023ac4b6fd5218deab6cf7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc944cb630956417623a785fc7cdcc693b79d288cc8219c61c7abc7e2e97dcd8 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3d264003465a81ebb4ed33154667ba0f0ff8a05 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b07ab4b882498018f9683e3f65e6bbf03a024bc8f3a6515e811c962a37455db +size 1180 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b69770d6e570ef8807c0ed82bf6c586a56153439 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:04574ab9544b73f0622a335a88ee17ec6b04269eac3e755b30511503c3c3fd7a +size 1195 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..00e4c1175197b8e27406dbbff4c6bbe5c911c918 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d15f626db7b3803a5298617f49bfe7e9ecc94ff57f75e2b231dd393071dee2bf +size 1165 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.26.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc4014779e607bc373d5f857434becd77ffb7574 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccb5c5c20930aee8552343fd1b1b58b2f15cf504c21e5d7826b2e6326bd748ea +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.26.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f4f35f6ca314b61db1b4537c6db89cb6d0447f4 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e278f0f419d7d62793b8a14d910e12daa13de0936e4b8044e87e9dde68ee9bca +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.26.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d0da62d6ed0a7c434a94c25bcb7358b81e3914d0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:488fe07a09124e80190f1dfcc7cd436e1453088992fc92c2b5b0ca558a670be1 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.26.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.26.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b77e4079f25fa66dc4a094df5997224fef97086 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fe8fce06a18baa72f98ff5bf09745d29e4bceb4b19f50288e76f97e5ee0306e +size 1180 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.o_proj_alpha/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.26.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a7bf7d26ed20567ce874fec0f0a356f6c6758abe --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:755a06c65b3cac816ec9070d5454bf2d17d2b82fa84e08e2d94f705f6391a3f5 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.26.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6096c55c0aa258a0be889a6daf5e6b6940148adc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d84065d06ad55583d5ed7ba3f915db6cbf3424c4f4e6b51e8d16540aeb5dc07 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.26.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..be8edf0b0bae343df6b56c7a28c3114c9472ecf6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:4af0e33f15273c66965423d7a98b0880cf26d16a5f9c30e337c38b1e31619eb1 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f36e4edfd2ee0624eebbee33fc814f56b782f243 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fb66805d3adce66fad7a989e4cf91e28be477ae437c385e189bd390948285bc +size 8875 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab0c1c7f4e1356e2f20429ac07f05d8775391258 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da1a09faa83cffd05b65552c4777fe221c628a9af103c60f94ff1a612bce9480 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b60302875b0da921496d58bf32b9922c839b505 --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4859ab1a459ada891ae1b7bca9c44fd00f9f84764eba3b1037e43561853ad48a +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac2f5405f6586dc887464c7409f9ebafe734fbf4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9828d7bebf2ea52982db3f7d1083f54321ffccba56c41b272729c468bcf2752 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cea5c66ecc9ff8fd48b83f6eb5cc37048836f86f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23dda1d39b953d2fd977f9b864ab74454356066d0237004404a368fcb2347048 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.q_proj_alpha/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f6d7cb494682db80e7d00850d87f16a789b5f27b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad565b9a19e10f6e8fea89abcbea4b478564f4050634023804059efd775040c7 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e2fe7b571f277112e567dcbb2df0f97bd1beb8f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78658073adc9bbb466a536d0d4fce84dcc36b52d123a587858af7199b263aa5a +size 1195 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..93fe1dc3dc9c5d533cdca4f4de7487887572cd52 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4c340d9d23d3582c695f1d97debafd1fd37b91d5d870dcd1a2d0c0eb6ffacbe +size 1165 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5ba4863a3142cbfc60e64cdafffc08f923ec83c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6478b8704e475bf5416509297b0df9363a3897c2cf8a9ca13894c729d7a2cd25 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..32992952807b3f5acf22f93becde2ed62a087756 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0617dfba86d8a20acd9e7bb76ae496806a0ac166045520ea53515eaa5875f9f +size 2731 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1d21d07555af94f3eb4eda581f093de8df4538b1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41f1b55e42ec616949662df739a7fd765a375ba419c30979250a5cf8d4dcaf0e +size 2637 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9a4a1d2ac98e5c6fcabb481a1d4978005bd64f4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1248737c566ea76fc15f3f708d8a6ee9ed2d42ebfcf5b3e983de2375fb9741e +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..dddf84d538605813a4efc5c292552902a60dd2c5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4083e0695a8a8f57a2e2285d757b7b163ba53cdfea1403b1a4ef3428cbeeb2bc +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5dc67d91d43c21de115d5e454818fefd5397286d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9123b4f19e2f9401eccfb554ad1851777f8b2ecb662f6923d8297f3e94dd9bd3 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.weight/step.pt 
b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a36798c04d12db00f43023ab4dc43bbc3cc2dd2b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19314f851f3ef1737ebf4d1e15e10e2c7913a7756f76bc61fcd32038a3a1bf74 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf698a1a43997b031df9742c0ddcec8549e65bd2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3b42fb692905f7631c5bb586ea30dbdc732b2d021975c6c553b957f20fc4b33 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..393930e8595a685b259190515829618f2727d168 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:2164b2ceeca548513de63726e8f439da2d9993630da3b624d2cb9b342c18c93e +size 1165 diff --git a/global_step243198_universal/zero/model.layers.26.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.26.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.27.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.27.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..58d7d6fa8db847697f506b6f01ef303ffeef58d9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00ba1fe640fea572610e281503afc474e9c6be2500f4917fc5c55a48c26586c0 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.27.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.27.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7fa2618d79c49351c425cd7d2d6cc0ed2bb1c3c7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6eb42d8c0c292e520e9e4ba990f81462709a35f1d4d356f1e3352dfa9865413 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.27.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.27.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..17ab3ddba77d76f9d2dc290df376c104c4569943 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.27.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f5da20213b8657f314b7f5083e3ec192d8486b6efc7293ab7899993a01d3cb7 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.27.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.27.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.27.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.27.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e1585a61fe33251777880f9051c6f7dbe135105e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37f9aec872bcbc2ff0688fa08d89e39ec60005b42918bd60c887f392514895f0 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.27.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.27.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..33ba1826dd24668f3c770772d14d7836e6780cd2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f44d240dca3bcb5e88ff3668fb331471a126004ed4baa92649b6c0fa2ff380d +size 1195 diff --git a/global_step243198_universal/zero/model.layers.27.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.27.gate_up_proj_alpha/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..eff32bc4cba26c74b78fffd1a02cf2b356accaba --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3056a06a83acc6673464d7eae12e344de0a84043a87b942135adc4b0bf149e7d +size 1165 diff --git a/global_step243198_universal/zero/model.layers.27.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.27.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.27.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.27.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c391b7decaf14016a3ffc6d760cafa8921b8e8e6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:950d4dd386f023455472b1f9789988cd4e00ef4351b3fabe00fad0efa4bf6751 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.27.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.27.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5df4e72c748256f1f2999ac0edf545e31735f36 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a33a2f4313b77ed8f8db3bafc7029a47238fb4e967b76f76a7190809c2c2edc +size 8875 diff --git 
a/global_step243198_universal/zero/model.layers.27.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.27.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ffc05ee4bb15dcf3c9034f4c48eb5d60b3b5c8e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd520234fd5eb48de77284773ee3ed64e9966f37e9979a5713a49c488333b9f5 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.27.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.27.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.27.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.27.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c974faaffc5211b04d6574f05f800d0ad690db2d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c40c11d2190a7845cbb3ee412801484c014478c44f4187f2a5f7b701ce26ae9a +size 1180 diff --git a/global_step243198_universal/zero/model.layers.27.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.27.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..cff1408edd675ed01c566dfe5703f8531b3e9165 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.input_layernorm_alpha/exp_avg_sq.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f86c3e6680cba39e30106c538b8344b67fb5ff29c1903dc258614cc8b3dabfa +size 1195 diff --git a/global_step243198_universal/zero/model.layers.27.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.27.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f6ea7e348f87d022b0ffe5eaafaa7feb936956c2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9b0c37e553084202bccc26c7180676af870c13b3cce0e9487d226d3a796176f +size 1165 diff --git a/global_step243198_universal/zero/model.layers.27.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.27.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.27.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.27.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..78e02fb16b2c53a4e3ee8af2eafea0d4c7aa0ff0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78642d5e63ee897e2fdd1040ae25f1e7d0dc572b7cc5f10a43bcca717364d26a +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.27.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.27.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..61cf557b6b2dfa63185705ab7dbb1a2a7ff4bb17 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5105d66aad66759857ecb4d18d403cbf35e597634b617b7f5fc8d5c96cdafa23 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.27.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.27.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..53b004c9f6d1d2372ff81aaea5c5e85e6c28183e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db802a0d3401928d3dfb078e556224f912a5443faa8dad200e80bb0d6c3cccf8 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.27.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.27.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.27.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.27.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d976be88db9cc9be22ab59746a8ad8257460e59 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc2f1c92dfd8dc0c15055a602dbdba12e893d3cc6fe967466b65a6d73a323709 +size 36865244 diff --git 
a/global_step243198_universal/zero/model.layers.27.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.27.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e818e92fdacf5b666554561703a684d27424ef78 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:118c853e4399b556ba4891f1e6689e098ac1cf342426f1644d974eda0b5b7ff7 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.27.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.27.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5e9da0b375b3b45c79b8c4d539d75120536f5c2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e7d4bfe6ed02fe07ce0dcb082adee7c4255f9f68c2811d3707a6dd7cc9b89ec +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.27.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.27.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.27.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.27.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f51854662d904ee04d8e13730208a1da1323cf93 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:24b7a1ab28fc14462b9a9f268af943d66fbdb6e7122690fba0b10f539ba15c56 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.27.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.27.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..803ceed39dfbb42ab4429c35d0a8c744a74cb711 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c57072742e8dcc48d2f9068f8c37e8814a0d40fbf4d764d621bccf54f1b0d10 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.27.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.27.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9fd008782ae680fc57e4b540fb82abbd05b1a8d2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5f66e8cb4291da7511e30233caa5a49e49745a46c4acbc34006257003b3738a +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.27.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.27.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.27.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.27.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..8bb0bda015aed64d561144b2908b7ce192e6f50b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26a8c1540c58d316edb9d955d6cf15631e47404defe406c39dd286d24620aadb +size 8860 diff --git a/global_step243198_universal/zero/model.layers.27.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.27.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..374eda59beaa654a01fde2094ac862ea2ab2deb8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ea14b4950e8752223ed89434c77017548c450f087071797250875fa3826bd52 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.27.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.27.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3de76fb2e0b5399afa83e3ca18fa83b5393a359b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c806935a5bc0fd2ae94b1b1ff7f5fb0d5c1d388323fb14064a3b300da2955756 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.27.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.27.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.27.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.27.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..01feac70522fd8f2d2e4c0d903776067ec7c9ca4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5210b6b681059ed5f839a19af672c5a6b4ae4fbd5228e7daf0284eac946249d0 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.27.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.27.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a08b80fcc614735259ce4b4a9e08fa21f0121229 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5b6ca8f52c188859ee4fcbf14687e164e3893988caef08d6b097b6d8e34e651 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.27.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.27.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3dfacc2e23c36782db8867437f543fe28c18bfba --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:021a40e62872825704628968aa5b4c5c572605fa112d68c2dbba6859aca04daf +size 1165 diff --git a/global_step243198_universal/zero/model.layers.27.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.27.post_attention_layernorm_alpha/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7fcfe9a645c362189e38c0102335820f48d2abaa --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d49737c0252197e04e3e22f2e00f7ecb425844a9e48708536bbd0c15534eadff +size 2716 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f01d7fe9ae96db3efd0b8165bd6c2600ded18c9b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06ccca0bbebffb9c30d7073cdcc00a9c51e0205796aea90b5a6caf505a636af4 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..77f1c78f45a1ddd467345f5c4cf18db25a07d74e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c3ecfae024e3ea89f122723da1535650fcc6254847a748416cea1f509b2abcc +size 2637 diff --git 
a/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..952db1649dc9d20720f14e0eb9aac0a2e552634a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea514acdafba4c3476e4fed00fd279b373300a6247259f35740dcc4fcb0fb12a +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc7aba1234fb9dac8ab7b7d23957f7b28452b7ee --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73150ad2b1ffcfa1d2c5fdb2249fbf2a856453d3d509cdeb09697af9112f628d +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..67a365d7d73632d91274f05d3d24ca69af61fdab --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79051d6cfd93b23c94a1f4561aecd0f0739063d0820c7ff9630814170c63c9c9 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2427081632df39147df80bae3497213bf5e3e05b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5edef4657338294175e875b565ca957d674a76cbac53bb333a2fe809265533bd +size 1180 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ea54d21f8853e0a0076895d95fb883923b42a3a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb1440201f4274b07f3bdb190c796a68cad88dd6696e15f6543bd397c0886ca1 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.k_proj_alpha/fp32.pt 
b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f294994e1e0750d70b961081aaf98ce41a709518 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3588677aea9d95aef24fdaa8a43c39a53a3aff5c19e29669a790a280543df2f7 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.27.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e1d4c3a2363263699a4f5b4fa134a1861ed1eef --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:133df90990a5321cc8b9413d7fb964d97253279d99b7e0b72327ab8691b3dcd3 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.27.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..13c79d6ec0e684253e161bfde8aea982d4acc1c6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3949b7038cb646369cba187250b5f45b234658ccee2148e5a15be61e130e86f6 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.27.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0587078b4eccc928c56437bf7f21684119e053c2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beef7034731a695955ccdd9f47d5211fdc444ec93cd61072f8df6cfabaab5570 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.27.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.27.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f192ea2f5c5529cfba39e1a2f37137556dc8935c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0563501167834a809edc41cd4477624305c34f34b84206989252cea5ceb9c704 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.27.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..0856bfca8dca6a1044438437812cdc0c57839068 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a4ee33d50164277d25f3dbcd38fbdecaa386054ef944ac29ad10e86e1569b1e +size 1195 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.27.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..08f234048a06dddc5aa2c084572dbdb79bb74fc8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0cb7cc147be8c4f084da53be0139bfaf3fa6b185413b8fd5baa3ddf3dd34d24 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.27.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9227f2efb36e4b4a50c994ce6273306e442ee0cd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c28f63c8c8e20c991a149dd5cd4590b5cc19a17fc6e8c82de215ca91f620d54 +size 8860 diff --git 
a/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c57e9887da1db2a084e336a269da5732d884b2d3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65d866c0f3932507630025e919d8c0f5474e346f1124c1ea07fa8bafa45735b1 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..04df9ae44d0f85d78557a7e46bd05de52d1d6636 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8392501f0457b1a799991a5ab4cce0c0e60b59c0cc8a1fb709b7e58bea25e34f +size 8781 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..28f309643ec06aec6632a27fea4a3492e67ea5d1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.weight/exp_avg.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25ae28eed33892cc05ed5aeafd2f46dd190c63e8deb883cef58a8a3f7ae948eb +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..afd0794a878e359e27e749a8476692f11701d78b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4a71456103bcc6eb0d0354ac999363969364a347ed3e3b78c4c06f75de6e749 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9646af23bb6be6fdc667bfb7ee4dc77666d65251 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51c76ceed6f8dabe04eeb1febcc05821627996f9458bedf3ae336bb1150aea4f +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..2f6d5e6e77d41a8c1f141cb6afcb6ccce4fb3631 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26d74761c5c2df8fe2212e6992f510c2183a1973f562233128a5cf86db62bd42 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e97e7744b6b796a7e325a4102aaf628b13c43c17 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:589d9581591d04c793cf73169921b22aef17862f9d5e08d21a2d5a7d240806bd +size 1195 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e239b96b36731441210ce0f4dc6adedb12c4f4a2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6add7122671b013bbdb1b95382e158abc8a3e3ce75790b4d040d7a495930a33 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..533a8ef4b4d3ecd00be1e3dbd6f9e9c6d71ab76e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e77c34039ba5685d59300fb0d3fba5a3fa49ceeadb37e1e05a2dbfe618ea881c +size 2716 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7bbab7b6fdbce6922a5c408b50f01ca9dc3c75b6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54554b251d5e108659fb71f299b4ad77ea364b40929329df077ac98360b0f46d +size 2731 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c68014d5993e174e5f5d5ae3eed6423f90402b2f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a361f5b44138f11758a4a86a62eebd6dc85163a076992d3f9afc1903551081e6 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.bias/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9739aa03e2612dab6eeb658902718d5a7da7d580 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:066898e89f53d39d28bf63e1053661db92a8a64662834acf8f300d1bd6ef29e5 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e54a98b554f74f38f8507937a5a8b11d93e06e1f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d336adfc82de9955455739953f705072ced6cf24e66f6c31ec01c832f185fb33 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa51a57e98296e6f270b999ab6c0e032c9a0d694 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a68fef889607209891552a079bcaceeb3d431c318be73692820fc227d1d043bf +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..14bb40ad60d67ddb7bfb4b685c769147608915d3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40a9a2c8a2c6f9d2e9fbfd007a9f1953cc65f5a53a726d1027d7f593d5a5b8e2 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8499744f35e7f0f3110e9ea9fd216d21cf40cd1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:641ace5b3810cf2a562e3172b94edb15493e70ca6b6fac7a8717a1942092ae68 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.27.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..598b5d7e9b2ce2619ac3af4470857109f0062982 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a2c3617471cd79e5942471cecceb6fba9badcb7d082bc4640c071f7775cb26f +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.27.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.27.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.28.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.28.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d0fe40d8c29bda09b253d37e6cbdb2537b643aeb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df9bc50df03415ac3ac4af5fba403fffa45e52d031bd49defa888578dcee1e3d +size 1180 diff --git a/global_step243198_universal/zero/model.layers.28.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.28.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b22dc15fb6a43f2b9e1ce5aabf0f89de91cd738 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d19879ab0409301cecc6749f4fd1f3bf49f79833f51c0cb0ed8d747fb3e023bd +size 1195 diff --git a/global_step243198_universal/zero/model.layers.28.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.28.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb84d8277c54838507dace9594e05b38d8c561c3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:9894a17e6e85d808fc5d42eeaa7d8bcbefa2f3f989bb341baa4aaca2a00fe29c +size 1165 diff --git a/global_step243198_universal/zero/model.layers.28.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.28.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.28.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.28.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0d75ca12e7ac696327db1ce449ceaf811c4f65a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29c746f964c1353e2e331875f4a16d2477e0099d16a4c795f845f1d89ddbc51a +size 1180 diff --git a/global_step243198_universal/zero/model.layers.28.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.28.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..23713aa0a6cc94598d759033e54466f3d2dca1c8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb758c469dbf715464387753389d8368e003c25e763ff4cde4ba0eee8f4435df +size 1195 diff --git a/global_step243198_universal/zero/model.layers.28.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.28.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e221e82cfe37d911cdefa87d55b9995f887551b --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.28.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81c3927fa09f4b4240904a033ed2a2963204751ae1dd399980b3ee200f5e53b6 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.28.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.28.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.28.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.28.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..bac7350e5dea215bdb686dc32af2b5ad63890784 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f0829bffcba8219c2f7814025e0fc7cfb23b374c01131b389c6c97165fbf477 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.28.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.28.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3587a59fa32674e0b4aba9ad2d65b4c4ab769689 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e212421f0ebb08fe84d7fef48362fa826c010cc410f2d4231681dc913ad8b36a +size 8875 diff --git a/global_step243198_universal/zero/model.layers.28.input_layernorm.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.28.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..61550b9a422ce54f9381b383b084e225d03da921 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a44b5901afe7bdd69dd2f798a001b79bb9e536117f310e32cce8ded469ae278 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.28.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.28.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.28.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.28.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c48ee3834de6b5e252374651aa76a46629abaceb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d0a1badf52c2f6647eaa57aec362f3068b2b77ed90236b5e57237ba32ae94ce +size 1180 diff --git a/global_step243198_universal/zero/model.layers.28.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.28.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..17c72c47931959a42df90328bc65de1dfd4c78f0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a22faba2393e127f6101616a9ee77444806f2cf4e5ae3208fad31e840bfe9255 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.28.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.28.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..32bbf5e7b5136267d3ef6f524dbf0370ef83048c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e0899c000f325d491abd3494489fd5ce1d0605080fda4e7459294e094ad5931 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.28.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.28.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.28.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.28.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..54c6b825245c9ae6c2559b318935893c5d51d53a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adf283c2e39d7e03ee77c364aa8536d2d77df394ab7e53fca8284d609c4708f8 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.28.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.28.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..dfab604e00e2fc982568f96dcb59d3a8a2c091e4 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.28.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b422333a8e225a0e4ef2fab487503a0a6e086ab7cbdf40d37f2eab6dee2af3 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.28.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.28.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..56f821a134a8940874b76c28ee446fa8b7f79def --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51cd879bbe5860ca70be65e265eeaba487cc3db64912a78f93f6c27dac39ce8a +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.28.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.28.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.28.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.28.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d35b3805ec8e2c926e9758dd5d370c178f8b44f2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ca65b509906bc5fe7f8213db15c213fce105049b13c2f68a1da1bd64c602f9c +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.28.mlp.gate_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.28.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..70cb36a37b27cc90029e6e95df0b0fa11993a1ab --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6e0c01b7d096a9e6159a9b7f88a14f00c1ae959041f505bc06f83d872ec1eb0 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.28.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.28.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..76837a3b69a8b77203b2b6c11f1f274713813480 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4799f1bda94c66c96d3e2d8216122708a3129291e53f81d2984fb419e9c13950 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.28.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.28.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.28.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.28.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5bc43d4c0c1d905c9f3b78f2fde738890e782934 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:fd69e60d50ff7396887f17f309cc961086ae6ba704cbcb7a8824353b7bd44e53 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.28.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.28.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..82bf66dc4051287879f69235dcaa4d447ea0eb24 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95656debaa48999bcc9557f83b70be08056897431eb31e3a4949ef6e4bd1503c +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.28.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.28.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b35969c04970d9448cae67518fae1939b8c31ce --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:785e60185938d838a135b21dc8254e300e924cef370b97eebecce8d3817bf04e +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.28.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.28.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.28.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.28.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e79d248a6ce4b3fccd7270e2a82953d7b55da1ea --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.28.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab8ba950a4d708c5515320b01011b97b3c0da2ed27b9a35c2f8158026b5c71e3 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.28.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.28.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1707f23b937b75b5fc917d0ae4c98882db097c22 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d6eb2719dc70dea47b00d899d7f3024de14ae85b54d58888e7aa33b6a4dd2cb +size 8875 diff --git a/global_step243198_universal/zero/model.layers.28.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.28.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa8ebe7b6a83de820c97ea52473b1412939f69d1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12fa6236754eadf04597789b8d3509af5b00f6ef15b3639263c340c4d4082d22 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.28.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.28.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.28.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.28.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8bbd93afc3c119bcd27db91aecf073cf52b08d10 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37328abed1c95a6aefa2d480d0b58850fbda392834d58ea2b895020430074004 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.28.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.28.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0faa0a8645614b553a10a1cb2d62488df163794b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb2f7bd4d24f9e8fb724efe62cb58a017ec1d1dc88b20d975d48f66e9f0f34da +size 1195 diff --git a/global_step243198_universal/zero/model.layers.28.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.28.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b70d7a5c3c20c5f361375bdce424159978e91e3a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c48fdffdbf98936c45707af3b4abb2668cb738ec858355c2a356469e7f22b10 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.28.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.28.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.28.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f7d10be751a3e91c48626bd4edf7b97e3f5b15e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c356ab6675d2681356e5dc2ed366ddd097f2744dba7209c8edf10e5c3bbf9a1 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..46d6aade3460023a7c3ea6cae192624261e23f1b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef4ed03aab022c26afad174548d7422e7cf841973a44329fbd7bdfbdd6de7a73 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3c55aa278d49bd8d82f5ec57e28a40b90f2339e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ac09ca6d29765fc023befd10f8c50e51333c094fb69bfe7c6d739911e76216f +size 2637 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.bias/step.pt 
b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..16e84427c0361731edc6be01de732ee852fb3c07 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec35c9a106fdb1753389110cf2d1be3874a3e3a91be32ab3d6d8d0c7b169f597 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb8e753ede770a39e05e06ac54ad28290c0c467e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beee6cce4692ddf4098cb454ebb73392401cf8d5d7fa73e1cc44328b6d24193e +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..801f079bcca64958f56b4e4d232553b19d11615c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0e6eb8f0ec2782fd9ba3dbb298efacea8edf0c98ee13c341e08d0516b2977ace +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0fa2461346fbf548150bfe3f45a6bffb1008e687 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4ed4b0875856b2018697b1784c235af682d2ae0ed45a98c98d5d3cccc65e304 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e09728cdf70c42d0334b3435cf0b76016e0199dd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:470fa2af4fe7b8d9395d81067027dbd242e6698718d743aaf5a089f36dd10496 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..52e3ce45ff3be421a94f6ddf2e344bd224471603 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:def57f58cdaa24107c88df984afc229cb1eec9694cb1e55f075135e3ef3bb119 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.28.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fee09c0bfa0e9a31e2a09dd102a7213d894bdbd1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b2d177a52706135ab44cdff30e7abd70e3bc931358e5b067594c10f8a735602 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.28.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..02f9daf7e06d2aff26b60940e7a23cd82cf5b12b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:772ac8b17ae33b92d82c67d20e122432203f5700111b8e0963dd0f6a9998b438 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.o_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.28.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f57cd825b62f4368c73a796303e24f44bb70ca9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8715ec48c7da7b03f5ba02e648318f25ad77554c7ffe48b3f396d92a1c34dab +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.28.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.28.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..485d9d19e6d68707e17c37b6ebd21ac3fd4d85b1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a52f0302c5080abc18c35f69c87f8d2edd76884888e8b21fc2ac01ee9d4167f +size 1180 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.28.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0a82405bb27e7daf77f43080786b8eaa4254b98 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:fdc36588004d1d7934c461c31be07217abcac9bcde111af9b25e520dfc5d917a +size 1195 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.28.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c30d3e3db3b222434335d798f730f5e18d308c9a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d7c095e22a057e4f594fc9db061f1fa710f6604f7a3d8f5589b070837376a2f +size 1165 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.28.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6477cdd3c1ad4aad1070f07e2eb9261b52d629ea --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f57a0d3b3fc7ba6aa8e89e26db66831c6cd471fc4b232632eed8c155d0d8f06b +size 8860 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7130f8016393f480ab5224bba8b2639cf02e03b3 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:677f46d3c8b944aff214317a8d205db5a72854e26a8e6a90bc20738dd9d21925 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0921492c0b7290fbfc93b4999a1907ad2d247f07 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdfce41530d4e7f39449654e4ec1b86d6bdd767ff7ef3e0a5c17e6dcf39e6808 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c7a90241377a4b9be7f1891514b28cc629e983e0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b763af82950a0a38613290fe6050b9775115f805bea9f032d7994002620a5e2e +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..367e4c110b4d1eb1cf5e555e33955bfcf6bee2ff --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beb654beab3e9639205912c57330daece0b2cb32e74076338ec45c3e547bece1 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c8c6106ad0c5ec9c8fb0a2ab0cb8e37872399e9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82201186089e283f0009626edcd2ca04bf7778ca55bf6e0182bb1e8d6aff4abb +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c09c399e22c1a3d7e3c241759f61b2a101c89f7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3087cea70e07ed9459894c3d14d52d6100cdffec53c65ba1cacf44d132b13644 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4aa3d4a0425f52759fceffb95a2a1a7f5a4c7ffe --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccd2d9411b8e235f77ae5ea2891059f05c19a2558157316cfb5e9ab5ca8e07bf +size 1195 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..40f0c40e27233f81190133fca849f15d4dfcc13e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eec2a3ced52d5b258f0260e70c05f7e838d8db9046fb87e508df1d9e7b7f1334 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..27f6617ada8c5a0989eb1ba5631bb8dd8c50059a --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45f6d6e1c99f3557ad61eb5a8576cfa2e551f4bba614095c96e06a7453542d0b +size 2716 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e40ac6795eb56143007065f4d2577c918890e91 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32c163d9ed3d8a545d28886e4ad4735191c9b4e96566d91ce0e260d1abcc503b +size 2731 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4eaa2cdd0aa37a4244e6cfcf3addeee1e290cb2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e0c799cb6f39f4a2a7933c9539386bd7de3cc6be93f913cd77c8372cdbae9b0 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f00d4a914c90379964f1c86f06d2bac9ca9371f0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78631b17a284d7345002fa98b789b0018fdb9c509986b1d3ba6a4ad025fcb94a +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d937fbd4cff1884815508d2ce129d9b88b5956f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:720fe108aa43aeb3e8dc25d77d84379737096a9df4d9063df62b5928cfecab35 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d66ea73e3e50fa5841fd69483c910732dced028 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:455e3c2d44f611d7c9a6657fe58405f69b02106c5247ea8cf5b3c73d61522939 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7545ac08ae39735dabcd35f8672f38021f21c239 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1914b45849bb79b3a7e7aaa41d6358f30283a0448fe6e648a950cc202a988f25 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..094f8a903d4c76bc0677e3465e72a5949e20d64a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e29bd49edf129e5aa73038c0da6df98effc0ff6e71f5c950570da3da5294732e +size 1195 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..642be1f392b159692069632d3a3ba3d3c846952b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79895d46d921e26dde4a0fe0e970e86b9b569dce10d74a396822f8c9f2f4d3a4 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.28.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.28.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.29.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.29.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..40dac29f3ff030dc0305d86f376a82efdafd10c5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61817a311a5d2aca257bb5946c7b58a7963c376a56a2ef5b93e81e88792c4398 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.29.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.29.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ac14e1c5decc30ec34b80bfdb4f58ada1cc6c24 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5800fee961b1829b4540a5e30521f8114037950193e94658a645ba74915c7734 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.29.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.29.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0ce0639d3babba63511288175005e2d6e6efe13 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6d24d11a174657f4f3b8443c271954c9fa821e8cace45f6c97248810c4a8308 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.29.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.29.down_proj_alpha/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.29.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.29.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..82232104399f4b87cd9f5717552f6d323d89ce18 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aaf6ea0717217e2fae01ecbba3500fbfaf93a2733643ef7a3635157acd4ff69 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.29.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.29.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..49a38e70d51a983eebd18a5054482e699269cfd4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ffac47190ad8238f3c2f2e22666792ae4d439edb3d1e456e556b6d40c49417c +size 1195 diff --git a/global_step243198_universal/zero/model.layers.29.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.29.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ed65491e30aeab3ba64e975d53898e6396aedd0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a5f99a22878eb1f6b9ce548189766a8c437b7b34dcdef247feb12ea22f88e81 +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.29.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.29.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.29.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.29.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..efc8d75ef6826f96a4de5c7ccd8c5bf857774540 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457011550275a4b31e271cddc113fe37b093ef1cf2b164241795b8171b64eea8 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.29.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.29.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b532e3493fca4e691091ff52963a61a40e601df4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:365e39ba60269a0dc7a0f215b6e4c70f0bd54a2b88b435a558facde22883194e +size 8875 diff --git a/global_step243198_universal/zero/model.layers.29.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.29.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c25c91fdc4626a0dce701c61383d75594061ff99 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:200af307319008320041f4e268dec69ca6202d25ad4c263ba00c17e690c9071b +size 8781 diff --git a/global_step243198_universal/zero/model.layers.29.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.29.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.29.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.29.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..117324ee3a31330705fb752c787f002d91331a09 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47be3154094f781458ee4a7c0bcb620aad5155daab52cb786bfd92ef71be624e +size 1180 diff --git a/global_step243198_universal/zero/model.layers.29.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.29.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa7371537aeb4befba7d55b985ad27320bf36f7c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b55491eb5074cdc2edd0900d5e207707e5eeefe8ceb14aa34494a665521f1f0 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.29.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.29.input_layernorm_alpha/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..000bc6cd79edc0dbc28c467350a79f255ec6643f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70951cf2b915fd082774040218edd3fe7d71f252cae503dee44593e2369cb22a +size 1165 diff --git a/global_step243198_universal/zero/model.layers.29.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.29.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.29.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.29.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..08187d91aa89d7b3e0c2947251aaf8293c95495b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c731d569deeff0ecae3797c704d0fd76b433998d092f739f9b5aed8a1a3a035b +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.29.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.29.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0047105ac1abe7db0694a248e94a5c31815404dd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:127e36aa82f9db137126a351adffff83907fbd38813c5ecc719cb0d585dbd080 +size 36865259 diff --git 
a/global_step243198_universal/zero/model.layers.29.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.29.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..31988e8f0b4efb10c24a542d5a15728bb035b013 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0f119ec51e71db905d27631ce1ed38ee70f1b6a0e16859afee0b9c5e325f852 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.29.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.29.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.29.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.29.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..32fac843c7e81ec69668ef059c645e6a06dba7de --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49ba996f81555c6c73917f08d8cd5116dedb4792a01c4556956014539142fa19 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.29.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.29.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe45d98eda1916a50a8118146e462ee44ef72f97 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:1caa8867abaf3da9aafe39b40155f9130668e243fbf9f0d123d9336080b98fc2 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.29.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.29.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2223d979ef60bf152132926537381f9396a8d92f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b422f199b818855e99f10bb404577722689b10df73a4fbdb20604cf3ce9ea050 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.29.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.29.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.29.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.29.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..39087051489951256ea217f9f64af4d5af1ba8a1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83083cf6415b3551bdd010e0da4b392a45bb86c4108e736533be63ce3cc7e07e +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.29.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.29.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..0ebfd28a425248316c9b1963b3f5ab13d3c7edde --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6acc2a7c92f42b64a02269cd77c84d219e0d27ff085d542f3d7ca0a761dbc45 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.29.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.29.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ae685c45b9377237ef20178b16b0e5431ac9521 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d87a5e19bfeaf9cbb1c5262b99c0968bbc41480f5858ff0ecf528bad3684ef53 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.29.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.29.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.29.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.29.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd159e0f617b3246b56cad8d14f6265ef4184248 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf4f976fe96a684f66ba0eb3d735eb41c37f369a1158111b4a787be186d7325c +size 8860 diff --git 
a/global_step243198_universal/zero/model.layers.29.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.29.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..68cf0431ecb6e810fbfa9afd965db2334364bd4a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e62e44c724a4866189a1161a725fbd3a578f34f683c1482bda813367180aa3a +size 8875 diff --git a/global_step243198_universal/zero/model.layers.29.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.29.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..080fa4b340dc80958929d6db4467faba53a8d660 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00dc9ea9a9cc9c8bf81c2c7febedd9bc1aef607303d21655477cfedd7375eda0 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.29.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.29.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.29.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.29.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ebd509c3ab21b0d3a47cf457ae73194234addb54 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.29.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61677761d8131260350bf08204b99aaff1d969522f0202e9ac1d02bd6df8e275 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.29.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.29.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4533ee3e517496f07c0e0b8778362f2801a5ad0c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc729cb0ba905a1c5cfa997b44859baface84cf5bbaf2ad7090d2f459fe396f2 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.29.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.29.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0931352bf5e1fcd6f8642e943577cbcdeab8d162 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71f8ee4da6f340ff6a5aacfde936481a661493d6cc8b4d15a8613159afe2dc80 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.29.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.29.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..34747872e642987cd8bc3f171460bc51aa2e150c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a85574296939d7a0502c225030d17fdee66ddf72aebfb80a59d01680becb4a7f +size 2716 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..27ba86a0f191fa2742d4266abefaf4f6dcf67a62 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f80eb70c5b680b1b5b380c9b4b1f66b42112963d27325af157a6767e32f1e145 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..49dce1e4418c1138df6731bd8056bf326eb6c638 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89e7f66780dd67c9be0c54b555d700f60e513e9ec27389f904add39d1a2be45a +size 2637 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.bias/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..94462e37c905df76c176c03b1ad23fb20aebcb2e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffac0df8b4eb2d69f265cd22fe734dfa733f9ed372a24afd57ed342a1f12db9f +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c8f498efd552f2f46908fd4e3e6aeeddb653dead --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecc5ee4a2907dfd13bafd620a5e7351a41be696c6fffc8c1bf5ca8878cf4107c +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e88c109ad2b0c8dde5a7e2b53a809cc3f28558e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c264ae27bf9ea4965a4bc0280059717217ea88eb5062af393d31b5131b7a54 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..33fc43c31656ebc2c456e62655eb33dd55d1e7de --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0771af74bcf0772adb0bda5e822bbf90e4edd1361b5d72b1d60040ef60a3514f +size 1180 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..11129f259cdb9a221b6013787654d20423a5adea --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16c8412bb1418d9fd8077baf165531797c170810659a668cbeae2b1bfa13e18a +size 1195 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..01e2429259ab56c14835cd2212a38a3863cc360f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:203b4976227afaae9e3def2d1e803891a83d395b8670e89aa0ad5fabd5a7b2d6 +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.29.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.29.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2038a15d9ececab7b5844c0df2535f7197b39370 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46a6f22038ffef4fa11f5b3c3db06fb25875999451c9ad794906ff4bd40952a2 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.29.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee4f464a7a0d628226a06e5028a8a9387bf432f1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60eeda3db59464600a5d9348727a409c2f7f725b0c1ee26120d473c12dabc400 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.29.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0501bdd9e53ddfb7f88776e0b7cb3efbb514e7be --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.29.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db89e0b932bfc8b6e046bff86bbcf51c06b7b7d23a95b7d3abb88f3025f2b956 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.29.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.29.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b1ac4438cb29fc4c5b48b7e8324e295988bbd05 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df6cd49692c603faeb393e07cc405c0b8d19cbc0e6b5d4d016e96582c207adaf +size 1180 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.29.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8acc2be9f30a41bd54c9d1e8c2af1d1f8655b8a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f710642819006901044b23fa8df2df5b418450d3bc0d1044f5ff69b2ac73671d +size 1195 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.o_proj_alpha/fp32.pt 
b/global_step243198_universal/zero/model.layers.29.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa5d33c103bceaa4ee6f98a0234286629996ee20 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bff82452251462398dea0f50895e7013d49c752ed33577a1f6556125fc7b4f6 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.29.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..66beee4a5518195714e7c66cd1a9014ea9f892b1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae0a02ba49c3aac9ab6fc0a6324705af198ce900d6adf52808460ef630d42598 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c730a9d930dc019e89d096284356b71a5e18f74 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:daefaaa8bf7b09e666883fcd6da34049c18df0b5eeb8f18f2ee136604ee4794a +size 8875 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b8d77b071ad709b6b89d982a2293f2ff8e4e9ca --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9a3404394cf5f5d41a4cd7f37bc15754cb9fec136210e589fe943b7b6f88e64 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..da76b9bb9e176c435c16a8b6d855e9dd7ffcb71d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b94e4c4e8291e77bbda19bcbef49d2b69709d7d31fd3f464b663c5ac9c9a02d8 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d97ca079d35d4466469261d4ac2c32f971f65ec9 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4536cd98b3757329e6aa3ff73f3d3502437d3522e6b8cf5392d674d9e4a9303f +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c04d54cd49bf206342f1d3aed30e7de1cd74cba2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5e869ff77786f85f20a92646c89f59e2ba1ad7851113de7d74d286d73ac382e +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a5b54206d6f1e2b4d532c3926c3c1b246f74842 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a36b67157eec9b618cd06daf6f9bd3a47375089d79419f7617c6e8304b73de0 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.q_proj_alpha/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9bf615fa0ddcd5e11b546765fa9ef70c96295e1d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:750994e577b337cdeba5027b2088e3b3393276e12839c8e9603457cf7dac3c50 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c64f84d0136081e93c0f309948a31a72dfdd1cc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d74f0526b8b6f1db46ced6643d17863eda7c8edeeb4e915815e17c50c594e33 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..46affb214b307f122308ec4ff43c4235b70ee165 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:430c8f9fa980ade4c4f65183b3677c622b276ac5cc321bea0979d56e1ff463b1 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad1539f8f4d8e3ecc234bd4e43ca08d6cd2cc8cf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:200a7f039d0eff87d0466bceda103825c1e8a678bb46a64ea035876566ba74a5 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..26a4bf809dab2e62ea7e2943d30b4c794d50102f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ca70b00f98d7a4fb43daabe675f388bdabc0d6daf0bbb652b772d39b88ed41d +size 2637 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ffc8a7c94aeb2bf89f99e6301755543c6544f65f --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5155b1aac28fb3b10f6d092ead1f59b702b23112a910eccf833f68bec03dd0c0 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..bca1e6ea812f7b5ce211fa3bdc5c9b0068761c30 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6c6dfedc38f1d2edf551e28d5f116bf37a2ebe83e0a791373e89aac33f1c9ab +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbd48379d1e745c8626b8c5e6ff0a1f70ce0042b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:318d1a27a58537421b825f516521644a05371c0555731a62f69b39101769c9ec +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.v_proj_alpha/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc42ac280104d09732eab0855d9f261cdcba6463 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d58c71f4c712105312b9daa1d6535eef92ebc6807558bbd0f31849c857b42bd +size 1180 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..90c024554525ac4ded1e6271392f6e11b9bb1466 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a57e33ff497f44eb657a66ec4ee3eb43d43905cb0f3f6f56a1cd6dc7f369854 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e2610587e990d217da4891e0e94b3b8abcc64cf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2864c33c5eb220027e29d4e3a29faa2653251424cd853f08766cefcc8eb3fb50 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.29.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.29.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.3.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.3.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a6ff0ca8fb908c084f6072000f94d8de695ded18 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:854dcfdcc395a1fd517f382bd795dd083acdc8957136715367996eb9f456d8ad +size 1180 diff --git a/global_step243198_universal/zero/model.layers.3.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.3.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c7e665522decda554849184f44c4925dabbb25f7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44591f0fef1c7f2dd436f99815cb83210f19be1cab65c76ac425b11079c35c29 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.3.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.3.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cabbf49362a9c6522eb2e690b0627e633054493a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbab55e5b5c19fe38e7987c2b4ab5b85c61a00340e5c40228e94634132e7310c +size 1165 diff --git a/global_step243198_universal/zero/model.layers.3.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.3.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.down_proj_alpha/step.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.3.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.3.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8271210b1b935cd4394ed3f2b514f4d796cc4924 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a35a56973ed7cf85835fcf2885f37286a7d4b3fa7d045f79772bb09663219b1 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.3.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.3.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b7f6f940444a6268cfa94dd5f9c2e9358bf0c20 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c4900ccccaa33cc6576c4ed01aeadb642f86f5f6d513b46a07acc86987c8547 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.3.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.3.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c13157f936c2321100f57003d81da46909311c8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf9dbcf08f2e50b7d8a07dba1a5bd4612e5918dd67681bfbddc67f248ab921a2 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.3.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.3.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.3.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.3.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f62fe14a59aef8e53c91058b8b84f1582cc1728e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5726335bb63b9719ebee9f7d195224d7b8fa08039982a816e70c8eb64b49d64 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.3.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.3.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a1a011f93b5bd10c5751eb6caea1ac38ba1c5aa2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0e1952d81943df7be5389372003a82f36cf82b7941c800c2d7100c51f47e60b +size 8875 diff --git a/global_step243198_universal/zero/model.layers.3.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.3.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e6b23671f590219009991f10f727c4b7b10617f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d70f23791afd6d227ed63ee2d1f0611226f92c1fc72827cf8917080082b4fac0 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.3.input_layernorm.weight/step.pt 
b/global_step243198_universal/zero/model.layers.3.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.3.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.3.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f6430bd4b394b69e8da0daed768eea55425dff9f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69a2573ba5bbfb8a39071398bd1bd633a35b7e32870bc8ecd97db8b57f747442 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.3.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.3.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe9b5c9bdd2cee6c1c0e83c28d3414d0c9c34dd1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5499637e626bbeff408abdd106c7479edb2003604476463d8a7b66a5dd40249 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.3.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.3.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4e6252d6cf4ff658a844a8a7426dacc65f70712 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:4f2de783ef11a17d09623198f4c6fd19eb7c70f5e419207163c5cab11e38e2b2 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.3.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.3.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.3.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.3.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea35ab89599565c83312cdfa3c8d84e634959c0c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b0530a68e28522ebb0e97f3dcc6603adf3e7da1ec1056fcfc6a9bd79f23858d +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.3.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.3.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..816a58a7c4c15c100ab0fae7726a84e103c783d3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e9360cdb59bb5910431fc08c3c21c9279a0ad638b41584b8e47293972313629 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.3.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.3.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5008ef4ff53c19715d396f54b8e1f3c30f119545 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.3.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:538ec7823ada708c91c6e41d42691086355629e6ad1d5efd582a47def4ce731d +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.3.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.3.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.3.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.3.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2bc5a0e7c94f23318dd06f99c2f2fad0223a51dc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccecd7730d5e5ab0457932a13a0151062bb54e0fa9d7aa8bc71a1734edf8b9e2 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.3.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.3.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf9030b1d51a4d49fb47f4aa366ebcf1c1c2022e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:708ea06381d36931707e6254790c936a20d03022faeed8e3355e0f405498b3f5 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.3.mlp.gate_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.3.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c456f9fcf1bf357b99cd9754f962a5f6df6929a0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e68e6d2289f435be0b8b5a0f6c338595edab6de8c90401c0c6955de44c9e1661 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.3.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.3.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.3.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.3.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a8a5182cc54ef18ac40a66b84244b9814eed58f3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b849248247169439e76cab54c63730b2003879a55d227e06d8b94937a2d0d365 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.3.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.3.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..23bf300093aaec012707e1ef3d6c95ecccad41cf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8c729c2534f7ffb69a20ce98b226f802b9aac57bf61ba780f014b83675556066 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.3.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.3.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f198207c44c7901a9a1240f4c81ec7053683eb4b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd1f683cf96e20405ece67cfb0354cc651a059ea7dfe73830b0b8f8ec66df434 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.3.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.3.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.3.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.3.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..14cac08e7b10157039357cac7072f85a88dabb41 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcbffd77c33d72f60a058558d83f9865a59db7d0cedde1b5c7a49e81cb6a8d8f +size 8860 diff --git a/global_step243198_universal/zero/model.layers.3.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.3.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..e3411ccfe2d4110c4063d8805e9c4bdeea224504 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:388383f58b2773a0365c2cc43dc77565db79019a68907dc7c47afe91e8bb8fc6 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.3.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.3.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0980ee6f9ddf32ce112060e6de01cf53c72a51b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4013b7ce64d121218cb8815d76944dda515e7e48f935e1ae6e626ee67fcc10f +size 8781 diff --git a/global_step243198_universal/zero/model.layers.3.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.3.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.3.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.3.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b8277cc3a2c358e2ce96e9798d492ea176d0410 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e4470b2ed7d203b3b3cf0212440458a36ed22460a8fa4cb4d209d81a0f67cbea +size 1180 diff --git a/global_step243198_universal/zero/model.layers.3.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.3.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..939b6508503d7aece127c8e61680e3757900ab13 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcb6ddb2a83896f70212e82510b1fe802de585c6ce68b5b049a04ec1232e00d0 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.3.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.3.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..59897cc2138e9f8afb6c20afd16c03d78c7e3273 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f29558d77500167984f2c9c2159be6ef817bfdda534ebe153f5cac74715abd27 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.3.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.3.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..517dfce0172f955e5629fd37be2d8a03347e38d9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0476b298fca8fdf1ca54f0a42258aca93c02c0ef1e5569fba69d81f362639ebf +size 2716 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..27c998e49055024257872722a601ae35b03033d7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57473e1327c2c3d255cefc121660309288cab341ffce22e47f4ec1fb59ad7895 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..64638e93b7b54114c081a566225a685b767c42c1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a921417db42132876112057d607456bef1e79075d3f4263c6800d74dba6e45ee +size 2637 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd993d95d58d6436e14da1a96a0d7624b41e5f52 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0e9fd4f15f056ffebb7c7f61a4f78ac5d954ba2a252de010550088b50b92c77 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a0c1a150a23609d31e0ece88035c753b3d8da04 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6493b3d371b390590f27f45eb8b2a7b133e572c125b1726f5b7056d413a34e45 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0821f6d5c1276082cc80ce6ab885a615f047ba6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0c15962806bf2176f767c9eceec4623cdb47cd75f4e02e019fec93bbcd2951d +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3897cb0a5b13ac34e47a1c43158fd63c3ff8f82 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b22f39e8c0f04d74c86e21a4ea937c5381e62663d9422d7fc187049a68775f49 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..751a709db6d624e6b76fbb4ff5400a9567ce04b9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df344eabf84e144e5021d62d036649b9cd5ac66546bd692605c7bfb57bfe20a9 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf5f36619723961bf9adb289d070e5dd8759d7d4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a7eedea7804f100cfaf8a489bff9e61c7e32c4c410bc8bc7facd59d72f31c4c +size 1165 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.k_proj_alpha/step.pt 
b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.3.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2702b633d72c19136a3e65a86c69056f1b9a245d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:272eba66fc90ffb7fa57aac54cdbcc626934d57b6254f0afec82bb8c7b5f0a3e +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.3.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..984155585e4d008a7933b8b5b076143266a94d4b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b02e3e1425cad415a26609edea1d6091b0f51617efe9d18c493a1abd0efa9a2f +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.3.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ba6bda7f33622128dc7cb22ef5d7e93b9dc0f9e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b06bc7710cb26cc7934b7d72d58795b71702b1c90b1d6a43d9e7b4611ee172d7 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.3.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.3.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2edfaae6ff313df08f5426f7924d20ed42d4cac --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7818c8253e5e009a5e8c6b483e7b3f61f58a51d0385a54dfc3d3941528307882 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.3.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae82a63757cc363d7d0a17ed90da93657b1424e6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d004b305a07d9edf6722f5d0cfbbb55af5a63fe4eafc0bc378b87f8fb68fb5b +size 1195 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.3.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1af58f347baff0480c73414112b3b66815574218 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86fba95562e115902485a05ae61edcf0e69eaf0ef91da53000c210ba3cb4a2ed +size 1165 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.3.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..bcbbd29b8c07776eebea756bfed0c7abdcc87a89 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e08b992fd6e904a86d7dd92b925a93ae8926ea2b429507c9fb3e6508060a9bce +size 8860 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ec5abf2a172e7af50d8f885b04bf6fe679040bd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3382e7ab34847667e914c98fa32d750ff7ff22b13c3d0b074af6868a0d6c498a +size 8875 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.bias/fp32.pt 
b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fdeca897c3291ffaa3ea5dcd9f97d8556f093ced --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b45ce87bfc0d34cfee4de5f02f58231e8aea345a0b6f1363f485612cba763c5 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a680ad6a4f452792db05b3252fb043dc4145dd5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:598ed1c9fb745d003bff68d11db119d69da6b77b7bff7893353603cbf8b17639 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..defdce6c8e2255164aaa9175dac69fe46b9aadac --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b67ff6846c49e8544c357f099d6f13ce4e589806f39b5af448cce037563d9ce7 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..32ea1cf91944eaf6ea3a8e47d95735f10da2b548 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81c812d0931f7be6bab110748a7b3a85d70fa3fd43f43ed3eabda54e211f72f3 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..260f90b22b601ee42c453435e9ad05c09ae32259 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51e67fddff439802847a712870356b9785bccc12a59fa4ef5972ea08db64a8b3 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f220f4fd0b4d59bc5f13d639df7ca2de04b5841b --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:091a24b1ddf0e79b2e203f5ec1e7ebc789fd420a8386b9b865d96bf654b8f1fc +size 1195 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..51dffebfc877de166aa20dac68d83a6b0f5e4329 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9df1cfe25014a057a9f83399b9f46ad95fdfc250a6bb1c43b5fd4188bd6f6709 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..dcc7b5e52b8e95496af829118fba11f52c51e1b0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a970abb39829a4f081dadfe402861231cb3edf89fbf6d7c2c946d4769dc99215 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.bias/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c22045bfd4872852c9203e2eb63ed797f6fec7db --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e39c6f1cc20edefedd56ea209905b50e0fe8d30d437593fbb49d6f3096a7043e +size 2731 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b318d95c67b4586c5685c0a28eb0d7b1d3d183fe --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cccacf1206d720c15868acfeeb7029760ce5411833a0ee1d7fe32d28a93ce3b +size 2637 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a4aea1965854ccf477d55ea08a14c59a28163ce --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:673c9e32652451c528ee5f8e048a31b481a7f1dd4323d787c8ff8d6a4ea503e9 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ca118cba72397744d1ff1830aa291142bd4448b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:274ab219c872f92c3d3a04fa062d41cc5a4be7affd3dc047eca822f52bf52e9a +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1c2c35d0c776d20e7667f2e8c6168e849590a09 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e3c55ee5da0542033599d5f1fb32f730dc7e01c3f4433a74280e04466e04ce0 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae2b7c2a80958f9db6c57acca319a1b0f51961f7 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2bcfbde088d03c60fcdd2dacc5164b830926f6fa34da0b6d793cdbf79c78a19 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ebf7935a9f1b62c31b23741adc32b8974208d937 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66e3486b3c6a9a747a9174f40910eb46bf925c747bd07d80cffefad5651c994a +size 1195 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4cf26bf81fc434c47f63d36b7c312e7f7f9657e7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d46be668c6bd578c38652f32f35042c5dd36aed96081b51519c2ce211a9ca3c4 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.3.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.3.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.30.down_proj_alpha/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.30.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c06ae0b40f33f9d0f9aff332da7cbb8ffcea402 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8cab4987274686a19f0c0e9a25effd464e8327a183418d9d7a77b16529f6d96 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.30.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.30.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5885fa9655aebd37b806d28b8400f57e354ddf96 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca51cf73ba6e03e528f3eae8b3f691ffccd757245bd641a49eaf380517204beb +size 1195 diff --git a/global_step243198_universal/zero/model.layers.30.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.30.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..258bbab09fe836eb67add0a27d020e58248d644c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9228cfdd00aaf42dfd5c68c6c514b266ac202bd38c9544f3e0490c6664b64ed +size 1165 diff --git a/global_step243198_universal/zero/model.layers.30.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.30.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.30.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.30.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4c2355d87bcbe479d0dd46b9cca30d7c574becd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88cfb6f67f04f0a1236d81550a59711226654247e35c11d99c0634a57545f6bc +size 1180 diff --git a/global_step243198_universal/zero/model.layers.30.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.30.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed3d1b334e6e8a32c5dba97b6644149223f5f3c8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:038b0dd6cd77d7106e9b0cd836d72f7402273a43e3e89104c5f3fa8b47e63583 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.30.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.30.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b84a594fc21c28c40df7fd2c4fbecaf47ffaf99d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:373a39e5ac490bb8eb25fdb7ac8a8b02f835b92b27fc687f7e32c4b70eb08b7e +size 1165 diff --git a/global_step243198_universal/zero/model.layers.30.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.30.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.30.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.30.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f33188edce5c582764022285e59d73e116c4f70 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5d1e675890a50104578171acd0a81cb9a5010cf2037c4e43d4fb7a8772a598a +size 8860 diff --git a/global_step243198_universal/zero/model.layers.30.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.30.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d035ec8c01203a070ba98dd77d5546b5380c0cdd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67d0ad2eae14bc9eb91346b255c9498e0c38119c04d5ffeacc4687382ce65134 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.30.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.30.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..35acabb03f807c1a6ee9a553c198232ad27341a0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:629d2f605038aa6fa0c514d79629275b35a19b344b0aa0a16993bd4524a11454 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.30.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.30.input_layernorm.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.30.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.30.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d0a8bba77932e9e27c4536bb2157653caa2b7cb1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9ee357b724d0a5c19d8bf6ecb2db74fe4886e4248c693f7c77de25004a7543a +size 1180 diff --git a/global_step243198_universal/zero/model.layers.30.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.30.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..43f02b5a11ee638e71434ea262a12228086ae2c5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07c3081c429751ba55ad96aaa1d6913c4c1e7fa0193e0eb408947f6f106c01f0 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.30.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.30.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c8c8bef6c94ae8f5fcbff274ad00aacd9baf643e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b4826bd6bb3ba752189b1fbd1bc8066f199ab14ddb988de0d5f6e29142d0f34 +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.30.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.30.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.30.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.30.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..14aa187038b360a45e505d15b3b19f953bb211cd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6fddcfc0afac9f850d5c4007c1dfeb3eca0fe10bba4e2b6abcadafe1ccdc946 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.30.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.30.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d123fc332851e2173b1edcec93fd2101988760e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30b9d048a0bba26668f1793cfbbce22136098deddb84defcb8e6bb788bc3ca07 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.30.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.30.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..18ea776a3d01b226762d4547ef832f9c2d28a7b0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7df17b5186acc911508fce7a059bb436db46ce0eb57451fa66ca5ff21aadff43 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.30.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.30.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.30.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.30.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b304402abde9cc2130351a1db0d7c215d56d55ca --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f76a5b6f92446132d3954d11c528d6f849a5dca7c12c28ceba71c84deee3676f +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.30.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.30.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7735ec99aa27e6a2f54a7f1d2fdf7e58088b88ef --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eee3094a9a9b7e4b192f2cd63f6281477bde6f4634b536fed8f93702891ccccc +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.30.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.30.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d798f7efeeec004515fb8dcef54d0b7273dbad71 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8f01ba19d3edd6c42a282ec365e4acfd7bacc0acce4eeb9433a031e06c96917 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.30.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.30.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.30.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.30.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b55a7b0fc0a028c2208f5b51211b6a9abe5be7f8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15213c8575ac0564ecc8ee46e2bddecc7aa0e81d00194e12c675af25dba15b0 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.30.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.30.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c433c011275597e90381fcfaae88f47173ae144c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f1eb8e0f20337ca5e24ea78c0ddf91e97e7464a175ddbb4f11f7dbf64fda842 +size 36865259 diff --git 
a/global_step243198_universal/zero/model.layers.30.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.30.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..94029cb71ec4f6b9a8e156f1bb371f060991d1bf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0781caedbff97874daec239f6d1f488ceed4a12e17781ba763585c8452d23e1a +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.30.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.30.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.30.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.30.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d9ff31a6d14c0f826e7f21af6ff7a602ed1f4b7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69e4f6b5494351638bd8070148ddef773413dffb43ef492401c8617a6c9bfe48 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.30.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.30.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1fb8f9b6fe3184c5b3dcce2fd047f2fb0af88020 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.30.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e1c327c8d3274e5459a52508928698efd4a1022e87d16ba33f2405dc73649d0 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.30.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.30.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..66d33ac9167dd0c2904b9bac6856dc0801bf84a9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3894bb4015ca115772b3e0cd81d9b69e2354bce797c520f165df31a4b9defd0e +size 8781 diff --git a/global_step243198_universal/zero/model.layers.30.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.30.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.30.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.30.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..edeafb5c52fbe2557a1330091e5e99155847ef06 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4f86904b7d60baacf3da7595888485702be2690b819038a82f8f5815c362a83 +size 1180 diff --git 
a/global_step243198_universal/zero/model.layers.30.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.30.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..92129f81e22bb24a7ed330424e3330e0d1ed1f44 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:253b548d94da6d8cbf4bdda648f988282a8204f2bbd4b766e3a763c9988737a6 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.30.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.30.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..00b6ca317d159789711a8c13238a4938fed24c96 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77d0dc802393077dba0d24e241d363139ccc5d9ddc9042a8c58bb4cbd036cb17 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.30.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.30.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..84c46af8e5d36f17e37ad2aa18b86dbf1368d6e9 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e26ce22d54630d662f2fa1e8223c8c2cf6d8eb70e46639a5d6c5277a82946a6 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ffd92fd2375dacee2ea2de29a217095893471540 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3962051c1a0bdf42c56c148123e92e3899628451fa575277e05e25e9f6672894 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8c9682cf6e1342638633c88b9b51573c903732f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:882ee099ad94ee4487b864b14e20a97ed752b80735b4d54ca0b85755dc152e93 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f10e0345e127e3b7c13097e529cce8380623eb0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cd945b5ccbb846841c7700df9efef490ef732b433295f5528c6cb4aa5c2c49a +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..106ddbe0dbadaa19a2ae41785b2a5da7ae1ae170 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0acef1ddf9a39523408633aea247e314ed859edef0f029c589c3bbed27f1b388 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a710c6761dc697c69ae5159873a65f7c98acf400 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f39c79dfcfe1740b3679c641a9071e90c86be525d4ad2644a32c69635f6a852 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..31bbd9940be1cef979e52e2b8ec53942609a92a8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64ab3968caa7e07790f167c6eafc93eb5973fce0281562823aa690e2cd723e65 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..90d17b377314993eb5da92561fe260403e72b4f5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26189a3f50a9dd4e12f7a61558e8ff60ee56e940b7c7b251f3cf328c884e5e44 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..439f1937c67b2cce45f716caef4b737f0b860400 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:888df6ab9883ef8f7908195655fc80da214be2995c4203776932c12c568ee440 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.30.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a077883163aa787c9cf70dcd3c54293413388ffb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdac3a71a1175fe4a6f898622777f35164786a83938ab379521753394b876ffd +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.30.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..32f623c20ebc015e8cc3fbf50915e10613d9adeb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb03a83861a78ea9f5fe7627de843933b6ad6c132c3b9b44897d7b45271d3a38 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.30.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..03f5455a7e8ba265ca4fb0571c492433d7131d25 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69cc3ccc0f9c7aa4cff46edf105e2622b4b3d4c0496f814361ae53d9734e32fe +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.o_proj.weight/step.pt 
b/global_step243198_universal/zero/model.layers.30.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.30.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..84c9b9a24f5ce944946afda921146856acc5c2a8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bcd752244621511ae9cda9aaf6da58b673d5e7fc93ac9da9d94f83baec598c8 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.30.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..38ee30e779a4e504aee863a0bfcc8e3325397f4d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3629830f85e8886c9aebba9a619f0474c3e6136444b35e7e5e476b5c73e2dd97 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.30.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f4b3748b4ae14f2a7f796c0767f4fa74fcbaea4c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:96522e7cbd0683bf425fa3b6ae4ca6cbcb23a8d39bb28abc671bc590ef437a6d +size 1165 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.30.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..874c91e7b515329f172f00e5fdbd6608fdd8bc62 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7a3aa3e8550954e22f532692679fa0c41556e384192f775d2caac7b7ac8de39 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec0edf2a48d21723a2d1dc56fc75df4b196c0a68 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8726838a14fbc804bf78f05ed24fd8b77212734408ba87dfe4270c86327fa601 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..016cf7e25b9105339c7c917f5ab463510778cfbb --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3891236390b1a621ffb353c995884e6655d6f78d91e75e1d04e0d1fd6a72d068 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e93b9259d8b847d5ce657b035ddd4c4ebfc1292 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f352e8a39b41e9f451ef52fa0f55e0328abfc0cb8bb98af2fdfd8bfab1ef9f77 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc1753cf67c7db5aa7b0419e2e67eafd975c8b59 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66a647374690023247362c6eca60942989b26317b1cda6a874eca15266ca04ae +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2409f7cb88a4eb78f981d3514a6fe6400f1a87bb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e23bedae40ebde7b5bde563a7cfe63d25309cda7c68da568269172ef5c82fdbf +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8687ae3e2b59edc295d73639e0740fe852478abd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f471dfaa21dc30933ff08787406cdcffd456d6a9e3be77073bc26a67101eb81d +size 1180 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c2e4c816fe0ef6273d36943f6f0d6f920ffcd40 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:6e9723d8ac7f3ff35a8c171353467cde925824772a75be31452438ab977e669c +size 1195 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3a05c45cc39ae8a2802f9067a974dfe129b98cb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14f049645ab1db6dd9f025dd45abbc11bd43d043c3db66dd29d3631bbeb65933 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6cd074a8b179aa39c9f6b608e657508c5ef1f3b1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6e46f5cd04ebbaa25cc5b11c7473a0deb02accdf00d2790e406024b522a19df +size 2716 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c767137bd8e2f01f21921064961440c0427757b --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a9a645efa125780b5478a727367923d0b6b45c3a84d931a2177dec532875925 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e3cb5d5229b506eda7ae74a6f48fe3ad8484943 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7a5b7d30b065363089faabb90da528db5a0a8a996f3a90d9beaaa8b6a7feb09 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd2dc0e807a0ba776295d496bf84fc54b18b3f5f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fc87c470329bab7138cfd11e9f23d6bdbe45d2e632ee7914005493ecd07918d +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9aa0815007d937b2988bc24cd34123da7affca24 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ab97e6fbe3cfc94bc19bba987f3228244e58e1f3c22db00e908b1810c532816 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a8025d40e7d25f71a174b0482bffb425c9a7d791 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:752753f0595d63b0bc8b5f175087cce64f68e6f07400b07c93bb8255dda1b8fb +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..15dda8255be97d9974bb01c5eaa5bf58fc0517e8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:907be0c98bd821fc192fd6071b8e66d218b197f01becfbd909e44660e2ed9ccd +size 1180 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..040466d5b2ee8569d116744849765c637540c08c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bd7eedca5ea95c5045cd955b27c3879bd818ded7f9287ff49c99954bf8508b2 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..21037a8c90d6a2923a0a64935cce6bf0f44f7536 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64a453414a13de0459b26f5fb9d67bdd8e2d257e948a7ad323f9b99cea51a479 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.30.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.30.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.31.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.31.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..cfaa7deb348c39ec92cf8250f2e138c676d57124 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.31.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fecb4427b858562533e778929845729b79723ca55186a6cb626f56e02632e252 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.31.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.31.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f6f1cf0ab8a0cf26884dea1e104824a38f593608 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f8ab966a80006b532b550c9a2bd6bf59a8d9d2c28f47290424c4829dad3fcbe +size 1195 diff --git a/global_step243198_universal/zero/model.layers.31.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.31.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..690f747bf40e06f897586cae5ccacdb86cc247b0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9915dbac54cbdab4c8fca0ca4ce27088bd8937cfa0819a1ef33774db85363ff5 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.31.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.31.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.31.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.31.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..786a63be01a82c87915934601f86822f52cadb05 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:888d345a55503227e8b67573d2e49d35ce5872de89498d019b96fadd91bd7bf9 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.31.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.31.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3f94a07abb09091fee667ce84cb1468413101c9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:588d29bc8a081acd4b4e3d5480566843eef7a43e12fc5c143a93659ed1470894 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.31.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.31.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f27279a3d86a006de7fb2f8ed2600d9eee1e3a1c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18ffb60c66977a1ba3a32ce5512175cf65fd23440825a89228cfbe611e8b9db5 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.31.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.31.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.31.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.31.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d15d86bd1bbb27268310bbca2e6a5a2a7c9731e8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b80c72c6cc1c52188f19d8d6a036fc4859602fc676dbe3e3585ef6a60eb28443 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.31.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.31.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb261ff5718fb8f8d2927889fdbbac84b48a1a8b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dc311cd9f61612f620f1aadb494275e0ed53b1e6af45ce3f8331d3ac8b92d02 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.31.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.31.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5dcccaa93e40e5b7351da1bf796dfd42a5e8c61f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fceafca7d954f3faf282875564930260241cf6f7cd944678b0c8ea0e6c19a7a +size 8781 diff --git a/global_step243198_universal/zero/model.layers.31.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.31.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.input_layernorm.weight/step.pt 
@@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.31.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.31.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f47846b9e98954abd81de9dc3d26461d2d459500 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25405576ad61531fa82d5c517821667025e2bcf8f41e333a8fb49d4ce95790d4 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.31.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.31.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce57802a0ae48ba719a540fe44687e8a22337cd3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a5643483da2b13968c302d5c136376e6a400b2919f82e44daa88694d8a50539 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.31.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.31.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d321b866e1c1abaad5172c6ee795d5ff399cbdbc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f9b4a0efd66631dd3759f222e4854596d8322f983ad433693fb31373d9b074f +size 1165 diff --git a/global_step243198_universal/zero/model.layers.31.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.31.input_layernorm_alpha/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.31.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.31.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f36f335c0344246ab925fd34f7c5f32d2ecc97b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1f36602c416f617c1b55269c98205f8f179326e79d2348a203f4876bc704070 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.31.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.31.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b34fd8185088662c4b37a511f895552ce90ae9c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3046d4df03d56edb69b6b925d0f71c54a6343d4fe6cc3f67773b299aeec6c24d +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.31.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.31.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..15f9b3f279889aeefe5adc13726b3e62b43162e1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4009fbfb198a3aa18e6da73ad5c30fd85b29a2ada8194f77567b7771cca6e77 +size 36865165 diff --git 
a/global_step243198_universal/zero/model.layers.31.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.31.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.31.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.31.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a51acb8ea5416b8710fd45ccd0d2531cd60cb22 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea087481b7d292f5cd97aef10bc2f4a8d6b3613e6a58750c25d9313943b76fca +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.31.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.31.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..77a15beadde0211aa3de175d7c3b7c9e912dd4c6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc5b71b074e5d88925d836ecc96dea809e9332241bdebcae406fedb780f3dd4 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.31.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.31.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0130a8fb1281cdb9b3d2b6d5585e337e3632e107 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:63ce1f5493cbeabf7c1add6189749adbd7687681e1c3199edbd2e783b849381c +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.31.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.31.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.31.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.31.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a1ce26986c490e6b09351a45ae190b2bde2c88a5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c557b0c83f4e03fac8cd1832a4151955f91fd506a74a7a147057064fddc4844 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.31.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.31.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..08819e1168ba39a90dd5ca4b936e5cfb546be68f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0131187e8e187491b048bbef59ff1423081a752e5b60bf79c78f2ec4f8eae748 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.31.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.31.mlp.up_proj.weight/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..6889cf1e07e43cfedb9d0e975cda894f0ae60a78 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db95919dc23835f156f1d373624f7a38cd1a5d3bd205d2bbff51d997ff76970c +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.31.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.31.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.31.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.31.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c5d13b7d1c5964c26ba3c07b89601b7d61fd643 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:164c1444b671d31984ebc7e6058c43748b0d896692a543c8ff70886ef58801e7 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.31.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.31.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..65a2da150f6bd74060b00f9fda27e8a13b735ec0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d050522d7c70da1e2f4962a5a1b3fdd63f23a37d021b4446dcb81d56821c9b96 +size 8875 
diff --git a/global_step243198_universal/zero/model.layers.31.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.31.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..12ef3e8b5e4b6e5c708b399afb2523ebe52e38fa --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:304bf25199cbfd952b77316569f02b3c5b61f758c19b1bf406509db86cc4e573 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.31.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.31.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.31.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.31.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f7cb6d42ffb6528351898997dbde9848763d1cd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11a2c8a4b57bad39cf4b82e08460625be1db0b03586ab39078fa6072654809f6 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.31.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.31.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a07970fc8db90b1a58fb343f8fe18086876f78df 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99ad5bfd9043815a76bdeccce27c2ee3b60395ab8c2a48a79c5ad0941c38bc1c +size 1195 diff --git a/global_step243198_universal/zero/model.layers.31.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.31.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b12205c740b5a91d954165c08afe43ee3313d294 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2dde9aaa9f5fcb6ae8512ba6eea6f6c4a3b1b6020f7ad70ad7881e5267dc92b +size 1165 diff --git a/global_step243198_universal/zero/model.layers.31.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.31.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..24e7cabc3db47cfeca02392fb75bfc6f04619388 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f314e078dadaffa72714d74d1310b8a03842671f1d1122a1b7b09d692e97e8e2 +size 2716 diff --git 
a/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ddae1ef38d8a42cb1a9e9e1be1b0c91c3638ae93 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ba5540fc69767cae370e0018d2b0932fe226349515abe1df82b3643f9bbdced +size 2731 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ace918b5eee7be76f839e23665ea23bbfed973cc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c7df07b69844b8acbb9b7538531bea40ff3171ef0f549ef79b52b9a9d16edd2 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e9d5355263c135a2ddaa4ba82890c9d39ebda8a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.weight/exp_avg.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83efde7786e6d215dc2b47d36b8f2802814423ca3701452cf3df88d2ec74b03b +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..02df3509a569581cda48508ed207be11f84c3589 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e11fef6ba55601f545dcd41a49a9e5d870a80f44438e710f44eb8a828ac71fbd +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..958d9f86051a9d0c67947878cace46f2a057ee42 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ac7453403e475eb4088ffa942c2ebde59d5f9be65e24862d76ee44e54bbe0eb +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..310cd93a2b7f4455a6b04524d5085efabf903623 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b9ae4c6497e26ac366211fb16d3bed15d727361fd3d07de5b0c4076932ea7e6 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff39a64a9c7051c37a46ded5bbb921018195f8fb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c2cad4655727a94d197cb155982f5a97826e0f14f68056fdb7eca551563825e +size 1195 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf60c94005d3aceb1c677ebbeed3881ee0cbcad0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bb4dc4daf0fe086516a2120bba60884b3a6472b478a50430c9d0a394b4d7409 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.31.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.31.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..98a2bb33ed43e64ab885823cdceac5c3a1c74b4c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82ae489c49e577f5f94dd6efb17f988b9b78032750912e6c7ed8777895b2772d +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.31.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..41ea831e71abe24bb91fc3e4f07dad32c90add5c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00a5e0897117939c3d42e0ab3de9ca6e1399e465f6b48c83534af327a410cf69 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.31.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ad983c2ad0518671b6c0261347adb5fd8cd8151 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79a888e87c07683227fc4206e35b91383ec4a9eda6b772c6fc549dff9aa077c5 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.31.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.31.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.31.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8104a5b6df864e016f32499f7e25d3663a801c21 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b37e6ab613e9127cdf63ee45e892fe31f8c830f11e1285e294a88227606be631 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.31.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6dbaf50642da80fce9ff26ace1e239b23e5a5bba --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f88b633b8f9a7d8e05959452247b2eba0903aaea14e826a3414ffb4ef940621 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.31.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6559f9c07c673fe3adb9c138b569215d9753730e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd41ba5494a6fd70608a50524c11ec8fe5a73ced7bc32b9c3291be29be83349c +size 1165 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.o_proj_alpha/step.pt 
b/global_step243198_universal/zero/model.layers.31.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5495ec3ab55fc354daec1a10e4f23c19a4783f61 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeab96b80c95362b3911e8c2bb8c42768f70b6476e7fb351fe5e62c2bdd6085e +size 8860 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..92e3704bdc7b061c189d1935cb01c1252412c246 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcec0106908650149ed83fdccc95748e88cbc68c9450d487215b5c3bbf68db53 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..652fc84b49f2037bc8992598aab4996737bddd14 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:de7ff80887516f4d70a25d9fae7f64775a79eba5e7b7c5c49527f88da38d0b28 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c01db039f9923e940878e3589f222f42ed41a6f1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59dac069f7e9ba13350b15d72be9e1dae7bef5ff44407d3c9fe14e28f394de72 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..95554bf80c2b9241c9e0c7af67ba69d71dde5ba9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aac9c33fb073e64024b6af4cef5d4c177f66616452794bbbca3ea514ee82197e +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..25b3d9056bdbc56af80ea91c5d6c2d9e139771fc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a783b45127db1713d1930de83f54b2645f4c6a0bdcb241c074d812497f05cf +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5eb1fdfa1120eb04d3ad8a55f6f4209a0f03fd4d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:173d1b527fd0dfc5e747f4cd1d60b947dad1aa220d405ff5aa0b056b550f807c +size 1180 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f76dd280897c86334fc309cc79ee474e758b672a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:619b73cf1b62cc1aa236c8331f9e37f19632c22b1e69d08ce450b5757d8e5275 +size 1195 diff --git 
a/global_step243198_universal/zero/model.layers.31.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9de36e2e402373eed69d36d2a1ac7ad8207ee6c5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce8a0a48d26085188564553621e63fadb429ea7cd75a9d42f28a653b0161693e +size 1165 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..82abe0bb5828d81241658307478dc715fd0e0b0f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f1fa82e8fcb77a2bc9146615d1b9086f5878b104192e2aaa140f5adc19a1423 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5424db322dd56ec6a50354ad22341f5e26dc2dfb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.bias/exp_avg_sq.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:032cdecd0b8239c92202373b33247140b5996f167b0e91e9111d23f05cd92313 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..addf45782c09e5a79165dcfd95b383059d5528ad --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:039f664ac4fdeff70422df4f9056f91217fa9669c493b9b36baa7ba6df037393 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d721dbe13f54c0c5617e7e7b0be168cde40e6a8b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:753c82a584bcb0d25d5ca80d331fff65f3b7b1a10405b65076916d28222c9274 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d54794f088fa7baf4da540be8917cbb4df1a062f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05f040ef453170c3e1df6ef6e098d24db04d5d00eaf0a8acb9799c279961eecc +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..629bf0ab816d17fc106a1c20c1263b7494044fc3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc80f0e5a247bd1a705e937fddbdfb532fa0706615c8fccc1447dcb77cdd414c +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd0b0d72f6935cd404d6c74eb0d44c2cb9ce5d5e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60036a183676a1984e704c5d1deda4f0eb86eba2df24aac4c1f4ea30d6624aa0 +size 1180 diff --git 
a/global_step243198_universal/zero/model.layers.31.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0eacd260c302011900d375a7d058a094399ac3b5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f4f36208b6740694bbf1344578ec22c30dcede773c95837a1e5d6ab1f0c7c50 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4d5f26be1a637389baed2f5bf43647e403308ff --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a3c381ab444f70df346d6e1f654a2a44905dd6773dd52726c981e9776ed61ef +size 1165 diff --git a/global_step243198_universal/zero/model.layers.31.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.31.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.32.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.32.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc8b047da00c96f8a49aa854bfe3ea2693822bcd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:770bcbce396ab366065d84121d6d281a9c789c6563e1c4d42f676e49d2ee3f2d +size 1180 diff --git a/global_step243198_universal/zero/model.layers.32.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.32.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8654828a2c4ed85cba823f3a43f86b55cd7d4a6a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1de2979cdc60156597df8612117a48abe44a1a92c2b1ae474c5f166ad6c4ad6a +size 1195 diff --git a/global_step243198_universal/zero/model.layers.32.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.32.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3e3a20f4b66544cca90c4e1f8a6c8de7e789f10 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1de5939d7b837cee7c3c03b17c8a05e8d16825de6a51ed87cd5d7d6660ec8429 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.32.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.32.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.32.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.32.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..334d9a59486c4f051765d81976410a75fbf19cff --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.32.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5ab9c58524145da5aaeb0f5c38dc4a01579883aee7dc963889d93be46dfd394 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.32.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.32.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3280856692d9c0f428ed31c06901a8209c47ba0e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c5bed04c281f9f6a64bca22cfe048b9b846e68ab8a5f1d9bd4816fe4e0ec672 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.32.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.32.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..53b05e5353015d4d08866d47392339cfd9bc9b58 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fb97e020b7d5eef3253c566e829da568a22a344ac0175b9c19252ab07b6a47d +size 1165 diff --git a/global_step243198_universal/zero/model.layers.32.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.32.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.32.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.32.input_layernorm.weight/exp_avg.pt new file 
mode 100644 index 0000000000000000000000000000000000000000..84652661df29eeba4c313a1bf1507557e6554fb1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:892e81e28cade40bb70664e204a63b0d2d6b0fcc045a6c21f86c4d7a8bc04a80 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.32.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.32.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..56d59af6bed59e3489927df1e01c5e315718fcc4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3393b1d935d73d1a645504ef23c980736f11569e342bca72609c293ed2a43a17 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.32.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.32.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d903fd1c7d62221af4db5af09a1edb8fcc031a1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10700867a4e875bfcb95580e3d0f4c1c87b4b0e7d73f31198698bfa058401b24 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.32.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.32.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.32.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.32.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..826bf966bb22074a5500cbfc704c74e6950e9b2c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8de558a5b1427be139c07b9b7a51752e9fb968e268792875776f4cbb70068b83 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.32.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.32.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a22c4695b7026f85df6e70064873dfc4a2ccd34 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd404072ca6bf4cf59e1088f9b2d9c6096fb045ec966e744085fc8c18a30b9da +size 1195 diff --git a/global_step243198_universal/zero/model.layers.32.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.32.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3171bb1baf1d5672c4542a81824bfbe472ae15bb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a4aa941eb02fffb7913b9ae5e1806eabd64cc4b6fdd49ac8c2a7309a87681a8 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.32.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.32.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.input_layernorm_alpha/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.32.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.32.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5970765a4641471065ff5d85b3b8f9799962ac54 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a73d8df02e08305aa5684f5dde02c23fbc7d22ad36498b7fd2d223bdbd93dbdd +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.32.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.32.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..84f3d97de263c6b936cc9d37f2a8522ec4528fdb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85d371a42c96cff0b2acbf90e9949bb4bf5fc85a2c4d74fe91770427694d5852 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.32.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.32.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..560ed96483161a68953ea1093a4b0b7652a1592d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:607a57e8d56642e380dfbeb2a9ccfc94b3adedcd7b7974760179a5e5fca4d6e5 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.32.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.32.mlp.down_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.32.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.32.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b4bc36ca1006418a5c2ad0440341aa2b322a25e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9390674719be3cf793657163a3333b7bdfce098839e0171a99e9c1452361f576 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.32.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.32.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f627606d35261e108eb92a08f86815fac01fc053 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f64868eb569d09f5f0cac9bf07b976c8446469885a60c442e5f8ad772b3175f +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.32.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.32.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f49b9fab86325692d88b4c134030c907b8f10a7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e55a46caf0259ed05098b326e9aa53f4841e6268f444296b035a731e91377309 +size 36865165 diff --git 
a/global_step243198_universal/zero/model.layers.32.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.32.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.32.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.32.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..06be991ad34f903e71b69a24086044a56a4d011a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77ec11a8181d836bf66a55a622620e6f2e182439c0d3fb5a674f3c4267aaf546 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.32.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.32.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3c0e3bf27df649d98b5727332c6a23e0fbbb1d2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c47e7b6e3b208d3d35be686c243ad59087afa0847c1d1851877c7d00b8e2a2b +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.32.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.32.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cedc64b0613a2bad2007844f58b1c8532e35fb56 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:7f7023b431d806123822e2a30933fa5b71b4e5230fa87c756b90dcaa3d00a66a +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.32.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.32.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.32.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.32.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c9531fecebc23976af3ba51680c11d44246ab3a5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba3e0968ccdf75896aa56da4f501ccb45fd668620117519a37438c2df979300e +size 8860 diff --git a/global_step243198_universal/zero/model.layers.32.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.32.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3cca4aead67d33bc61a6b882dc9eb2bbb4e8f92e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d2a6264a8b194c24c558a7ca82ca261de368b4c40abf2c78083736ae77cd464 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.32.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.32.post_attention_layernorm.weight/fp32.pt new file 
mode 100644 index 0000000000000000000000000000000000000000..99d4b9d641774b2be0f549865616670590a3654b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:284badce7de5729c5ba1eac15ae3fb5d23ae74443e7d1e23d2a59fbf79cf028e +size 8781 diff --git a/global_step243198_universal/zero/model.layers.32.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.32.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.32.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.32.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0929bb1839fc214c412259f79ecf436015599581 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deec17f20f161e790ee5c365833ad89b65d3c3ce87bc382af77663ff8a9aa642 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.32.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.32.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..658f2382622d86cf14b798795fee2f74b24340c8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1196d6ce16236fa6d74d0ae6fa9966a69c36d9eec194e89dc19b821f4cc98c03 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.32.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.32.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0b8f2683770aa1aa0ea6e086a45a9a336db6761 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29db8a67fa382c93a7f26ff279e0aee20b4d5037a62888ca81d73bc8ae8f0ea2 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.32.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.32.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c338bbc9ba1e48e857e03b203f2552bded537da0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad4ab486cb7fb1b9878fc5a7bdcca4186775f785d152651cc42e995c133c3588 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..0151bec50a3cf8419d14858603a0f1ab52d0ff2f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8e80c4e89e9c9298d730070a65a142f6c65dd622f8098e883c6cd2b1a588de2 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b70c08d71210ebc57f9d76ba362815415274e7bf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4b73b8e4203a0de5bb393e65ae754d848aac99d46bab928c45b9dbcaf9e6d63 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..67dab885cb081e2a7dad86ddc5c31b9da33ed16b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dde9d99c1f82e820182c85365757c009774deb658965d1062b733ee819180a5 +size 2950364 diff --git 
a/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ae6fc4ad165d4baf2165cabdaee589c89363991 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d388e7209999300b8eb1d9040a298b2b87f49b57e9ed29f1ad21960b8391895 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5dba8c3bb93dba4ffedf60df41872c27b805072a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3459f1ae2848fd22f8b3fd32d053b3473304a64c843a9ae9dff1db65830322a4 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ef847a3f7db1437162a3d297072807f51dcd9bb --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d50a5ed479a560a620929f680f055d8d082cf90b142f8d0fcb914b7a8779591a +size 1180 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0e34e2b966d1e780051d600f01505a8b4c83386 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90a7ffec16a5487b9a72acda9934e5ca3aa2ef2267cac5f3af35a3cd4549029a +size 1195 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d15bd62a5a9596217e07bc86012b1a366ad4c3ff --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:949493615957dfa3dfadfd285bdd1426193ff2f67b10f47666244ec4b3309474 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.o_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.32.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f718f8045cffacd0bcd2ca1ace5229e05ca6798a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38b12b1d4262b6cfe021f8bdc464025debc232fcdb4863e2b440dd2a20819bda +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.32.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5eb47b9a6aa16e28964c8670ed2c91b3f6235f54 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e6ad9c390f7563d614a5074e23eb195fd478d48e274267aef416d0cb1279b67 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.32.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b6d43353afb596c84e3e06c53f40d3c5e34a4208 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9870673182bd4a9021fff27fba9dbb3a0a2e173199fb55e2831de9743937c9ef +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.32.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.32.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1344249134f63aebe07dd1acc44b29597b58d43 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93d2b9240b57f1564acf19aeb63d48f8ab87abdce5c65c381785c452a7215661 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.32.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4bf325fd792d51361dfe607b32b76d2e4841c89d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:635c3278ccf60ef92602297fed5e8162f67c555dacab6b4977e6037bb5b715df +size 1195 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.32.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d43b2720ac10a1531e7d15ad4aadc2132b02a347 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55c041f0bf1b3ee90eb02ca240f57e4cdccd96e7b20c6458a057b6557656df88 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.32.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c973d6c99180175b8c9bc7837c2f888d1106811 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7165497ba3abf2f149b766d449a81a5f19d85242256c781659e6d845c4ff153 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..42f22afabb3cafdafe24e2e138b571c58b4f92a8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0a0041aa05436fbb5c7cacce4aabe5df0d5abc4fbd59b4f1b5b3d3dc45521f5 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5fd1ef9392b9b324a25eec3a877e8456b6d3d706 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c78e29102afdd8ade04141b6f06435ec44c75ae993ae5520ceda6300f8a73235 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.bias/step.pt 
b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..535acacdf7ed3976e5bab6c27c906a533e26d434 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b7546c2cedf95f2b66839eeb3de24b7ebe8c7fa723d6a82b363ed6fc61d87c6 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f278511e653f30c7a9de80d9028b859cc59cc48 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9ab55f06e1fd2f132be8466c9409b57dee7dca76f07514d0346d9e971e1884e +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..df95f0d8c919b00f08aaac2a524eb58b4ee036fb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a505555e086d743cc4993d9dae0df82d12516d4afa2b1c07452c090605358054 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7f2b7600dbb54c300ca8e870e4a962a1a00adb5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:778df7a92303c573ce6b31d55f3f75741330741601c41362732858158f7a5afb +size 1180 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e36b7e0217fcf60278c3fbd1a9ac12715baf12f9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:039e4ed056d6041dd22eee1388ddcd09d171603c75b0e8b8bff685004c36e78d +size 1195 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..60f426b03f5efbb622884bc6aeb047732755067f 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b700109e324e079d44598316a3ffc5ec0883a965e4ee2a425f3ae7e6b9d61671 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3ca1b38d44e9aeb8b4ac3f2139697f6895445bd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4aba213266e36c65b3fe894110066b0f4591defedc358858f5aebafe5372ef6 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6914ad4d367ac5d4db3f0cad156a912b695e37a3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32d25bafb1b256292bcbcf8a5ecc7d54f8c421091a93ae24c2e54d0f829990b6 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.bias/fp32.pt 
b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ae7cf54fbfe654a197ff05a550ab69ab4a14101 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4f0cb249a10ccda87755f47fc885ad46d31f25de1cd30a88af3a191e5b90094 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..827f14d1ff169f5f668ea36bdd40dfbbb89b6e80 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22a673009767104d60182dbb6e68ed82b6001132ca50ad02d74e2fb355b4767b +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b1ba2da6c44a796effd4cc75d2fb77ca24bc173b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:74a5381b481cf0f25c55532dce5d803f47093ef49c01d2f79f24195f80020548 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c8e86b6879dbedd677fa04905fa58e84f3d142d2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cf254a418340644526b8e742cb4ee420ce63bddce2ca76ce477c32409a83774 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0cb9e0d3452d592f741beacc78005e908f740c8b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a0a9d3d463b208e0ea67f27e9e3d9e7665233ac315a7c76861a8a6f3ff3c2cc +size 1180 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..26815b564843e0095435e8836266868046bb2215 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dda561daa261fb977464cf541b7fb53a675c3dea41010aaddebc4758cc97ec9 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..821bed163f0a706ac11cba6fedbed6f3cbf2fbb9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae793ab5749497b724779dca16f69e73b4729df59c2f7f85bb85642775afe723 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.32.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.32.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.33.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.33.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ae0cc97ff4265dac292e716e84307561b847de8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec7262eabd0b7956c23daeda213cac53f966642da8b5ea0705b93a8ec457f2af +size 1180 diff --git a/global_step243198_universal/zero/model.layers.33.down_proj_alpha/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.33.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d97dc440b5c3f73b6f3a8d048b3414901494f71 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ca5be1c5b255f79bd6cd6c111eddb8cbbe4790e7e3bda489baf44f925872be6 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.33.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.33.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a3170ae29532195c6cd89f5012a7f2a990b608b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23356945019fb4b86910ecce7674743e12face5aac897dd8c5c477e145052840 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.33.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.33.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.33.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.33.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..740772e5ba9b5396bebf71ca88efd5edd14d3828 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31032ac016f5abb5976f2203e6983fe1594dd778430906ec0a88eec4ca7e953b +size 1180 diff 
--git a/global_step243198_universal/zero/model.layers.33.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.33.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5109d7027d68b29f230b867d1c2e3e80940dd2c0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8fb48cf30fb6cebc12028f88f83168a87400a542ecfa3ffdd193930757f1c97 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.33.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.33.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3270f96767260691b446a6b57aaa9f8f42a9b322 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5db30548620609148214cd3d688dae6b26b876fa54c172ea830859584b0fbf05 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.33.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.33.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.33.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.33.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0fafaa666b48a2442789818766663215bf302ee2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:96827920e0e0bccb800c28e93e1b30cef694bff85498b9b81cb82b451c13feb3 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.33.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.33.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..32623582f40f34586df68f74e5997491ae41d697 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb0bf7491b2280645a3263b238c489b519cfccf764840a79d138ef2740218d4f +size 8875 diff --git a/global_step243198_universal/zero/model.layers.33.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.33.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..913dfcfc2d812fdba7d4040e08539d6d604e3d94 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa13ca2aa93f5adc01912cb3567a118bd411ca6e682b43cd285cc6c58f8b83a6 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.33.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.33.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.33.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.33.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..94de1844887b32848ce0483579ca2a5e91238890 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce0ce45aa315cbafc071dbbfd5b8e615ffd15813081e688f29cde56d3e3e3aac +size 1180 diff --git a/global_step243198_universal/zero/model.layers.33.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.33.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe93977b74a7ea7c4173edb5772ddc6f0d64ef1b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db7f0a0a738639ad3072731aa34d7c81bfd45b1b0e1a68d8f2fec64ab55435f7 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.33.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.33.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e4374ea63b3ae758c8046c264edd4e07782596b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4018f693085619357409aa2ecec6ab312edda9a1e3b839ba94f5fc4076649f2 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.33.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.33.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.33.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.33.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..39563756b37cc71ac42fa2afe7d44cdbaa736c66 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f25970aaa6be7e8b5ccb5d46a0a0b6f40295436ae92dffcb7ccd26276fcf17f +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.33.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.33.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4ddd2c445dbeccb8df9105d26391b621aef2208 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:490dcbd3216f6a93af31567815d16648dbf9626df2fce46c8831da11505510da +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.33.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.33.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe4ecb9bdcda6ae911d462d44a15a88ae3061a45 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7331748a47c9de5c3cc7769831552500c6037bf173c4b5a31ebdcd9b4e84a6c9 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.33.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.33.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.mlp.down_proj.weight/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.33.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.33.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e2e74355fe330e9f067df1c15900c4a4830630f9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83edce620be4522490fe1f524caef3421ba1ac56e23f57868028b9670dcdac03 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.33.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.33.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3aa8af6ac0ee0b0a02637a92e455c08767d46383 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d52ce97c14b25de0a5f6092df01668e74280838b0e4ff1fdcc1737bac5713fd9 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.33.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.33.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9303cadae723a034cf94217b5fc5c073e7337569 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa61f90a764d97304339fc82b2e9ab342e31e2da83f804bb499229e12ef6485e +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.33.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.33.mlp.gate_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.33.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.33.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e2a61df9aa0fe2c8ba412f739e18559bdce8fe3a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62225a14873d62ae63a9adeac5ac019ae80cb4632b32a07734510a7ecbaa4756 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.33.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.33.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5f8571ab7767b77cfdbb79eb57504cd4a36173a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:248c8d6b47a9e1ec7be4e00397aa4a40510137569e1dc30f521246f21eba27dd +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.33.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.33.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..22d92b9a1f09f5b0ce12ec89697ff2b3e9431432 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:973ed82b0c32c5115f2cca8da646cf1962b1ea0fa282b72f6565127061c5f70e +size 36865165 diff --git 
a/global_step243198_universal/zero/model.layers.33.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.33.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.33.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.33.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..db2d982bbfefaeda655a458debfabc7e7e89c728 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b692b83711dff84958dcbab5dea29254f1a6344fe33c1aff4e02ed9a9d6accdc +size 8860 diff --git a/global_step243198_universal/zero/model.layers.33.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.33.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d7cddd5579ed33694e5cb6b0518ff79c259c25d3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8012ba0a5b41f1a7af25b26667a6d6c8f30aee9990f1ff7cb397a031a32b8cfc +size 8875 diff --git a/global_step243198_universal/zero/model.layers.33.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.33.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3927d185762d73db0b78ae8a57a39479b2c49c0 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.33.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1349604fc7e663968bf58bc8807e28d8b1d0265a0f3902803e67a6f2621841d +size 8781 diff --git a/global_step243198_universal/zero/model.layers.33.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.33.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.33.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.33.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f6f409c05cc5974ecfa918140fdd6084efff0341 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62c9bd25af2954a471b78bf0f4349ba1d66d5a7f890d3fdd7d6ea2091e3780d4 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.33.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.33.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..dfb34dde82c80d121c2e6a035b0c50411913ec60 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9f380e0b079c57ba8f209a16c52c8cecc5d902ebfc618a6b9c499732bbf804c +size 1195 diff --git 
a/global_step243198_universal/zero/model.layers.33.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.33.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1ec3224a20e4caf811b7034e4d71da6719614c49 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77e935ae16dbcfba21091751449d7e147fdaa1a2e94ba6e40741146be86321f0 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.33.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.33.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd4c780c933a97ca99b4b16eae8c51c7dbca457b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d011853ff723d1b0b710da1796b279210e07f2d3db84e5553b54f49462035019 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f2669949a7bcf42e51aa508ff47e079fb811186 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:181ff9d1471bdc09b78d55565e959b647bd7f42497fcaf03bcfdab1a8585953a +size 2731 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4156c57513e01b6ceece5048f5f92d2405597a4a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe2d7147418f33c0d98136338028d43b653dfac03854e858471977b8f453f7e2 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..26e7dd492cf755fe73e6f5d968ac75133cd614c3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d8fe9e2e6fd351e2c20d7563cd0efe3ff07dbae28a9b712d414ff65a21b33c0 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..82149e60d7ee3e0defc5c26ac86c4969507bfde0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92457715b14019f23d9d40d9104e0940fb084d02a901b7a05ce8456c23f83884 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..66d957ed1db16b6c2eadacf3aaf2f9ea9d463aa6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:751f2d3206c704da4ab2c097d3ff86c19830f9c2fa553bcc1f5a44a9f612602a +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c681b0afc046df6fffb3efc84bef79e9f8da458 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b7cac61d26b4a5de3592ec155a88d4b01f5f3b923670ad9a6b30d931acf02132 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e2f882a53a7ce191e72293ce86df114b738816a6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7546a451903c11e814ebb87645ed63bc8d1fe68d30a91fb9a3af38b7bd07873e +size 1195 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..33572ee26d2eb7ea93d0ff4265077e6b1852c935 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fe64d27b19589a5e3851a63cc72f70b7afea0674f567f7d058f70af59ce1eb1 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.33.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8706986cccaf489b12835ac46d104e7aae42249 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f747049ec88c86b686fa6cc9d2332a18eb9a4419f1ff1e57a6096fe19a66233d +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.33.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd6f3b939e43f920e1e82c34e24f68dbecad6269 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bec8720babb3125525e02536bd4d1629c712f33e6694fbf199125c6dddd83708 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.33.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..48a419948aef5e8c40aed8391c5208595e8be4d6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d73778f6edea4398cc37041c77156d0420e9d4f44b37d09b18de55e8f00e265 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.33.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.o_proj_alpha/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.33.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e364eb74ce1c28b892b0c6d82d16a489db2070e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2f7f98954edb9a911e13f63fd5c5569f60c18d0194825e7a2a83b6f6d5cb5b9 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.33.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..75b74965c89f15a52fd36ddddeec0df52e5d2d58 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67ae0ff9421612999abac86fc906e580e5b68e6bf96d64450da8dcb53cfd12f9 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.33.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..794702ec3524a84791d94761dae62bcc8c9b2cf6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:239578af4a19d2c50c1f4e3014b52714d29a294f1aa4818fd200fc861032edc3 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.33.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4721551cbb7c5115fab8faffcc6d21228ec8e7de --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e78208774b5a7f9a94b7915a693adbfd6d27462f4fe5a363e6249c01af000d4b +size 8860 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f288ee98af8fd6212ceed9cc59d3a6fe49fd952 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15c4ee6a1bc9839ebe89c2192aaf005dfe054b3862755e5317808e36b3aee4d2 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f7f2a5a9f51de2ac7de1d88939b1c625774f0e2a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad0ca930cd8ccf50c39c18ae1b62d22c7902a5f4de42aa84e52d8bb7e2bd086a +size 8781 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0efb609bfaa420d5ee3fa6acba399a5f8a3714a7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a25d0048c949c85b2df4108dc1f1d65fefae689ffe7a59acf9f0280a2c0273b +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..037706dd1da91285cecfb16f0534db493aa97b35 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef62be872ecfed87ffa90f1e5acf6174daf327624f43c70f4a22880be2c08ff7 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa45f484219e5ce4f233ef09920744a4115f07b7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e35d2a467d9d2956c9b9f0d608555387d465d14f3845497bf8b306557925a34c +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.weight/step.pt 
b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f1300f663609d01cc3f41891f283f0e00dd761e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08131f57a609cb4063d8d3dc85cdd66c4dd62b1bc3e09c1c98289182f8501e8a +size 1180 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5de5e114f61aebc98d6d88532f463e2a80f6eb3e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a78941a4b6b299d48a7eb9b1b01172f70170b30378436064e18c59886468ea8b +size 1195 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..75276d78c2942ebd3c6298b47744bcd96b88da70 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:763f17da7a861670a2e755d76fb449a01c4f156433a6beffb7694d5cc92a69ab +size 1165 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b2dce855c43f053ffa84a61e25893e576b18d8f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c098b265de9aa626c99b4f985f4be795ebadb613f4ce301a00b53b494f87fcb1 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..14466f5cef188dfd83a7222e2cd75d9966bf6ec0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea1d9c677896147bd99a782e956b37fe2732dae6e004a98c6acdaae2aa3d91e6 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ac9ecb00e3d0b50d06b27401d3f401db441f658 --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba39638060ae064436bd15acd294cd0b5872fb5a580a361cd15f390511c51a56 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8ae54839223ddbc56869fc2903bb69578fbea39 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e0a6a932f5803710d1f1511b4b60c3054a0e5b392080245b4911a4968a57589 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e55a762f363609577aefc314dba32bc69d8480d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19da64ea6e6f14bce7f878ed1600e3260cbd282367fe1b4641f154b6c7aaf617 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d562adbc1710e5d0df813f9f7762264a58cc5f5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ff4dcc79a2bd88125b7af19d6fbea30cde84e6f4196e41237f7efcff82c002b +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..08aae14415838f9e0eb9b2c2fea9c71e4abe1da7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bb96a7c194136e69f0cac77093cf1e6fb630cfcfd670b6f7513ec4708076db1 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..95d8c71be63f278685b46b2670907c5bf02b542c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:121ea654834a776fa70d0809a74bd92b5541bd35d0e8dde632fa37a4bb2e31f2 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b01cff136308575b429a0c7312d1e8457fe6846 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d8936c17dffa56898addb9290eecb985f7dc58c421282a0f72c38073801bafc +size 1165 diff --git a/global_step243198_universal/zero/model.layers.33.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.33.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.34.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.34.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ead7514f20c20f3187980a2843e7a2f34ba3789b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6ade00f7eb5f0569b996846f6c7df07c2f2482e94d9d978c11e9ef9334e4d82 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.34.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.34.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5ebed7562fbb6a9abc6698a05de3d52a73eb7ed --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.34.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0224678b3350f4cd3896cc0939062f7b313551dfd46325cb11eaeadf6028dcbd +size 1195 diff --git a/global_step243198_universal/zero/model.layers.34.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.34.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..db43380509b4fe0ac9d6c189ca225543e29a90f2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2159729b617ac620a88c2a45839851a24e956d8e92f3e8cd55378120d44616fb +size 1165 diff --git a/global_step243198_universal/zero/model.layers.34.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.34.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.34.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.34.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..23b455d327caf6ad055188b5d3bbe4450fa24b74 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa81fe4c71b16f1d8c140d83edd3eca4ac2d4cdab2f62cc318ee2deb17a7f815 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.34.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.34.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..e4d1444f6d9424b21aef7a4bbb51236b1b576d81 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32d790cdf88ed53b978e8e100ab0bd8a9a1e6a38f29a83ce6b20eaf383f6e0f6 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.34.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.34.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a1452bfc3213a530953332513e1473ead32b93fe --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb6f5ee427358890a5d8cd15d4ce65bf0df91936b497fa3502c49ac3eb420b35 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.34.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.34.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.34.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.34.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b1c6ac68dd61af5f9776081ad96f0b7400c12f55 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1452fc77ce59316b2d4d89b5994c3a41162b5e1578e31c38fd7fe597a86d718 +size 8860 diff --git 
a/global_step243198_universal/zero/model.layers.34.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.34.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1437b548b75eedd1642d610fbb08464391d2702f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:113f0cf2f213084c86300c02b90b3ab8680a0b1ffba830c17a08d357e799e305 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.34.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.34.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1eab646363787feeb5b490dd026d40ec303b6f08 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e7f8437f775cc3edbc840d28cd08963a195501a41df1b8f069425f557eedc02 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.34.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.34.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.34.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.34.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e64c7bac961bd340fbe7465d0aeb2f27d9ddd98 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.input_layernorm_alpha/exp_avg.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:897cde26dda5bfbb8fe069f22dadbfb16860875704922030072036ef5ef7da3f +size 1180 diff --git a/global_step243198_universal/zero/model.layers.34.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.34.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..36bdf2b765d0842b53483b99095dc5d54ffd7b72 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c13004b56cb79f9e0f1e8ec3a2af5eaee004ce8532616587d4f864aeb2569c45 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.34.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.34.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..aff58daeb7e9395e3061a2980e6f7608b2323360 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a3b8fa4e7d346415338fdfb45c939c2c8800ab5736a867a9ce4214ebd8eada9 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.34.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.34.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.34.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.34.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..22e96f17018523a9a1212437f36c340ef0477c32 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e511060e10bedbbafeaeed338132f94efa6cd61a26f1eebc3decf2a4d4ccd93 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.34.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.34.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e3075c043dbb1f93a0426f0bb3a803244c7d352 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:301a02786538d89f3132e541ea72c1c1ce858149f305c7ffec9e4e9c78742546 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.34.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.34.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..422cbd8c6bb7cab35a6024963540634274e2d2da --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccb766596b5acd320f4c2a5a3353a211230f56873f78f491a1d793dc9cb21e1f +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.34.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.34.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.34.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.34.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..528b532d97ddaf8be07ce9941905ca3a77a6d83d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be39cd7593355b4d1a1ddaac26f41b9d9d04da8ad923b2cd736e3cc4e390c7ea +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.34.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.34.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f39344baf208e1907e1f33b6595d78e0daadac1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11ab5ec0567a9e757a0c9f0ac4c3ec4d069e8226fc3d960924b635b44b0e722a +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.34.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.34.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..62652eedcff1b8c997084e32c5f8d97848380a9a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cf2756cf7fe6feaa9ca7b4b4182af2b3dc953379b42c91751384f4e3e983a6c +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.34.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.34.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.mlp.gate_proj.weight/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.34.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.34.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c20815a4df46c88923097d3194609dcae89f56d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04785ff847d83078fff4a1a6f1e893c129218a7d11ce54ba99f7e6eeee151c42 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.34.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.34.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..713381c45b3e5f1664618c9655f4d167d0287de3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0ce7eeda8da3c0c97a3927cb22309511e10833118b371de5a414b0714ef1a41 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.34.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.34.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5957934134540bf13f6ebf382ca27d537ed731f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eb562f40978fce69288cafa2180fb418fe3115648b40373c6178a48a46042a7 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.34.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.34.mlp.up_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.34.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.34.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..211e92420b9e3169626e1ea8c3f762340f25a344 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da61d0c269734e3ea91e8a6a15410fdeb1bc69a8369551ae5ce18af5842b14d3 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.34.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.34.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f721f87d36afff8876b2b950b8b38f8e5c25d632 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6a797cf9bd8ce8d68df7e6cccdac9657c5dd011009836208fec7a536e7aaa63 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.34.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.34.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9fc0a2f9799cfbaef43635a4306a6602a6e25e5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:4759f9b41089ed714b440972a26fbee912fcf0bbd5089073529c57c57b8cfb36 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.34.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.34.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.34.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.34.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..54ba04f796f37b600f8081726cbd102140615b7f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:303b76aa5dd913fdd0ad611b538ba118fd44ec36deadbbbdc7076159d82940d4 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.34.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.34.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..57ed57df24fbbee205592ab016fcc4d8d1cf6ad8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccb9a79d13bec83707cf38aeea74351fe62bbe879ca37cbd0f751c0c52c8a786 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.34.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.34.post_attention_layernorm_alpha/fp32.pt new file mode 100644 
index 0000000000000000000000000000000000000000..093c407b84b5fe927be118915930f3acce31a1b9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10c39c2fedb6d862206739f3bfef88059cd46944aa4d50ebc75c16000bbcc4f0 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.34.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.34.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa4df5c73b4ef282081d098c8759ada3996a8f27 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:551e1591ec4e7fa6f4a6278310858f6efa916040369a260f8b6ffe7042b6a309 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc46e33f4c0d97ca8050c672942d325b9f615a0c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:763c90158a4769ee4a7042b286fcb5657a583eb12b560448c3fdd2825b658d08 +size 2731 diff --git 
a/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f58aabd71505e32357c57a7dc2d127ec25815bbb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9f7203bfb3bce929e020b3aaa4c6afacb68a07e1e90c8214aff3d8b8067ecf0 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff2b0cb6a70b78f0c4bce11e58dcf6d5eebabff7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:631b69e51f5744a860cbc3ef6bed5f949fae5cbab0fa4ec78e75522100797b78 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a50f4b3e4b06d419033aa90c2368308c1deabc58 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2657b37bda5a554259d26d97d79d6760809a4f5f91bc7fd4ae00c21b8df0249a +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..834d9568434e07bed2ab54d04913d43026c3298e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:710b789668f46ddba33b9b0f2004d6fcf57b3b933d7c92548a273b9fb822d6b9 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..793c82d66fddee77614f357de42b7317a9a38f30 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f04f82fdb7d15523231582c53d860882635de7438fa698e56926256bfc9b64b9 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.k_proj_alpha/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..702c087e80c8c30c49ecbf5ec61e837c781d9812 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28ab67ccae63bbfce6a71ec911b7c45bae9a021bcf95aba3f00129baeebdc716 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..af739543733aefa7186c751c414eba2b73ee7380 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c82ee85f05718c04e4c119d4a156dfd0c3c7e3be7130ab7659dc882b71f6648 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.34.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..dffea69c636359bae25a3ee98f70f1fa0503ef87 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:363d6479e5c932dbe8dedb57973d78daa4d68797d17b944c401d62300f7665c4 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.34.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4bdd3c00e0b9dfb5fcb60c729ef1cf11fe10fd38 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a3bd00770e13165afb898898518be00f7e600bfec77b9cd3543406b3c0b8e21 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.34.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed5ca9209aaefbf08ad1e106ae70f0412514d752 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f996006d1634f1410fe1310e2f33c5c4262e3bf6ac3ca2d3bbe5b403e632bc6 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.34.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.34.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..4574a4bef3cf8c7929949a2d1987008540dcb66c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb45be55107cc37682830b73fe7d7b3174b668c3b723b7bf6ea949954c29dc97 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.34.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7cf8c1d6b3cc625affd48ec0e659ec7049ad88ae --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8adb3933710b2aad403fd2c0fa924db0e9888386f0a29a178bf32858facf5313 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.34.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc5e90274b2a5b97b1d2858b6d849e56f003a723 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7695cf508296d4a19841480de680bfb6e46ed945fa40302b51b7e04e1bbd628 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.34.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c158c25b6f911dc88681f9222acf07962c62a3b3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90352488c6519b4caec2ff39cbf655e4278ce147492b4b388362a74936b12117 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2d72667eccef12c6a2145edce3c2a17a0702dca --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c0e96e76bc2563a05d4c609112bd3a4bf6c4af7d27fba94912db07da6af5adc +size 8875 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..695c92e18b436cd1b7270c18a322653eff131b2f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c744517698ec30e2aeaf1959ba895a3e1d0bf5598efd0d838de8ea98b4ee7cd +size 8781 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.bias/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2cbe4ef9ed40de06c8ca28d3b1392e888b8f39f9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1004a4f75bed022f2299cfb7b4d65199f3729865048e1485605ef8ea086d3eb +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..17fd9379fa20803212983c9fc512ac1a95f20dd6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ace53d53aade72ad973730605e41ff8225b75a6290383d887517d91a3aa60917 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1d0c64dbb721aef4c3b06d62f0f59f4354c6b674 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57d27ae1f77e66b12586575dbcdbd2f258d1c3bd4bcb4786c8ab1860b670e9c4 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a227fcb7af87f849ab791d4cd1fcaa7a58ee836 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d983f63a26da1a0ce663241c90f86f5276e9fe6a8ce5646d3f36a896a43e523 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..02446b76678dba2f26e3e257f0c37e75fae47e7d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc8463ec32532283909f44bc33c028be464a952794677c32476e5a80e8e6841b +size 1195 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..87c79b91bd5e44987f79becb66b551c233e6be83 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc7e9d746fb093373349596577f4043ea2af178ca58e47d62be8f206ae0e5a36 +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.34.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5a6117f375c17737ce9689ce8e65fcf8d4114fb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21876297204c2cfe1cd40f977eb35f08ba59adad9d70b9939a5ed0d714efebd8 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3ea4d8fdc45f8c1e7991a697ddd25567c7e590e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:010fcc0909cd950695741fc1a58db6afb3550ed73e9938ee10c5e869e6a0369a +size 2731 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb69dbff0fc32cbdb86b143a89e9a29536fb884c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.bias/fp32.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d9db023523aab82bf78e870b0963addee31f1eb5bc5598993a23ac267a442e6 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f85534b2ee66ee6ac45825b97aae78b92df23fdf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1a9ca1f68d071630d389335ee7e3db5708cd89cc54dc5bab6a06eac578b81a1 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c09883a8a61b8209f8502d1be648d0038540cab9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9fbc1a44d5ffbf130de85eaeeda211e18ffb91dc5da0643c6417a9109e31948 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..c30321e37103149e832091d24670e135d31093a0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:882a60028d51ef60be0caed91e532b997ed674e4abc9cf15dd8a03bd914da423 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..05f194551f530b61505d502d9a3b83e7d195a0c2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4852e41479fde879ff12828f35807db422e8fc7070334118dec5c89a2e62d2c7 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..056e30a5082a0f530155555b7bf6d8ccc347c055 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a303f29defe298e483064761bed755b245d70aa9f570e6705eebff7c4bcb047f +size 1195 diff --git 
a/global_step243198_universal/zero/model.layers.34.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..719340130086802316252791e164b257b8aee6f6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a88bbc8d05988f5a940458d7ba9431cf604ff0450e49189f2443b37411db3c5 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.34.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.34.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.35.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.35.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..423f002e4df76a50068c1052c393b33f5372346e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83e7188dbdf2b46d1a2c06798fc6cb61c81b151520e4147780f603e7b74049ba +size 1180 diff --git a/global_step243198_universal/zero/model.layers.35.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.35.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ead9274e06f1d0471caaae265fcce287db4e0e1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:2ad99b761345b4cc7630c47fb24d74bc3d67952985cfd74cb125513fff5eb1e4 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.35.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.35.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1def29a2c784e2d37430b000fdf1bf70c2552e90 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1006a005e69fae38bec857569ecfda6116129746e60a395ae598f13b0f04d40e +size 1165 diff --git a/global_step243198_universal/zero/model.layers.35.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.35.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.35.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.35.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4774c1b46b9794d625209ed166e0b953f328af6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3f2bf1a841e11ca8084b5391719dbbab7cb8863c64f8113c9feb131396ae4f7 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.35.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.35.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..65eaf84395bdbb7712b3a7d1de45080c548c152f --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.35.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13d7d9ff600cab8c72f643d06ea51efb08fd37687d4fe0fa2cc09b68707627ef +size 1195 diff --git a/global_step243198_universal/zero/model.layers.35.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.35.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..979cb6d1382d9213c0be91be116cd2ad75ed2ae2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2fc694f4fc9ca2eae69cac6cde01d4ec9c1a739023c4068058539562c1a5efa +size 1165 diff --git a/global_step243198_universal/zero/model.layers.35.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.35.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.35.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.35.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..60af4b6478796ce03e3efb3ee48e1c0106dff700 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99bc9219d0bb0af9de1053a3529155bd41ad41c75fd1ef86959fc0c6e9328858 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.35.input_layernorm.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.35.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..dbfd4101345e740bbcc27ed1cea1e7d154ecaab9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:899534173d93a700c2aa4d377adc6a0564db84c8dd26f7f79f67f99c60465e49 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.35.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.35.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b2ca9b7f8238bb87bbec822137bc8bb11cc9c91 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16bf79f988bf9499d3b177b5014ebdfd7b68ca07f5ae0bc4f9821a0e68097a2f +size 8781 diff --git a/global_step243198_universal/zero/model.layers.35.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.35.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.35.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.35.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..85faa9ab2c88a1d28233a518ef294e200f8a2fc6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:490ce9b607f7289406a3fbcb0fc94032b8433ba6c40f335c053952d8efc3a8e3 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.35.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.35.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4469c60d2a7db6028247820786e3b2cbc2789b2e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e6916bbc21b54fe40f895db3d05571ef159ece176dbbfbc144681796397333c +size 1195 diff --git a/global_step243198_universal/zero/model.layers.35.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.35.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..27df8d5f608547007f860178b1dff8535eb1404f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c216ffc169b39c2ee0e4ceb509ddddcf445d85c90dce953af73d376ec76b709 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.35.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.35.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.35.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.35.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e289a63f5a156d3e1b071fa5dc23d390b970c53 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.35.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:008f1f3f3a3e2d81d1d310f5b4e0ba64443fb1bd0dd275c60ab6e29765901cb5 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.35.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.35.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0956cffcbb1dc884095004840c6725066d8d16be --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5d223cd76087bab9711ee2294ed6002ca04e26cbd3f140df8f2c1ab0e892ebe +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.35.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.35.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..de40e0ebf901b0b51601119fb399d891895ba3a2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9aef0df09fd6a6b6049fede8fa010083c0b86629a767ebabc8d39aabcdbc7e30 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.35.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.35.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.35.mlp.gate_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.35.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2dd503734ff3a045e18e935e87a46af70769d5e3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1212cf5ee6e938f9538bec1cdc3bc9e0eabce0c45b3651b615c4b59ce5b0140b +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.35.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.35.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..61dccc7a38ad69154c98dd7fe6e356323201dab8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0ecdd5267bdcbff66d4d73a0b95ec07fab9c24fc7584be854459d42a42f923b +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.35.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.35.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3dd2a73db880da5ba0c239ae9a7064cba342fe02 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f495ad4c75e356bbedaf546feda57fc6d40271283b59c4c2d066c4b0b14e5e27 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.35.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.35.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.35.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.35.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c8b423058caa4ff7175cc0581383257958f18b3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d5a2a068096d69317d4f2d7fefc3f0b41ec2beccd849c84d462f9af0ca56bb7 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.35.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.35.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..021dadd2bd9d8daa9bcb39c7b7fe6d4970306bdf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3dc1e26bc7076952e52431a39d608ff11a9712948af9c5ffbd051eb9611a1d3 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.35.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.35.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a31a634d9a192cff4b6e4fa7153e823f0857b4b7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6462f44fd4448c1179ff008ed28f2ed5931c971c18ef5d0d95587c14ff6664c9 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.35.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.35.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.35.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.35.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.35.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..16efeccad077bd5dae1838d334025bcc4a384bb0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87d87eb23c09b8ee73da090e0b3435e0eb9b4bd8c7d7e3506e332163e2ca1ade +size 8860 diff --git a/global_step243198_universal/zero/model.layers.35.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.35.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..27fc2a31db7888fe2606c2abb1ebff9e90af8f2a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0007a6e1533b9cfe582e106b04415f89fd0fbd73f57313a2781ef669044ec6f0 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.35.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.35.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b620f549034808243a137800a2513ac713603ae2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0b06e27f684d2b6abbeea49d9801f12908f2d6f3f7d0cff498984abbdf4c68b +size 8781 diff --git 
a/global_step243198_universal/zero/model.layers.35.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.35.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.35.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.35.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..512c7b25fc625d5d399693c5bc4c5d4e373a929a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51afeffbedf56eb048062c00ec27afe00ac5f2f29f4a0e46fc1f68479acdc657 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.35.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.35.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c4163c15e757f02f4db1174aa039e1833e62d9f6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c476f07aaaa777dce97e041baedf817b9fb6fc52dee45470e78a10fc1fed986 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.35.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.35.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7eddb372d6a4ea094e49a90f2a85320f6fe9cec4 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.35.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d883ff2a792afff0a89c2aa0f6b7495a5aab7214900e8ed89bd8a34260d18ef4 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.35.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.35.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc5bbfd6c9524e98d1bbc71e4368e444b15b2740 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59e4d9d4e7443470099fa7ee3be2efb73f3e9cdbca5d9a99ab27a98279312aff +size 2716 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed8c629a9e22445d6e8f2545fa97b501317609ee --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d935f1396c207985b97a7f645498eed929f06297d685526c8de0d95b02d0927f +size 2731 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.bias/fp32.pt 
b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6630ce34624b763c0d406a9acda8623e0ec8db00 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afa66bfbb66a2f15e1cfa93b11457ba32a55d4e79878bbf46b91db8971cc4e77 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..db4b42be4203d6318d7532b3baebcdac54ceabca --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:425f90541126cb44e3da9bf9e97003a6fe0f593952d826b2d132e5463368c528 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e3058c4b944eb11fe31cc57cff8ec2865cf62e7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:dc1b724098dce771da3a2abf049598f6c127b1c56cf106f559f4ce073245cd5e +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec185ada8f155006b44beec41332ebbad1cc9372 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f62dd35f2b73bf26cb6691bf433e005eb568d263305a84d59e8a03c3b78cf61 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd3fe9aaa44a269ac8e0c3b057e479efc0670aff --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f52c3746386339fd4154ac20fa022254395fb5b0111e268aa1d10f6fa4913d4 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e65e25b66236b13df5c810e962274281fed84554 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aef2dee61d5313e094554462863868985c068768177a2364e5e3e8ad45a6bc9e +size 1195 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b61fc98c1d030803256de7edec36cbe30fb79739 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ae34aa70c2e50b864a5f164b4df44a129317e74e5220990f41760449cd5d212 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.35.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..854928e95b5e723c549949d3296f2f372d696f24 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a06459e12b1ed20249c53561c09f9044db096e9db5729609ac117ba9693e8f0 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.o_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.35.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3cc9613337969ae9adb357686cf2a0e8131a166 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a99a9dcd7b9248d8c3c9a06bd92d1c4f5849950df27d56c8d2a054f5376423d +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.35.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..50d9798fddaf6d9b4b51aeb19363d8768a0ef012 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6903230e451f62b54c8617e09441cf6d7da7dc6b7ac56a5b1d097da5f13e415 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.35.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.35.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9eb95b36f9c689a7cbf1f31bf62843cdcc00c1bd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a0830611e77763152511ba1c50b3c9cdedd7b7259a153df045b77e4795d0f462 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.35.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..94697d8566977f200aa240c88b0515c548f84c52 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bce16d0accdec4a489f460e0d8011cea566274bb7317881836d06c870dc4e02f +size 1195 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.35.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3eb5ba347b15d912ca7d6ea30dc470f9c3bd6d94 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19674742efcd7816f311b1fcdaff513ee94d336d10234a47e8f13f80f1b7c8e7 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.35.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5cd026c5fb4ce41759c23feb8119a9c9aa572e2a --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c49566a4572c3212b3113598e99a7e49c77ea0a545b52edef222e90ddf18bd0 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..269d5696ae22e4d950ebb2061b82c1e254e27937 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8560e13c3e7ad5949e389b6ef0b71d043dd4046312e9eeb67f03d4897722449 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..60b31f83e089f781723241807e6ee21828c69923 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6d93b3ec321c4694d266a55ee557ccc6f26e7215fc1aa7e48e5febcf6ded880 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae91ace78cd9c1a10f20c163fcb7e3e63a2343a0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed10960d2a78a42ec9f4e84f94cdeb240c56b71ac2130d5d117f04ab706d3aad +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a7e5677d436c5fcf30d227f9a2545be494e432da --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55a5de6ab27129ae1d49c0cfd12877a7e5cb87906f48dab6e9b2817e18983d5f +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b37763d6c6684bf8ceb908ae5f352a7532a11f8e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1def3a8fd1c267031b63f4720ce1c10fbd60c1a70de1747c4adea49b50122062 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..386fdceb25edb38a5a79f92891247c701b546cdb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d2f957e38360f5a74e396593d9d847140893e3642709216ee6b1335d366b840 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..116614304afae58ccaaa025816b7adfa3e86b634 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:134aa3a72a4e70f83dac76d144dc7dc5a6ec6e53496597ff85da02ae643af395 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e3156fd7cc1cf6ca4e313b44fce1a62c1777b05 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efbfe753d5a2ae5c9cfdd0aad81063f58d86bf2897bef15b67a1f31dd7c62ad5 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee8e6b116a9465ad96515028dbd9649b25c7c6f4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a73173966ee1b3e4d9a685737949bc48b3ea0ffc3cb74f43f2a63f586469760b +size 2716 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e977297b0841f7082b1c573bb5238cc5056c43da --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5adeace0e1cb9ada4356c0cf8692b8d0660f3d49da4d8df9f5323ef7e7078c0 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..bbdec604c80ba2897d59a1b733ebdb84fe877cd9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:164cde697d1d9bddf64a1deb156b1cac6b239a4d5240be0c6ce0d8e287fbc8a6 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.bias/step.pt 
b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf63cbb17ddc37a7b1a2ac328bf39c35c2bb291d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fee8522fc771abf39ea0331b700ba7897f554205f84a94408cdf0b55f81a54da +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..79e9ca4ee806aed591692e39d6b9fd8396487dd9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:673054b362710dd9685c87247f86ebaf953d0056968e5e1297523c4adcdac515 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a6b1f1fcc3ab16e393e8c46199f76ccfc03c252f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:2a2909d040b734c36730758daf095db9f47ada5b98dbd0a6821aff8f1e4c2c2e +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4724f23da1f6307e9c3be0248a7b7de41a2df405 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1791d01335050f6ead2e650c6275bd720fb648a747a45c3c42f156919f88224d +size 1180 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..641a732abb932b9ed6e9f93b225b863e288cade6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a42cbcfbed4fc43a0cab11bf2f3225b6f3b00ad396b6567661633164df82dd65 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3fc7eefde361e4c14f66ac96df6625ea9c5f65b0 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94935990c2e81ce00b89141d940b093bf6071cd999329ee51e5bcf23daa03bbb +size 1165 diff --git a/global_step243198_universal/zero/model.layers.35.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.35.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.36.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.36.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..12c560cf9a62b544a03f8ad83865049c7d1d1e10 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87462db624f867d252d3133e9cc3d0be9ce427ccb37a8715309ddbb0e0eb172e +size 1180 diff --git a/global_step243198_universal/zero/model.layers.36.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.36.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..177377719ba05c13fca933ccb15955539edd1b94 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6e1ca2f1c1a267d9f5df1686c3f22565d2039328b02c39038b0d924a0c9fca0 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.36.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.36.down_proj_alpha/fp32.pt new 
file mode 100644 index 0000000000000000000000000000000000000000..06c9acb348e94c6fc60513ad415d018d3a6bd046 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e53c0ef550405e1b605ad6b607fae996eb6f3cf3f42039c1859260138d1883f +size 1165 diff --git a/global_step243198_universal/zero/model.layers.36.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.36.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.36.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.36.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a7a1e5913001a8ed962f4bbbff9da1bcf87d415 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08fd9579f92790c8f49ed9d2678ea91303de23a2f3c269bcce630cf872ac5534 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.36.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.36.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e35ba46c829410aa47e3c3424ac0309dd7fa8d68 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a7b8cf066e4d8d7addbaf16f2aef2e11929d237da1784755f0903fb87c952af +size 1195 diff --git 
a/global_step243198_universal/zero/model.layers.36.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.36.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa19f441a4c4450e2d73fecc5710c921fba5be21 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e31c178a529194fa091f01443ec21e9cac8b57cda824edfc0c463304ad0dd1f7 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.36.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.36.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.36.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.36.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba961e4a9b192c03969437c1f2de1f27fe86106a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ff88f8fb671ec6b330a22762cf2f30236ae73ccfdcf4917219e6848bcd5d80e +size 8860 diff --git a/global_step243198_universal/zero/model.layers.36.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.36.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e461f1e12a543fffdae75fabc9d9cacb3a706322 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:a665cc5584b08f2f8fa0f6fcd14cc78d54a9ed45c8de1286c41e64f92654ac59 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.36.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.36.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a18a685be34dcc18db76e3954a7118e705863185 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3748322259f69ab7670426344fd043a77f4b38f5503b6117bc45f6ec13430d2 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.36.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.36.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.36.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.36.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..acf2559c57d14ab9d2985643212de470eb66842b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06bfd457f1b0e0e7ce28d4913fa15c1ea4f6e5a8e401d978c80c3c89c1165fc4 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.36.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.36.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..154db87e81cc18d2558bb83e580ce5c343efcb48 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bb9067d764380bd6fc83d95abff871f952a40e88f1343d17eda8903776ee4e3 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.36.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.36.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..833318bf37796f39dd424894762e096d86b41bf4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78ae5378cc772048edf2924c37bed329fad0f9dddf759f79e79759972cc7e961 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.36.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.36.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.36.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.36.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec0497995e84e89865dffc9a17c89bce4529c8e7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61504ce6d26fce2bb79e4a40e24fc3b384bbe46d3d7fe37e46d1f89a8ccec75f +size 36865244 diff --git 
a/global_step243198_universal/zero/model.layers.36.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.36.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6713050007e1325b684fffaea4eb67c0d1a74526 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a29f15395e3bea2c95bf1d2d57031a1089886652426b46246c6d4d0dd3856ec +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.36.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.36.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e971db5fc51fa3773849cd0030113f34e51b0f8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a36cc24179fa31b14d1970ac8cd361675a1a2440eb589299b04dfff71316b57 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.36.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.36.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.36.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.36.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f20ae9c88e545cc441de2191767f16c843f7fbf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:a1e053aae5204f7141551d589d4b58b2bd3655196cbe444bad93534e4235cee6 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.36.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.36.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..387681fcc09ae141891b577c393ee471a62522b7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed5ce2c58a3994f5b7321901fc212144c7998cb53324b775c3e148850a1c5313 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.36.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.36.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d67a7f3996887be941dcd87eb6653f8cf11ac1f2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:123572fa2d7c1b79dc7b85a756d9230f96c5a8648b5d509081104b2ccadb029d +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.36.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.36.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.36.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.36.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..4a24c0c987052bfbedf05a08d40b491bcdb73c98 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a4320ec4e97a5d121f3edacdca1e061d3f0963b0197bffc58317c9d232cf0af +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.36.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.36.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d07e77f7866aa209f30120836b15bf69ac206202 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8270a5a368735000c94a3fc7fcb444824deb21e26cea8b75d6c3dcb7a0afb287 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.36.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.36.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..98e090a14bb6d6b0352c5ed4cfd0dddc0925c789 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23d45ef28bc3a0b8b7be08263a660c12c6db953ca9ff40462b9ed0342ecd4d1d +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.36.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.36.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.36.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.36.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e196713e77cd5d41f8df9c1d31bef04ba167f63b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acf5ca9e65eb047e6093be2124d20839bffe3770e44536f48aca0c6869232f09 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.36.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.36.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5bb4dab81a5e1ca088395b3c7839a96a7bae182 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27f389d6b0822c79402cda86c1db971242f9d37ef1c560765bec3ee17649d15f +size 8875 diff --git a/global_step243198_universal/zero/model.layers.36.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.36.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..517d3b9f2a6c5da5355b3b315a3263e189f4aec5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a8107270823da5fb1a0a749c11af8d17f528b442979f9fceaecba1447167e17 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.36.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.36.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.36.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.36.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d1a8569bb58102386193384afdbab4e8058ee63 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5827d5e4f6f057ea5435ae380af1ad0cbe27ba7e64860e69404d973788228d6f +size 1180 diff --git a/global_step243198_universal/zero/model.layers.36.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.36.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..bbd61b65a52fdefed7f0a1f3c57457e118709721 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f17e24af7a2b432d8bef180107c4826f12cdf735f13e35132d3d9f59bd9c6ea1 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.36.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.36.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6604075b17c72410057a63528100242e8ef74040 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69424a35e64833b4ad8c462b06985fccb60204978518ea4d7a5d0763eb0ee4eb +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.36.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.36.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..801608d7b0898ecf571521de5f2d537fe4d1ccb2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aaf4e01f18e832da9f17b5a9b381daa63602ae6ff74e49995f1432cdef6a834 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d7984c8d268873b9bbbfd537683844ba0ee1f882 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb966cf09e06528b4d09280866f4c2fc6305e7a2e335c605d8c06f4ba55d015a +size 2731 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ddffd24b38e71cc1ec73f80a4044b18e03f577a --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f64fa39abf5e9d0437f00ef817bdad2868750302cda23bfff5e6f1f5ce128c48 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a66963cda492da4d9cbe5609ce4259be5a66aa6d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c69c08d7cd63f6ce50dfa83d8f72f8564f8e5274965b3a45c63e4c0c121008e +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..12e2853ad6d88790f9376808a5301b0180282753 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ced0a6a8ad0194cfc9516a758c6c2bbcda977a261b01f26ed1ecaf5c9df52ac +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c947a66ff67a05e575b0c089f3ef57192792962 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bceb2c08f6ced5b906cea6540d660b307db5055a8369535c843f5d37c16eb3d2 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f56bb40d2a724fe67b58b63e246b0376d9059f2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04402175eff28415eba5e00b143dc4dcac5bdb4ae261fb8ed9f41e57cbe57f29 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e1519eeeeeec102c004ebc4fb88d004ad2fa3b16 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e6e247d80344e76297dfbb036122a8c70cbfb2c41e588e48e3fb54c0b6f3c668 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..636dc856e5f537f92845f9059c6bb3a5599b2e89 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4f1f8db8ec12be3500ba12e6681a87bfd68cd5d4caf2418774bccfbeb7b574a +size 1165 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.36.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9fa39a450ded6c856ad4d2b2eb67e9158ce0994d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30bb46d860e15fa902b6bed18d46a106366d2a8043a21cbf03813a372762479f +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.36.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..02637590abfa562c4e7c52b9b67af19f447e344c --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c0ecf5d4d0f49b65123ad0888262ef11daf9db12fd0d7b55f65cea74d0eed12 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.36.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1b998f9dfc346ee0ef71342f7b76aff5cda5e5f9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcd84086782ddc2db8a7f8a67b458ab0179fb63d6b81d164c16006f9bdb49d01 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.36.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.36.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9974a66913aa85f63e941392ed182893732a39bf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e88051b1fdb9becefedbd029701458694583d9fbcc6d027c6d156bb74feeb02 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.o_proj_alpha/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.36.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9595752653b69a42e34f960d7afe7e5e55398f5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0266a6a3ca51400289ff719a6775b274771d7fafadddf6e6e09e72aef4913541 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.36.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a364062a0cf30d91851320074c38f3ef0ff9a874 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17bb3afa6986f4447d1c906ca8d3d650b7a2f5edd77a1e606e23aac8eb3cb3af +size 1165 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.36.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b39dba2a22b7ba006bb6c35223d3f777aaf06203 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:01b7c145de46851be62f38109de84553f70a24a3934aa544a6356dad362a1286 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3c477c983c1d881f38d9c97eb628b5c2e4c2561 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e3159e2cbcbad55a69f549170fab0eb06af251e926fdb7029910a5065d3a19d +size 8875 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a3a5c641237115ef547c392b1e5433f014e2325 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67a186f58253d6f258514f0f7defec00e884985c94b08de5071ea9f29d76a3b1 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..59782c90571a0c1a8ead929fdc66bd70efa3a56d --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc052a9008075d73df066a68db0313dace340671a9919c6c6906e3ec89e8b45 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d991dbb59747d6f1685b359f1a268cb271ac94be --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:623848b2359d418ad9366bd8c198b303d2818aa22ccb9e75b2d5830bfbc107d4 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..473ac797bd21ce5263bda29970148ada70f23917 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03be63f9b676a833a48f7f35eadb02b7fcb883c9ab84e4a9d2313efacaf6c5d8 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.q_proj_alpha/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5172eeb201bc6e1cfb0c8595457b70afac406668 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:380a6ce8cf4f0db2dbfa13ee55c8104cf6cac9f0ed4f193c7e23fbd99a1939b8 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a7bbb3f3354b9dd966abe7f1c66fc4b9e1dfe76 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a4e1206a82b4344768b861fa2c4a2499d3c943175a2bc3323fdfd08e73598b9 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e89dc030ad0c3a2df499449ba9fb5432ab3998ca --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ac20b9e38fcb02647790dc17a29534eca068ff2ad8b3878d6dc3866ea43e44a +size 1165 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..68961d6bade638571ce229a488b079039286009b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6274fe6016819cd930add4cbd902a9f267b784c0caca13a8171cfaf0539bade +size 2716 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..755d4b403aa2f4dad55e35631408ea6a7f0c3903 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:540f47d3763101ba5744ee330217f2b4e57e888ba4a4da045f1d28ced26d3908 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f37be090d6a888d563eacc65f0b2fe73762e5788 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16a58cce523c859916f69656286a5f88bf79a0b32a8d2d4e448247bdbc169d2e +size 2637 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..77effa1eb9a3177a89ddc9b96d61413c6ca6e3d6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1227e5ce38bfec8d4819347fb700800f5aad2d9c3a6ac5b00976da90fced76e1 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e95bf8279d42f30cd06db5ab053c0f8c63ba3ff3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc80016a74cab2be7c2e82f4e5db2abd446821c2ee2a8748431a926438b6ff6c +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..676b39f25d0a86841a855adf7b0e88c253ff6b2c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c470b8f8f5372eda666da9d8d3bdf4dae48133244b1e950b6c5719e81f01f11b +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.weight/step.pt 
b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f868a62af6939db6d65d84a5ddf950dd35975795 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72023de112666a3bf7b3e81ac908e3fa88315cc9056629a3425e454004b5fafb +size 1180 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a146c3892881a4b44b71035c276cfb31cdde1e8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b4c91639d3e4ab52dfdf85933597d84f17880b1b0f1fe9a799d6bc01f657960 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5541c003a024f34ec0a04b2e23665b9692f59124 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d1365cf679bd88a9f8eba6a382a2dade8c66e08f18bd992e69450ea63c9676bb +size 1165 diff --git a/global_step243198_universal/zero/model.layers.36.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.36.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.37.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.37.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8dca240ab2aa0e509cf79efe399f68423b2f1aaa --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45761ae1dd39e4f01c812efff4914ffa9b3bc8160b2a7949a43fbc94735a0a6e +size 1180 diff --git a/global_step243198_universal/zero/model.layers.37.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.37.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..17f70c143ca1ad32d85c45118bcf6021120cd670 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6086b8de034ed0836cf8de7194bc48f4a5bf7e0879b18e42b53198b8f27d0d94 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.37.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.37.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e4d25dd9f6afeea475c92201c8fd7dea0fa6169 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.37.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24852aa05022acf86b6d4d98fe19fd8095d16a6fa51ec71fc1cc6ad8c462ce53 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.37.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.37.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.37.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.37.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..79d40c2096ae9e6adf735827f8b37a5b9211b19a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:052b4530e6f02e72d2db2b60cfc2261141a3d11cce4e8c73223ce15824da1855 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.37.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.37.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab559c27147e9ffc3f729d177956a588a6d2b121 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c38fdca56cbb7f9024430eeb50217fd815051e83dbc1570a1c89e3a2eb1076c7 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.37.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.37.gate_up_proj_alpha/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d3b3346173d7b2c0b929c669a75f6cc7c14b5fbc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:704cac5efd2498cd5fa7029bae5fd75df5e1a57bcef96c0023417c2f3ad46e90 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.37.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.37.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.37.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.37.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4214a1c3ca4bb302c972d2f93e8887bbb65beb8a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fce27e8efa7e89de7721eac6477e29e986f06301db49f2d11465ca812de732e +size 8860 diff --git a/global_step243198_universal/zero/model.layers.37.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.37.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0dc747eb758c0482b8d1ba7dd7d1312ad8531065 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1805d163308f3f6ca78b8ec75f2142537313b90bf87e33534a259c19bca966dd +size 8875 diff --git 
a/global_step243198_universal/zero/model.layers.37.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.37.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2979a4793d8fc4a44d637bb23d337366dc2b6ac --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6d7fcc4fc782aa1996edfd22d16b0e8d9163f7dec9f9ec99e0652c023b627dd +size 8781 diff --git a/global_step243198_universal/zero/model.layers.37.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.37.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.37.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.37.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..18336fa6297b2a21d4467a89c016d7cc8c50ba33 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:352c5addce6a931ef410d51ee022378847844814bb050523b6b25f30edb08ea6 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.37.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.37.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3dbaa7c98cf956977bf47e34989c5c33207c668c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.input_layernorm_alpha/exp_avg_sq.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7c47e01e741b1e7a84c540a825368b6b0c8af792a8717c44f6ac1db26242afe +size 1195 diff --git a/global_step243198_universal/zero/model.layers.37.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.37.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2aeb892af2ab129b0d62397f4d166530d6789181 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea24b5e87406aaa00919ecb46577740934e6bcb316d70bcd5bc3708c944fbc50 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.37.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.37.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.37.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.37.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..00c866c257a65319d6f0ee9a45f164e82e672baf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73fa15130b644a7b245039857f922e00c968cc7e009187cbd21506bc2e3d7a04 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.37.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.37.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..df713af052fdca530f89285b802b156dfe247f76 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca79576a20bd9b807526f8290c21cd8695874a0c3ba4014f96ff5699bf9b1f22 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.37.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.37.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..087c6aa79bc9d0bae6b967f8bcc7556e8eb4d0d9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5e023867438b4a7f3a17d1bec86d6b2b2498a958c7287f7d1daf02c07b8682b +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.37.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.37.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.37.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.37.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c8926c3af4bbe612baddad5a4c378b3f2f78726a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df70d01df42d96e8056dd4a1d3dcb9b3942fc5866139ac3eae5317f11187267a +size 36865244 diff --git 
a/global_step243198_universal/zero/model.layers.37.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.37.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a910c821cf6959f9796c845b13ff057339cc2672 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fbca368512138e2eb165be8653fdd680212ea8e272f93e17f85895855a33ce0a +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.37.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.37.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7656d6e1afff5cdd64c2c137af8e31f4a3fc1399 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a5fd9dc3413f8f54ccfc23846a1124d2af5da27874851544e06b4fcac5655d9 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.37.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.37.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.37.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.37.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ded882cacfd0c480410eb392447c214474cc3da --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:b503c6b6f3a1c4d5fdc27b703c2187cfc692a33f6c7a21a630105a50ef650123 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.37.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.37.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e6b031284ccb17d81c8b8dcb823aea4059facc5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1b0dac26da66cbed7bfd07f786133913829ea3013608dc6c84635a876fa1a32 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.37.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.37.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ecbd4dda6f8f495eb741ed9d28eaf3f42a39efe0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a20b479c2833c34b73d36295a141d609088a4d91b139c163063a6b8545cfab5c +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.37.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.37.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.37.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.37.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..7db1f1279d123b383d8eda7fb10560a6a3e0f48f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f856b3a75354214e6f0ff78932ec782bafba33e46e6f73eec3cab02410fc8c0 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.37.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.37.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f84232133f9cf03f6e043a3809eac3ead57d38d5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d8f5c52ac283650e90712a3bbf72676229bd9340ef3be07247afbd1392a3f47 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.37.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.37.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..28ec2dece7c51d73b29fe05df291fb6b404f897d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1213c51dbf33c3e5829bec5a643d2301bdb06e255575719916b349b0504201ac +size 8781 diff --git a/global_step243198_universal/zero/model.layers.37.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.37.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.37.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.37.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8d8c4c9340407ac438d82ff1d1480667cece073 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76a3cfb27de5e6d1c515eea52c22e49d3066547a03d8594d701685998f2a2775 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.37.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.37.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f58e10219efdd6d49ed161fd325a06009673229 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a3fdb016357ec025ef0ce26203f7988cd16b18dc936bdfa219f19a2ae433f0b +size 1195 diff --git a/global_step243198_universal/zero/model.layers.37.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.37.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cba58f30c7886b68122162a605bd6675a020d545 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c7cbd89c7911a68b80f4fa1790b4cb1488b1d1837ff7cdc27a65c272485aba9 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.37.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.37.post_attention_layernorm_alpha/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d146c040c414e40b7c0fe509ed979e89098949b3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8436a571ec3a90fc25684d20937df158d8df1ee65b107c8d2a07eb9c5b217a7 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7eec2d1df6af93e684f605eb75445723e91baa1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06b165802995cff958c0da67ab92585b68408349be9fa9866e552b70c98d6fc2 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b382b19ed9d6957ca9db0c5ea8c73090989520c6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6193d873c0d2d708d1124ddf37bb389e585dd4f97ffd84005a83b60405fb9f50 +size 2637 diff --git 
a/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5eefa8a8f239aa2be7d78b4b91296a865878a81e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d987b339a6d25aa4755131ffb3eaa8f8c5e84143ecdc9125f1dc693ccdd86e8d +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9276a02e31c09bc896af0e0ad498b7fe27838e8e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82be7a5f828e4e14f858c7cebefb83f38829d7ee0056376b44e7a5491c57a6c6 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f5a3d5fd8cbaf3afa8f3f2f4ae432af91f50690 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64d99ee9a54f20d6fac8062b4bba087f58d97600520d90db72e0f355e7dfe834 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c936905b7f3dd9caf7ce9128178c97fcd97d2be --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d49b5b650a9c61ab6381d24cbbcdac45165ac17332790bb92a5872aecda83ab5 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e3e2018fb0d38187874264b28b640b0b8fa920e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12a47bdd65c758d1c0a0c145ad4c766e3daaa8244fe6c4b71b256a1c415a7bbd +size 1195 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.k_proj_alpha/fp32.pt 
b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5faacf2090c9033f1eaa78b13c90e95a32acc17 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7f36e22ac7f6888e5482482f5dd309d459f8c61912130ffbb94190c41fa9e2d +size 1165 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.37.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a902d47c0f56a2ddd225e94c29238f421bcda75 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1362622d72e7e570d9e82a0c7183bd6eb9dea95b672461630df734d2224a1b81 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.37.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..776b7001ea9ac4341a013106676a9a98fed7845b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d7d8ed18f96267fb75c464ae7e4a643242e20880c43d0a25fc3e879eba804a1e +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.37.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8f2ccd835f731b4639ce6e8d957a9d1f2391b48 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:996046f9aaee578b45b16efd747512ee95189475b99584de42403f9d5bd0e880 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.37.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.37.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d28cc43e6215a62238128598c226e8526887a317 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb39bdff2c72d6b3a9697f2bed3fdc3184dbed2151fbcde88362dd79e1c3745e +size 1180 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.37.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..eae471b5e64a1f9d06ca81d1feb9dddaa61b11a2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f11531f26a026bf345ffeb06ef84aef54d09ac285bf18858f30b3d37bc511974 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.37.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e34fc76cad0501663bbd88b1bc054b806d3747cb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01b3f85cac9efcede03ced62ffece99008a3c015d59a638259965f4dda45e982 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.37.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f060fa583d7f37c428da7f7faa3af8d7dfd6b832 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21054514571200ffe993ab1c4b4c1edf0a5252beb92181cd85efe784e0ba46e5 +size 8860 diff --git 
a/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d34900c98ca4a94413b66b18ec33ec8984210e8e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:967b96a195d8f9f88f8a1946d7862158376e41f8eb5a42ef8126ada39fc21d5e +size 8875 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..29a34ae3cedf0cd66571802f45305a1302e91280 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07adef0675e84c176abae3b8a607335c389fb39d8cd46332d2bbe31e35be2ca2 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c96ccc95adc6461ffba66d2cd30f58a55e8e709f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.weight/exp_avg.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd2688cbdd514f74b090eda9430656dcfbe28afb134fb941847cd9fc8a4c373e +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a911fd594a9c814defa6443663e901d844217ed3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e1d441ff462c4685fb132862c3bd0a5a41e09fdaf3158e3e959f05be7c363fd +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4006ab11cc91dd4b4f71e492e1ed80c4372f877e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42db3987d4b6c3ba33a9afd14de2317ceceb770711d23d7694a51e5c68dba6d9 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..26df055ceef691501eb540fbd887c2756078d3fe --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f82c440aa5e5c54d885e762fccd47460fcf8ddf73dd3969aa846f33581f11864 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0bdc66b633c222ac9842b1a0c062e073cbe76141 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea9baaf9fb32e382127ba144ab7cad8d1ce98ca221b9419c3922554a6b99ef27 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c5918456337d5aba15e71b14e13bcf96f08fa43 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a113085950c2955e7f4c9b1e3448963a677dfd13087bc4c7d25e8653ce106347 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9271b7bb61b29abd659f9b017edd9444c103f45f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c12f367596acbcc67a1a71bedb0aa4ea6efb9613ae042b2ef4084c986f86b45 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7af016f47e7a09757f0f503379c240531cf3a64d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a769e290efa279270dc140a0df347416365d525800bb42ade4c57f9a49f4e417 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..834a5f718403298f413abc377f4df1acee300611 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a974b8380f7ad7440eafcfeddb82494df29ef4c72177f5cc20f2dd6771a77335 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.bias/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..03b3b5b7c3eeced91604cdac9460d39a384b713a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:479ef51f779c309154c069ee7b40821b3bae3b523e3e2f04ba8d64f3ad1d6164 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..57ce5a30570247d4e1ed52daed83eec6e4d12962 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f09fe8a43313c9087b340af57e792fcf1f1c691fd150a763da96f4cb776c0cfb +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..52e1bc22f9edd41f2a9dbdecf9886755553788d0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9cb3900e722d90fb08c8acd21bae435276855a998f5c6e205213f55eaa23d49 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..34d1b952299c8714d4f2d359e0ec38675c0929df --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7186d02f4b1b98b675ae33b65dfa4f020cf907f89cc92cfb6f6cab03ecc03979 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f40764f09eddcb203964787083f88425448cdd3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebc10261c21d885f66084254fd71b71c64f890b9071eb0a96a1b49149685dbde +size 1195 diff --git a/global_step243198_universal/zero/model.layers.37.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1397236f856a8aed360fd07ecbb2ef3a5327b2ca --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a63fb8365ef46fe66394e79631be46941037b442e2ae631bfed77084be18ea85 +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.37.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.37.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.38.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.38.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..402626fc51550e759c078d2a177fb0901dff3744 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a6b07ce56c58b77c30366cbf54f8aa353e0f866700aadaf91bb0df9411d3060 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.38.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.38.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b179ea8baf66e9632b59975bf03782d1a8ed845 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddebdcace4ab730006c88bea0b9f752a686fabd28d46bf4a6a7033840fd99f78 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.38.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.38.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf90e19582bd0ad6d20f44a760b7ae8f970e6be2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:8ceeb91703cd4239cd331d599f5effd93a7f986fe9c5f642108440e58af15c00 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.38.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.38.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.38.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.38.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d667e8cf0e176492284685971bd072e5c6c26b7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b619f536b47ae574dd875eb0d2ababcddf14a0f46c31b3c0af674e6582c77525 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.38.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.38.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4908f4bdde6a28da1ab0e96eacd311538051894 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26e81d6db1a89ba28d1b836a0a85ee206fba2b7674f1168a20744a310161a33d +size 1195 diff --git a/global_step243198_universal/zero/model.layers.38.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.38.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9bb58e0415084dec6acd7dba218a6bb0069c02d0 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.38.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdcaa905351afa75c167eefedf97ccb27381dbd3a3cbfe4a41bd3f7f7f200cce +size 1165 diff --git a/global_step243198_universal/zero/model.layers.38.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.38.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.38.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.38.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3249f0d02ac93f4e204becdb7024ee74cab7a8fc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da8740e463ac80638e5b6651cf12d320b89fead2f8b70f492923a3be424373f6 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.38.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.38.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3267c8fbde4491c5e6167465572fd0cbeee145ae --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64a00aec9d6aab8e75fecd64c8a6822fcf6edd6c4ceef556c39373ae0dbf7ce2 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.38.input_layernorm.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.38.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..915317d687f705c314b96414c1d855924b028c22 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa8f82c2a84a0896d78dff9eba50a640d01c7c584a0b608411f53a8cd6535046 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.38.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.38.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.38.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.38.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ff13304a3697ce0f5e34f24df4abc412441d6c8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e1eb939edefcd184ccca814845c65e76edc98c8f188e8e78fd6e516682d92f7 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.38.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.38.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..87a91b2665a47075fe87727e2bfdd9299bdd1234 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:0e18dce82114c716c3aa502236782200815ede400dba2536bed099c82c7a0734 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.38.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.38.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ebe6397506d2555185d7e450193e2679b8c5024 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbb75a72c945cfb3d82aed8e4ff871905b40d0e7195f842cf7e972aaf7577137 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.38.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.38.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.38.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.38.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4ac8317509f0327c67341e35fe243ef5c9d5a4c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3fae7f893a3d5597e818e5d01507a8660cccfb22c938fac9db90f346b247ba6 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.38.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.38.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e601878c45dbc607dd9022a19016ceea40b29934 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.38.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8599e715bd1c816ea46bb88dfa8d58f18cb51ad0d1889da7793d0a0232cd65d +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.38.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.38.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f60bb9866241061774bfe88f6f5ec7a115974dd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6491ba636cae438483649c29c0208ab8a77ed6304c6718f76c27e2a284cb2de +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.38.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.38.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.38.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.38.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0efd9d51a4999a13b25c739c4210ca395ac96da8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a69638d703fd03a1c929a90f5b6a38e72b2ea0a38fe5a99c02c69df877f06bf +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.38.mlp.gate_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.38.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..80bcfb60bd92f45d5923dfc6f943a56fb1648bb8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3e39dd315d9ed6ac1dc35408e2eccd523616c419a99c5b4b6433f4c4d966fbb +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.38.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.38.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c38d37250d4a600d98703284b95851e3565cfbb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05b53154fe856dfada96b2992a8ebb1847457470b778cbb713eadf624bb5adfe +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.38.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.38.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.38.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.38.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b1a75e02e163bcf5047e0ac9046ffae298f2f3d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f734444fadef4eb75897be4e4e6d6017f78fc964440efe110af2e67a34754763 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.38.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.38.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc34a05eefb4545a4555f9c5e5baa2e54dc8b24f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2892d8789da7453a10afe479fd05eb26fb7d39c8c94a1808c70fa8556776554 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.38.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.38.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a5098003dee9091b674980b57a547007406d5f6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1574c7ccfec7ef7a0f49e0994664f69321fdc87f0d91644c369d06870ca38ea +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.38.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.38.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.38.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.38.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..103ec9fb2b4ee2b12f2eb51fe35dc0f8d3ae72f3 --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.38.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb47ac53b1a42eaf14f674c08758ad4fe17c1d002c84dfb343cf0730ac1d532b +size 8860 diff --git a/global_step243198_universal/zero/model.layers.38.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.38.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e5fb26366f8acab0789c22b4a891a8abd7e92c3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f8b7843f1f11034e7b3427d3443c91a1fd6320ffc712a3d679d23ec37c92c6f +size 8875 diff --git a/global_step243198_universal/zero/model.layers.38.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.38.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b8efd5e70ec228947ca8111707b146e0d2d128b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c7a08cee35b258956c4c9e137eb8eedf1fbceef81c4e950d1c9d1c64b5a34da +size 8781 diff --git a/global_step243198_universal/zero/model.layers.38.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.38.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.38.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.38.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac2d9547f0fda5545023879ab6bc3c5eec264668 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03856ad6b793f8e2f99435c699bb656fe9b80d94a004af0588a835b8e2f7ad21 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.38.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.38.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0474bd6ea2b5ee4f0cf60614eb74553a87c791c9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39798b756c95d89119a7dc9bfd99a586b2d6991b315b61466dccb2516a0835f9 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.38.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.38.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4fbe3d5332b5f8de91c301fdf1862b380ee30a69 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:953f84000015f351b5c299a620c9d18abb07ac8011c646462d78e786a5057ad1 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.38.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.38.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.38.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..bdf7bcf5fc402f7ec3859e1ab510719c4f6a2f29 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e111c4675722e8e4ff878cbb0069732f2b5a760b6f6982dc2b2c22ef083e35e2 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..adad99b88b8210ddff89f5f6f4a70b0086fec1d4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a46a51528b82a9937b3dfa280594757cc1a9648918e5b39fe09997a1b5a7678 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4aa37e929a720fcda8245ccf091330531198deb0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8a5853cc63076100406c4eb06cff97896ba627c5441fb3555176c70c49a45c1 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.bias/step.pt 
b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..dea70e9e28555c196894782a94014eca938c0a07 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35afe7bb022a0aef21d772607497e84ef9581dba3b0ec186a213cc4b6c06e0a2 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2654f6ca06130b67c6b7dbde8a06db6c198fd126 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6e971cd71ee1b3d99e7fc1f90d5e7ddef6f0357217c0da8315a513d12dd085f +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..30e4f848ffe3eca0450eef2e3b6a228070227326 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8b51bf2f77b111f9d44c6a72f2f7f3570d4cc9b387ec5ad8f8560ac75660260e +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f868d210b479a9b75db501fc67c717922e894455 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25a052ca3b4cb35d2ae7bd66b1f1dc78b1f3581cd23cdfec9b922fbca56870d0 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff46985b592d728586a893eab82e5bea9483a121 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce985c8a365876b785098380fb4fb30f54b9a2357787ed650f7a9de42ea3e4bc +size 1195 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..eeb835d88d90437b2bcc3455420c9d9dbe8c5916 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22b02da631606327d0737de2e53bf8f43e75b0fe5bc80f1bd5ba54e54cc5589b +size 1165 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.38.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c96da45da822165e6ff0b8683bf2676b5023d22d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dedb2371e62588214eaee1f0684ea3f0da85859c48e69a880eba0ef69c00db3 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.38.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c587a85e362d782ec8ff23c7d6fa27e3587281c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49fe1a7b98471f739c27877effb9560c6312b52435411bf612e93714e25aa6f3 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.o_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.38.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b0377901e4d5f09ba2202cbafcb67e6877f434f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44745f327504ef58683740a759a7f5805d4590d64bfa56eaded000f219c3e912 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.38.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.38.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..55c89c495398147078b3c9f4fa729cae1957e493 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efd46f0a11e1ace9fac44e7ba7dd234421c9ea2db30e456806ca6f37233079a6 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.38.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ebc45fdcdcc6022520cb179a7b822f4cb4d01f20 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:19b5650b8394942adb74c6a413bbfdc93245f0048cafb6f7b954fb0ea476363b +size 1195 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.38.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a757f58aecb990cf2bf6292cb480890941f2d00 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:723783c31baf90bbaf721bf2dece85ab058e06cb179d6c8cd7d7ed50b39fbbe8 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.38.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..51b50a826f9eab751046719706653f0f87a403de --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e4519d59427bacafd7a552488ace4d5a3239f9014f84c4e663890ac31d02797 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a8d41a49a8ea9ce6cd43dc10e7b0ba0e723323f6 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c09065f6804f07c2fffe1e4b6eb13590a184267aa1b7e4b7f652b923eba8cae +size 8875 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e22502d01106ba5513fba6fd93ef2c83cc11b40 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:034967943bd6aa774364d192cac8b7b848a75b3b04c31c43cceb9bd3eca0b2fe +size 8781 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b93c81a838972ca3810990a6a70598f56ac9f0ca --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d4998d26cf7ff596503970170a52a0d58971ddf3c0fc1bb5f1df16534c73c72 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e4205cc59009a6c6bb861e42e3b7b338fa2e0fc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6631781fa2230131c62b2fcc99eead28115d68e5bf379ba1e4d5511ba116076 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..990e0381c8656e505573442988cbcfb2e67d74fb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11a84f63a64aa1402f8a27c5fc7d9f29a4a93aca2b49c3281d2285b979b5002a +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c896373db08755479b89b48b0462d8deece6015 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:700d26d64b98e13076bf4d1732f00201eda7488be215c50d476d2d92465e6464 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b96209a02088db74cd15f984c94fa61a9961a4e3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec1a93c8ab7a440c2ba7cd2f3813bf0a4b4e15f8f62f07cba3219f61228a0de2 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e778cb33b16ddbd02f6796d7bcd9013996239877 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c27281e81daecb9ca0d2b96659238255aae67a64fd7db1ca28da8d309eee3c2 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7eaae00a18d6a33c8a08d25ada55058acffd7d85 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61560ce98c109d9b2c86bfb4e6daa2fad034e94ce245e7273e5e9b19a26d827e +size 2716 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e8602d9c6feea0e8c603edadbf92cea09461b9b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b6c39192e9cc5ed43039d67ed52986ff905300d79c75811feed7e8e4051c9ad +size 2731 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..01b2fe770f11527bbb1f0706297c314c190baf74 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faf45fc5f21f41f97a60099ebf7c1772cb6ffc4b457406cb65ec4fe426734322 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..20b60c9bf7c92e4c782136bfbd71573bf3b9beee --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4818a9c08261f7529dd77a949b103cd4db5bda0237d4a825bd8af99d71698d36 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..048912abbba78961b8855b8a07398b5e69a8cd5d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:053693514cceb71b4280d8eede5921ef6c760e3407a34b128e35c3d776ff5df9 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b60acaa7eeb5f9c943a4e592cad48e388b99f0f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24914ca7b7a52a8e1388c7079d5143b12c5031df85b8cf74e5755c582996053d +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..509590f0dd17f66904a0de5f1d3f9f94da35b1f1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a86fa3c7eaf813bd54d7cb3fc9423117e6ad9f73ecaeae9914c50a3d19d14eac +size 1180 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..967ccb0821285565be28b175e9d8c38e9abe31d6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c942d5913b5a3459bb70e49a33c690d57e316af42de768310b8442f66f2304b0 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..60f268e51f3c290297ac703ff279bc1830162006 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02904e1a3641239dfc83e715862cf3e8344e04f5017e81dd21590cc40d21cb21 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.38.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.38.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.39.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.39.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..981a834716c03b704aaf522dc70d2dd8a4095c1d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:effde09880a078e6c56149a1ea6421d233b01f9227010f31899e3934f6a3e706 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.39.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.39.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..37f7a835b295c74221608303defb5df4e0a7389c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d16c9a8736e22ee7213178a3820169f2b3c32832301084f1db5be2038a45fe +size 1195 diff --git a/global_step243198_universal/zero/model.layers.39.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.39.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f852cd474e9242a846ef86fcb9e0baa95619680e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f24127aae179611b8e703a05ba0e3270840217e327fb74e0545ada62ad64925 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.39.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.39.down_proj_alpha/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.39.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.39.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b79aa5caa1d0544765c34ed36573297b0abbe78 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3259cf20b4317f88b6320f5bd1661e177966b71968b2219af3cd5600355ec464 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.39.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.39.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e89cff86e80062a7ca3073450a6f12d838442814 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:603f2bbeede8ac668664cc2f607b7357f805e3a9ed3438171041d95924e45f69 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.39.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.39.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..822d949ba52902eb976b7525fe4b630829385012 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6015c5c6827397e873bcadfa6185b3608ed0581142ce5e73edb306faa3e98a8 +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.39.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.39.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.39.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.39.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c9d6b36fefce08f2ccdc1397f49774410a37cd99 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c8e07f78766457367b2ba8b539c308349e6410a44c44d5b96f4c2f315b7fddb +size 8860 diff --git a/global_step243198_universal/zero/model.layers.39.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.39.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8611bce50aa902e73689eca7458ed4646bfe0f3d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fc4a33ff0e7c4ffa316f8c2f21b69e4e917cb54a5de40c4388afb925927ba6d +size 8875 diff --git a/global_step243198_universal/zero/model.layers.39.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.39.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ecedf31900334c37fd5abe1ec181e553b7a2a258 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7704d38c88bd17f3a492d87621658fcbef2a31236194af14425fac6f2f2f4356 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.39.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.39.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.39.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.39.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a99baeff860bbac61cb7d12f47a20c6fb0c0fd97 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42645cf73c673000334940cf92562b32444125aaa396943a3e54c881de954f9f +size 1180 diff --git a/global_step243198_universal/zero/model.layers.39.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.39.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e385f13220d16f31e36c600bf5ded39998b3de28 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:263390176600b53e37378b1735937b7bb09de5a8b33776beb130832ab857b3c2 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.39.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.39.input_layernorm_alpha/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..1c36dde007bbde2eb726f220d3ed859925d7fba8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64744f97affaadb93fd4076baa3ef665dd5a518754bc37e760219b67fdcf4619 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.39.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.39.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.39.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.39.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b177e27c7495e004d883a601875db66d36c9efe --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82c6049c4fd1e5c0c4f2edea835f1abe3cb20ae0b50b5695c6ea04f37601372c +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.39.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.39.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b53f44092c63e94b21ea5e5572cbc737fd6ae2e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e02b98605294e018c9f2a89757196d814427ea94d28edc59696acbb535557e53 +size 36865259 diff --git 
a/global_step243198_universal/zero/model.layers.39.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.39.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e334ceb3be99128b29e589b1bcdc005dd85c404e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd0ace5b488a161a9b0c61edbc117dfd09fc27d419f063b597f9f6020e7a0fe7 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.39.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.39.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.39.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.39.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c967bff099489c9e6a3282910bfa0ea3645579f6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad87d357b99b7b7ea12866c8ed0a8463925d2d4204d21140269be5fbf0b17447 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.39.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.39.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5750019cef072913ed630826f09e07edf97b26a4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:425394ec20d5b3e862c198cdfc64c250d01e108019c163062accdee301da725e +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.39.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.39.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d277dc05079de78b0550ffc396b610e4efeddeba --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:643c8d2744e59800db9443445aa704d839160adf8f936b5f3bd1495ee03cf117 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.39.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.39.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.39.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.39.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb4f96a2a13d1a2b6d9c1e9260a32b89cdc5241e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5ae1059465a5b5555fffff5a3a5ddf956037ff0aa62811b4505b67d730ca57a +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.39.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.39.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..5a0a2872b596794a8fda544c945e6ea0dc4f3814 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fd4eafd10801598ff97fc40bcfd8b6dc711f252a79651e19617fde59551ca62 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.39.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.39.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..eddd4b41dcd8d6a518566811d0d685a2ddda2f05 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c150a4a931cf7a5a99002e08cf9b482bac1358e7c5d3d82ddf5a678c1f4311b9 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.39.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.39.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.39.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.39.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ae7dc41dbb0e02415ac47c6cd2685df63b8e9df --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d84692cc3f9686f9054893e4c2f870558b4daefaddc0086c4e497aaa60c4f91c +size 8860 diff --git 
a/global_step243198_universal/zero/model.layers.39.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.39.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..50ff090289d999ea1bf88638ac243e38fffe3b4d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb1cb860768e220d4a2b58a84097bd3a0efd73eaa298bfa05c2cc55f1d019bda +size 8875 diff --git a/global_step243198_universal/zero/model.layers.39.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.39.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a7ff2d1b4261fa07c8ed13e115b52e701bc0829b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5894c67d512be094e68139ddfc32ea0e18ab2c85b5dbf32da038f7d0905a519a +size 8781 diff --git a/global_step243198_universal/zero/model.layers.39.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.39.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.39.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.39.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9847836edaa408a90388672a01118f4b3a90e00f --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.39.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98e514676900013fa606e68d40158b4488c1ad4842701042d8a8ef6c889e48c0 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.39.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.39.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5565b2f7ae918c148cf977466b70ab27dd6e8203 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a36589d91ef537a2fc97d09f568e873bde952e7f474b0fab05948b9df15b3dd1 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.39.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.39.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..79366e2d8e902dcd5bd62a84b957a257b03b1805 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:373228f5ef1e031f6002835bbedbd79f9cf8502080ba30c809d641314665a1ff +size 1165 diff --git a/global_step243198_universal/zero/model.layers.39.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.39.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..10b6ac430e3005c0fcf1c006cddfacf94e2cd9e0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faf52c005faa73eec1e2290eee72ebd56943529210980f99b5d918f628d165d9 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..14fcbe0069e831a22f2be71abf770705ae5ceb94 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d9bbf8621e2c4516866336f89fb2deba258d9ee8cf671979d327425431d7d30 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ef85f7b12fd8803149ea2cfa9c6489648e2d735a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2db5c4fa6c6f20526a15ab3cddf7d8eb747b7d1371c57d93519206176656684 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.bias/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..04fd8bac222c84022b7151ee9410ad7eec2ffeb9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:754424fa0a305b31c3091f34fba6298f2465c8fe0d93f89729a44ab011d8cb16 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4762f9b53168e3bcff533cb1650bc481cc8fbf52 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54e5503ed7beba2388e063a7837bc16fd87b4d3135c57722bdf2a655d678a0ab +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba6740d4770fb6e2c37da4ac15b936fe16100249 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ff4307b030214356f1aa81629d6c823a09518d20d6255d0e3ca5c97012ed3a9 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..18f3131063e147452d2fd389bb778db47a1a4803 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8f4340a4ebc0951be758867e7d6ef0b1967f07365761bb7b3e33fb2545a5e5a +size 1180 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..68823d13159de16dcb166d40cbd43119b71589b8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edf83d847d9ee3cffb988c46b3b9dfac53391ccf77bf641d11450d73c4708f90 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..da3191cfe8c773885d66780d471710ca4d52f0ff --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d1a4c72253c451d730ad43750d79b463c2d589667cf2b9a099cff8024ad7ba5 +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.39.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.39.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..462e783a3e849aac5e1e9053e4c09a7ea143fb37 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71885fdc0f073c8528b46ec915f1a36e0529ab332ad63bc3d0b366ab9aaf92d1 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.39.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..10aded0114c3f0fa9e88b22e19e561767d49a825 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b834dcf7590429f3f9c3ae4326829306c135c6f03d297a9e164d6f5e60b3082 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.39.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..61eca5a9b8c070c9f5ddd274e2cef5399081f5d1 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.39.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09a20f92c31f6a607ff7b78ab8eaf9e446a63d90ad22c240214b511d4ee429bc +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.39.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.39.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ad30aabec569ef3f98ae013ffb52858de999529 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0f2ce68ce463c60f8a020685e3b0fd4d86747486a2c3cb51f38a17b02b7f573 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.39.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..247e9468e449dcc5d7f9c7eb5cd1c547e7419a6e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97853544124fe39233ccbdc0c269a3f8b236de6429d956765cfd02b3c4aac422 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.o_proj_alpha/fp32.pt 
b/global_step243198_universal/zero/model.layers.39.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a9eec5f8d6f4c92301c40f0adb0d351e5668321 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4db5031beea8ee1800f1cbd925bb1d51a50963b6437ed584a336918bffa0972 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.39.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c957190760f3c6b0cac8b2d6eda489bbcf1510cf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc9fe9a13999fc0e6770a2e146262c426a7e9be81386d8199108a38eebc25547 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..30e9c42bc0eed6f20792c8d02d8103359d2b2634 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:dfc789f091ebe39a86dd119cd1e1d27e71cdbbc5c243cd7e96bc7fe773738ad0 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..106bf4bf23cd1a19041fe1e78221ece1daebe0e6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c594f49a10a2ba1753eab2bdd7440155900e7f6be8e1a3665a73894efc4e708 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5ffd87d2d16a6d2b40708d1c221691ef40f5ed8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f14d414f100463a3b69ac673dc15ceb747f1487f2804fa31cb2edd053c99f5c5 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..00fcb9430fef6707e5950ce3369b3c40a920f2ea --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:864aba437d1d89fd320cbb68bef0e5d7cd34e94a67d3f6b424c6cd9cd95de199 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e7ff56bd9a4f1cabe43a313a71473cb964f51e5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f45d763bea75a1955a1c70b455ba1e0aeb4c3e096e3fd36f2d3f79ac219b931 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..37b533dd73f5cf113c4b32b9cd5a774baf27487a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f0aee85889c5a07a05c242344177551b840e86972a98788add8309e50b19400 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.q_proj_alpha/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..08172d4062d2e7dff09e6f8e6369b41ea02c934b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:921e06f04f5bdf3c0d93b7f1fc2dd295d6adcf488e8bad1633f4db4e80390642 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..36f9f9e8aece670baa24fbd8a0319b32a38c637b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a06fe57214be597bef4653f35c28749219bb1a1ba9790ab27e5862fb92cb1064 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e38c081b2af1ee3d96d6931053e4fc3252174523 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:09d22aa1d2cbf8c684fd2ad8bc6d7856ed17102be16ad3cf70015db4ec36f611 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9798f81b41dc481293a4bbf392832739d5acff5b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a23751f86dbb07feb4d3ceabadd518fcb67db3a9687d8f7645dc751eff5d6455 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec73e0115289ced5d17aaf0fc4297fd20ba7317a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6480ba85bbc020bac8a33c5082cac041fb5577e0cc21e98c424d15f94240ecbc +size 2637 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b1a862eb5a5383223491af7d1ad7416d1e150ec4 --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a8eb7039eb06ec644a71e1b8ff2e1465ef68c6d298ccc4c0faec6bbf6320628 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8fe6a56b78a3e5f9697e8dbd0a67044f64c4cdfa --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2eeaa46bdc7901015945ddb172ffb8b8205fc8a5788c407591b7bcc2cf1e743 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..783c2b2014a9ca2aa99f55f2f19b777ae67146ae --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fba6f18b019366c87962fee1b3baa0aa1b8849e8ae4758b5d7f8c6d55aa6ac0 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.v_proj_alpha/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..be59d0f851bbe1b708986a03eeea38a24a5ccb93 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce2fe8e7ccfd0cf3edb08d6487902d8110e82730a7312485c7e1765cd8c5f250 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e0c68cd5f5e547a507d91a99a51c712137a5155 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c152859aac9fe0d44251cc9f758a8af1c644a025f79996d8b9d85f408cae99b +size 1195 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a42c43e3ca666ea9bb06b8e98b7d5475b732ee6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4145edc63e544aa9d3455773bb30fcb92d52179957c5a4fb6bed690c1b5df693 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.39.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.39.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.4.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.4.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f119f4c2c62e863c16c937cb8aa3c03409d4902e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e93151f53b055fb1bc536d516c44ad2cebb693855bb15930f80e1e6ba15f46f1 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.4.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.4.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..10a4c73896181663c7a9f27b4332c5e3180f4200 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b837ec7d000b8db486a7a4a877ffab8b08fe259a251c8dda4f249db33440481 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.4.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.4.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fbfd5576f1f7c7d415c72382e9c6f386886e6219 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6ae306049a43374d47cb195dcf2f1de8f888dbea74ad1feabff4540ac850c6c +size 1165 diff --git a/global_step243198_universal/zero/model.layers.4.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.4.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.down_proj_alpha/step.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.4.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.4.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7ca045fad50f7334eafc6777c67e38c6ca9e1b1f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa5666193eeb6f5e1228b0a7993b806c631ccc8f81d4b9bb4bfba567fb18357f +size 1180 diff --git a/global_step243198_universal/zero/model.layers.4.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.4.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6057d29f82ab0185e9bdc82b26b965eb7a351e43 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5680a5bbb7f4aa07be0a2496fadc4f4efdd20ae6df578a371d54be969db59e2a +size 1195 diff --git a/global_step243198_universal/zero/model.layers.4.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.4.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4e1fa1ff2c3c936dab484e67d733fc8b84668b4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:466c77531589bb044e829042256d5153a624d0f26f76b1713ad76a92f9456114 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.4.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.4.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.4.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.4.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7d3911b9be4270ce7a37c5ab408feb4dcc89529 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdbc7d1dff2c4aa76d829d17745162efd5d6eae9518a2f9b4d0aa5c29fbcc325 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.4.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.4.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..411d21a9d799ef3016a1bb08f3b233f271290143 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94dcc1ad8557ff518f778cf3172d83c2c4f01c94d7bc92f58cf4faa8815a6392 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.4.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.4.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..47bf732c952ec43cdc3c20a0d34e58d20f870d1e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29eb78872eaca41173c1c333eba9b15aaf854b534256fa01df31dc12bb512085 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.4.input_layernorm.weight/step.pt 
b/global_step243198_universal/zero/model.layers.4.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.4.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.4.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d0b554b13fbdb59bc50fdbef89b50b0e973ad76 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac0d186f8eeaee45c5e424401b5eaca427e6a81f63c06228613a74be9b9012ca +size 1180 diff --git a/global_step243198_universal/zero/model.layers.4.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.4.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..94f951aa1c5e00e65a34389c14a5163bfa8243e1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42a911d63cacae76c3364d23f5d9674493e40dc141f2f5082543921dc3a58e78 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.4.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.4.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9582fbc96fb2c867c7363d2763b8025deaa466e0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:03920123285043fb267bed338e7f04af632e264eebe157c842bff69b23ec4000 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.4.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.4.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.4.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.4.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc9a88f10d87863f4ea779791d83b471cc325e8e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26a200a2809d2b6ad597ba4bb272da96969fa1af5e0aa813761ab71025d81cdb +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.4.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.4.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e17e9c1578dd27ab2c7ae3e9f3212ef56fe2bd62 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:574e21571910356afe93894594a554c8a81ccd32fdaf3b06f646b313658acee6 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.4.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.4.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f07b12c6d8075356326d1674f9f3eeaa782400e --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.4.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12d185717993b6d94edd8affd0a7cba5a2b9a986f106a062466e79c982af0c0e +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.4.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.4.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.4.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.4.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..01934613500fb5fb954d7285de77a633c18796c0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa23439ed768177492a30ea64c7559314851ed8e226a063495114e21a823a35f +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.4.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.4.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8fad41ce2778a8a60ce367c07c30af12934e6be2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49dc824920ec5534922404ec30ff1469fd250a68950632ed5659beb2d79bc213 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.4.mlp.gate_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.4.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..58eac66b146cbf17164159c629cf840e6b94c25a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7df1d304d735c3e11cd1d4e0b3598f82635c9888f29283726625b1184787ebd +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.4.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.4.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.4.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.4.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c33904e41fcb27a883fd358d21afd6b81d130325 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c40318ea118456ac11851eafe5c3a1726ff70e1343454b3a909af0082d4cc351 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.4.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.4.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..664c962eef4ce38ab002bee9b492359233a52e1d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:eb2ad2f735b918fdf5f9048cefa97bffda30ae1e4e777c617967e4c3d0c8f7d7 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.4.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.4.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ddfe0a75b52e850266cb0bba7486c9c21626fff2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc837b31ebf64839d7a21afec2cd28c0af25b4d80ee6b45211a1d712087b31a2 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.4.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.4.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.4.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.4.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..699ec38f55dd3c92cd4537ec45651a63e2050e93 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af13dfc3491873e508133efc662ee4fe24bc6fc61e444c8a319bba003051802b +size 8860 diff --git a/global_step243198_universal/zero/model.layers.4.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.4.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..61aaa5c1ada79dcc61b9ecb3662677843d9903df --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9a42fcbbf91e7b71148734de743499443df2b83a3bb38387d485afdac2c246b +size 8875 diff --git a/global_step243198_universal/zero/model.layers.4.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.4.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fefd2d92e5719ef1727be193518aae55f70985d3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:240eac2e8229f8605192d6b42131b2ee96a414798fcc139aaaba9d7797b8d56f +size 8781 diff --git a/global_step243198_universal/zero/model.layers.4.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.4.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.4.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.4.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1350be0aa61c82512e4112a0723024fcac3ba24d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:cfb0e369ef4e1b458e192abd8a2ba012174114508b0a9eab94b8d4ed2dc92ba4 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.4.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.4.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..87dbe369f03151d8e29cd4995deba2addbec8393 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a0f1192126f6207821631a1c7a1d6101da8808ae68095eca6a5e80bc5e4ea7a +size 1195 diff --git a/global_step243198_universal/zero/model.layers.4.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.4.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c6d2e074361dfb10328ae5a9b51ae565a3755677 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0cfe60fb398f91924a7d875586b0f246a423db6d620901e1f8cac959a7d7f72 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.4.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.4.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d7ceb7c2f0af1c7a61e038cf57f0111f38f4b56f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c026a9d5d3cc640c9408bf5eb47d4f00bffa4a7d6a0b916c39d5da44d2f751e0 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..09690dc8eec90c56b9c49a4c646379d5048ad548 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04479178f09d1a009776cb266e4a6705d1cd69defb39b7f0ba390a6cbe6a512d +size 2731 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8199ad787309b2d31951955199b72a1ed10a033d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6af8d38ccb18838604bd68cad002b37ce63d587228a056d90817573c7dd807a7 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd5c49c0dc8d1317eef86356e0ca43cafcb78986 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd9dd1edb535fc419324479e8ac17776cc857a886064310c254c14897ed10bf4 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7888701a1e42189bf4329f5c4850eccee4e919f2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3418c094199f5653f2ef5e023884b05e18a82cb3e78c67c2ab542769b837cdd5 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a9402281293d9ddcae609a1e724198143053987 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1417be5c4dbc99c5659cb85cb2559c074717a238231d20301e3c03f6ad90f4e7 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e5e3d358dd0e4ce888b93d55e6bbf6e2a0c70c6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d83e0d035655e743cab3865ce54c6152de3858bc987670b000732b173ec60d5 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f221ed5d7f986d97ea6711386eaaed4043763251 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd22961bbf94dc44615621c4ed71b6c05ebe64ec467aac3fa56f653bdb7d75da +size 1195 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..87b169b305dd012726dae35bedbac64049956a8a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bf9afd965fd8f8b68b1fe7ed2ad64c25ef325196f6e0d4a51769f14fab219cb +size 1165 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.k_proj_alpha/step.pt 
b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.4.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4abebd63bdb300f832443f8c214e97661c022e0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4044b5eecd514affa6aae3dd432a27dd7e847e95ee729f88c4dd0fc6aed3e4d2 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.4.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f39fe2ebb946471bed771d233c1d13db2054c7cd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ff688f80786c2b1855e0bb2b5fe347234179d53acea3f16a06791a346ba1bff +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.4.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc47ec2ef238e07df9ed2a7f525925f54daa9e87 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1e25a55d9aeb912961e6a8187034dfdb335b0c2c098a2fb7e528ade5138dfc20 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.4.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.4.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf6f5e9ea65b19e76edbe6aa3a675878c469b088 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5716b006e13a50f4242e7553935ce8a4724376220b2209f18b4dcd278d97d7e6 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.4.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce037b17ce6035243ba948b8ed0211425fb07de0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95093948260dce29f68e987efbd671f9feb83ce73c0d10debb15d0eb53b31492 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.4.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..696fcfad42563024bfc35bed10e6f91440596605 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3706962d20a2dea8277923bb37a02de3590f0b770c3ecc1b823b7df85b42d163 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.4.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..511ad2fd6f718971c4e10d59c97936ddef49dabf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aa89214a615378b9ebb6e0274e74862c39e1144dfe75c79de6f4e16402d3d00 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..28207a4e71c887e1ea3f0bf1efc18208627759a8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14abb8ff8ed8408210a12d5ca2dcbfdd53e3c20688e8b0b39bf3eca57743527d +size 8875 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.bias/fp32.pt 
b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8b42cb268064b412813be7a9b146dae47db9487 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15ad38a1df8af438b4a6b546529ad35819d4098e85d722c2e406380604f2d38 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba1a8d97e5f46c41f820cc75610ec609d25bf62f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c7db0886724948ab18479bf99a64b8debd1a6c8f058124a61f91c51ff313087 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e2d6cd052915a4b1a4b864ce375501ea107b820 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:2e4a22391bc13a253c6543447c00a9b3352ab75be1c3bd004384f56466da3d4f +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc1f6ab1be35e353e0a31de6388d6515aae49b82 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a3844b7535a0e1aa428141f9209307ad882c5c1a55a54a5e3c9eef55392a88f +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..65722f2d7cad2de37ede7b1ae551bec0fd5eba47 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28193a0b3c8a62cab5da300cdb5fac84819cb4a56527213fdf51f96b84b7c72d +size 1180 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d315a10b5a863335846c83d9f5e0b28c49ab510 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a7b7e3fc52ffc66e018687c98e83805344d6354b482c6a6b7c77f0102fefbcc +size 1195 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..83c92b3ae7aa9eb33ba79512598fb94ff25287b6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7ffdcd801d79c65536bdf76db61fe2b6205efcf3e76227c6768e7a5bb5a92f5 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ce39f8e5be53770bc02a1835d02f826b0fbf077 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac75a601c52b0efcdbd5ba4e576e086d431bb26515907c1b98fee9d2d9cf38fb +size 2716 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.bias/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..52c370198641b87fe697b467cbf9825ddf7384bf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0483bb928f4944413f71af05d5886186cefeaf714c3017cc426c5e8119f1306 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b41f90a0e505b74896feb491e597069586b834af --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d135c9dd941f1b6970ee0757fe86d06552073af044cace607de8db4ddf1404e4 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..08bc119bdf8a51096c3a1ac84d3285d87cd1df11 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:142314ed046ac555c14e8ad5270eadc857e2c1a0568f94b766bc45123c5145f2 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..098fdb8c8e343ebcb0f5a55766e0131dad141901 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d3dc8e1c07ccd3b38744a86952920627a07793b15a47891674169b84c0e31d3 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..74da93d73cf370521098a3dbf1f08eae90f5e34c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31f67f5a6703d6ad394bfeb72ff6032e96eca3a33289c5edfd8ce24f6e8bd04f +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..169e90c297b696c8582108b2cc59492f4c399422 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1475d826502a79d3efdb0f9d8a6af00bf39cacf2f743fc6cbdd913ff41406a1 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c117fb664174178f059edb03fe723969a51db24d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed612de6a9d6d7ef51a0a776ec8e7842021af7978bcc3c4e7fd62884d37bb67c +size 1195 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1bf71c749c8e7eee8400b053abee427d846c8675 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5109f91955c171d2beb64d731160a890f08659b8bc2cef94aed3a94505f3cec8 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.4.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.4.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.40.down_proj_alpha/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.40.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1158111c7e284762e2138b1478122404b966aef6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa115ca01c29d408a395c28d51b0d64eeac4e7e08308a5db2a6a1e89d234063a +size 1180 diff --git a/global_step243198_universal/zero/model.layers.40.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.40.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3052899dff8927381ba376c2dd24cc7aa4f1fe7e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:442d809f5375a749c563d9a2ca208cceafb66f2a0b003f149cb2256bca440484 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.40.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.40.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..751643e162e3fc55af5e4c9af5d0f61eaa178de4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cada3b6412d15230979d3c60f3b014fef30a45b5ec3349564e841e8de94378e5 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.40.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.40.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.40.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.40.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..99ff5505ebc1026a93828dc0ccefb16e22c967a0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d266dbbeb5b255f4e731a952591818e8d995c50f7ec3f3db87ac005585ea0dd +size 1180 diff --git a/global_step243198_universal/zero/model.layers.40.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.40.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c015b5b3fbed1a056dac9909db3b4f76c5bd7fd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f24cb3f572676072b552fa7c547402314a0d32fbcb80f6b85986ebbe704b817e +size 1195 diff --git a/global_step243198_universal/zero/model.layers.40.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.40.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5ef5ca870b1df78723c43f9fa34073da053e1002 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e9ce5735e5197dafaebb3500a71d51896626cd788d5f36876033b93ae7d3905 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.40.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.40.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.40.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.40.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a56c4902a6df7515a2aa85ddfe2b246e7123c92a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8a6326ea91ab9a05945632d24b7f2237a9e59c14605973be28dbf1fa7fd82b4 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.40.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.40.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8109a3c52ddbc926c0a2e9ed57bcbac86cc35675 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2b15e0b8cf33d9178a0f45172ca5dd1aaf6f86d95726119c219fe0a34a46004 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.40.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.40.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..336e3206e3b1a1ef3abc6cf60b964ef8c0a02aae --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4235b10ed936ed408d4aaed4e01f0f08404d04f7968f7fa03962a98c0b73500 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.40.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.40.input_layernorm.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.40.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.40.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b690b7603252868b76e0337df917b77c427329cf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a99161db0234bc83dc7ecaeb2c5fe24a6f30f2a7ffbc7585e60224ecfb29a3fe +size 1180 diff --git a/global_step243198_universal/zero/model.layers.40.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.40.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0cb1c0737688c82752a5e23155858d6b407a4c05 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66e99d54b0ab240e5d028724e31f8edd6321393bfa2a3c17b6b30fffeca01626 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.40.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.40.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb35a638118a04c3cc742d3ba9dfe010de8727a6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86e9846cd7feae4857dbf2e86ef308989b34bd28f510ea6b86d4bd701a4080ed +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.40.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.40.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.40.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.40.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a251da2782eb1c7fbde89e57ddc2284f1a6b2108 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c49f87408bc0e4e903d6bf770497f2a8e1beb351da8e88dc6c6546e28643c856 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.40.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.40.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c1d4651c14d3e319e68beb8df737df5af7f7a24 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29f0daa877321b772d047242eb7e432a272549842a21de13a48c4b60ef70a459 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.40.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.40.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..34ad02cbd84366bace250fd88ef8d23ba0cf2fa4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d8408781061db8d1a610478b12ff77e2eb7fb424eac721294ab048077159d05 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.40.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.40.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.40.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.40.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..66199a95bc40dc70ba2beeef0b4d5b7ffb16623f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6233a8ac51aa4473461563cac47cf2d62605cc2f0861cff3487e1d8a78edd293 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.40.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.40.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb2ed17e840a1a89d55638f6e2872ac5e154f9cd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25dae281592604f69fba9c4fa8aa43deec5b6e5cc723d3058473cd098d29826c +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.40.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.40.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..58b2a719dd94b8c8ee1926120609ba974f5ede3a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6b76b6840c3e9bd2441656f90191ff04ffca8336bea51904b5796f9a4b44fc2 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.40.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.40.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.40.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.40.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f9bb3c598834d36ece8c8161863c53e94aaa31a9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1f3c8aa16991e69575ee7c070bf1b6bd3d6ed0a560e199b2d365e8fa6462f37 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.40.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.40.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b782324dbf4fc054c0ef19c6fa0fcdbdf188a43c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b5e293ea93648116adc9e66146394d63fdef53c5d66d68fb30708ff53c767c3 +size 36865259 diff --git 
a/global_step243198_universal/zero/model.layers.40.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.40.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0529c12d7d3085513d729c6aa715d152b33b0b8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e72e53f95ccda2ebd11e0a0fb0724269581d54938d94bc65478abe1668043f66 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.40.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.40.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.40.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.40.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ef1b800ca854ed1cffcd765efb6109a5d9df8f3d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:570ed01575efdc22632fd31f4213dbec81ed922c5df758d8ac1cf2ca04d0b9ba +size 8860 diff --git a/global_step243198_universal/zero/model.layers.40.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.40.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6990313b6086ba097f525d01fdf81a0ec6dcc9e1 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.40.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfb6de19c9096bf3feb72aee6a926381fddbd1036e55041e3f7cf9ad985b0e6d +size 8875 diff --git a/global_step243198_universal/zero/model.layers.40.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.40.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..bbadd9bfb0260d7f3e54803edcc9bccc7adfc273 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95a1544a24ce24b8d04be7c432fff3172f2debe45dc4306396d6e375d623f4e3 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.40.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.40.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.40.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.40.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d55c849a08bc5415dabf9823e1a8d0ac837dc8c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a926edb8dd2146af069626dadc55562e3fbad72f107bd30c33966c374006688 +size 1180 diff --git 
a/global_step243198_universal/zero/model.layers.40.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.40.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e15d917cc0aa975677e1e6205999700e2e9d8cb9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85081b4920f00f1f4275dfb359bc8bfc777db6cee5489e595bd572fff97c12f2 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.40.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.40.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d28baa5dd7ed55cf6eecaa64e9bf760bc631a059 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d5689ce886e446a47a40d3e220bc54268e41a361702733a7c526346a0b9c508 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.40.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.40.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..138a920f40e3261e9fdd98ce8f96d10dce0a7ce5 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94ce28742d1f30c03395f8edacd33ba66b13795668184488d66670345352e435 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b11a1e1ddff511b714f1bb11850bbe3a91d91a72 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d86f01ba7d5cbf10c7e9fe6948ebfaec31b2d3f275a4bc9d90fbb8f85581413c +size 2731 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f37bc8f20c871b82e9318d6bfa74807ce0f2a984 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8c24decc58a1cf3e40f875cb6761af2b9ac0e230859769e07cd245120cf23da +size 2637 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c8ecf86e21e93e154cf7aebed6417aade880fb3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b00408f65bc3a426b56d1538e34ebdbdfb46ee9de019fa750c3235c0451b1e8 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..11b7798158a77b460160e3063aab10889bdbb259 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c82b10d07779bcf9e6cb2f3f376f9b85ecb1f1e661627776a58adafdb8715524 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b015e91d3c0c173ca375d4064d95179b9d3b60b7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64009e04505f0a3e7cb2c42259c4e7a9dcdc6efc3897f37b34881969c508d068 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b87572117227776f821a7b74fe40b4ce43609392 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9bba90cc4ad2345be2c83ea0e61e9f5bb3836f0ba08034c25d74f5be0ebf4611 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..89d7aae2b21d51ac94fa12571f01552a1f5fdcc3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5e814c6770bb2fe6520b659165009d72eb27b016cd7b74a064174dc650222ea +size 1195 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b2ba389b28e6e2e874362e3995e396670245839 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32f64107a948b5f8754e4ba20ca8f7a56bf223364745f38d4972a5e68c81d3fc +size 1165 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.40.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d620845626988b024508ec61628612dbda7c9be3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e2e5fd3e4c29e967ff2147693b69de82708752a0899fd65d3e8e11444c8cd68 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.40.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e10d218843f0864a80f83509b7ed0faeb7e39aa --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5308fb07682bc55bbfe6d4b48e92feb3e06bab00ab0dd52423711b0461ee0d04 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.40.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4de659fb55927b810b1d3139ee9a2da7861f23f5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc3d20683230e781e8d40d0ef393d0ccdfeb8fcfaff8be624f8a45c96dd8a0ad +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.o_proj.weight/step.pt 
b/global_step243198_universal/zero/model.layers.40.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.40.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd80f102c07ef2573f0e8c48c1a7dcb276f274e4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d1c3dd08d4746ea2ddbd8663b18b7c984d1f29eb35a8e612bc472b3e6a5d240 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.40.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a6df3a350fd822d95ecfbe426ac72228df817e5e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:323e8e60d58e0ede2172d6ad6d83b91f86cae769c4595c7c69e11660eeba8670 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.40.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a7203bca9be0345a679735feef080546b39688c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ebd4a09d28dd90de30e96391a407d391dfae0ea4dc641f2a84c52be30b100204 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.40.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ab1e904e899dfeee9cd666b359e5a6c69a56b0f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:119a8229acfc99ac59f31bb3353ba2e14e5aaa0976358e4fb318af86275f8ca1 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ebd7a9d91ac7a242166f78c78e87b27e079711af --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bed0cbbcf2bee37670d86ed23ce31cbd96f69f50dd31ffb9233feb7cab76dc90 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..267661c045502ac6ebe875f4f95c530dbf5020c6 --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f03f9b8e0db9ec3d83d0f9deaa9f00909aa559112c0d4c39e124b6242e07d6d5 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f10a5b3a7ae407ba47fa6d504485363c5d8e0220 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:248b7f26b382a608f89c1f918cc872d0f160a42fdc6160299b7002382ac6af8c +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..19e591fcdd166c47879019be562b7d629b54441f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac0588ef8f0337365d3ca6a6ec2ec3b193f912171b3568ae4ee5954908d47e97 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..070c151c00d53f1a6f33fdebe086f50aa8a0dde0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:579ad1d2f976b66f106762cc7d74152e0cb90bcc861c44f15c63101cad165151 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..43f8054e295f48b0d0b54136f2c28aca647a3f57 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8707ca1b5cde6d7194822d8f0c54e367063df54e456caf0fab512d8ae0d5f370 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ab4185fd6d9acfd27287b549804ce785f10eb15 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:37e77cc07d98ce476d790e4c40e80d2c00bc70b978d3fc0701771ae96092e71b +size 1195 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2299d5e067a54e8e8847d06dedc10ec200b67521 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36a7ea397f0113f1111da00800d5787caf59a27a7061d526baac9f201a2d9f88 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..32c1ed9319a32a4a182eb3908f4e5c8d3f2ecd66 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:654ff2c1ad4a54d288dd75a50921d6086f7dfd3c1c95dcc55a24f2038108c6f5 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..35b042bad3804c7d4ffa5825cced461602dcdcec --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97aad0dd04611aec604d92b56a5e2bb40dde1b2183b5343b55e63f9c7b481edd +size 2731 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e86ddef66bde54dc2705c89e2a93357a3852a21 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d25ef10bc48f292ff732fc625999225936f5abd98545d4467bda3616ea0faa1 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ebfe4b53b5473b66e1022f95182f6b1a2ec574fb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b89f3bbe37064f348c7b57824f1f4960b78ac6a832b670d6d9a0a0b0151904be +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..eafd59c9de12ff2e9226cfbd5368673ad67484a4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb9f230d079e6b7d2332bc4d4ac0c857573975b717095151976d5462aeb38cf +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..915e414a57bf5c42b1457de8b3b42500c20f8a05 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e0b91c0c6e85852eff013218315f94072530f773b0f4a7fd7b11c43d057c4b5 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a89e9a2b00c54c46b426ad4068e5556c94a9f9f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:fd7c39d1401d100ffdb2e5c6650ae6b7e092649414e523aebe488033f164c21d +size 1180 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb52271ba780129f3956089472118378cfac1732 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f75b0a041f793cf27962adfca80f38417058dd898c4cb215361cb2b1f52fab3 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1497b7cb200d0eb78913f6af0e80ec48e8b0a39b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:677bbd2f5e1511d27a185512009ce68119531309c7b49cff3cdf253d99a5e278 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.40.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.40.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.41.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.41.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a9c62276b7a114c7797ecb0449e836eec41f05ed --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.41.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67b9439cf86ee787c3cdefba08f6ff64d460e5d45a6e3c6038d3c0ab3f0fe591 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.41.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.41.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2de4d3a5173f9ee0140a98b3317cb8c8b2b0fdec --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da502c4f395bbb67450026eb8cb68e9279f97b98c5389ee2cd60cde856b50802 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.41.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.41.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..aad43b745132f826c0959aa1172d06a9ea7394c5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8e3276b3fb09481eb86143b874f86e8462df613f1ad92f3da965e420b9aa0ff +size 1165 diff --git a/global_step243198_universal/zero/model.layers.41.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.41.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.41.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.41.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..08c954dc1aaa113655681ef2e76cfd78b87d4c80 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf88945f235329738fdf3f465c9c9afa88c30eef821f6cfc1a9305da5bbab535 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.41.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.41.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..584802af374fe3a67967befbdaad234654679091 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29bb244d27732996942f00fc49b0f35af5945647a2fd6666a4ede076ef74e8af +size 1195 diff --git a/global_step243198_universal/zero/model.layers.41.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.41.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3bce98fc882c9fbfa5b9808fe495f214b34d1220 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36cb30e39326c8c12cbc9ae4e3bd96a44b8e734c5ebd364249a390ef7e6376ce +size 1165 diff --git a/global_step243198_universal/zero/model.layers.41.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.41.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.41.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.41.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4bf28662fe15f33bcf01ef196fba22919fa25a75 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:632fb2baec9964379cda1d3688408fd62804062461b29c6a13b0b21663b43a73 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.41.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.41.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4af1bfed5c1606a3bf2d91303e660cec46dc0d1b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16b3355a77af1a543433c59a73e7752d9c74360e4b2edaf2b79107995b326e9c +size 8875 diff --git a/global_step243198_universal/zero/model.layers.41.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.41.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..48ccdced47fd1025e587424dbbf45a07a279f37b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cef48c8dbe326422ed65cd97d0b26d7b9103efc1b208971084a098dc8887df10 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.41.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.41.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.input_layernorm.weight/step.pt 
@@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.41.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.41.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd7b279a94d136e00066c8fe386f3f955a9258be --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4bddb8d118c8559f8a237b644399fc7accc85b00d03a2398d27bb3147553e67 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.41.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.41.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..07adea242b2e9c4442645f8a61ced27ce5237bda --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac080efb66a0b23fe278d0f78d29006eb38a7e1a8f658cf851a40215a18df6cb +size 1195 diff --git a/global_step243198_universal/zero/model.layers.41.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.41.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..973a7199a27e2cac627347bb9b2f0e23edc85280 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7351d205fb21c62bb5432e24d858cb3adf967b485d8ca83565377755d03f0c70 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.41.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.41.input_layernorm_alpha/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.41.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.41.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..593f537bf2895daca2da9203d0293c2735990d43 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:510f6de49248ec246a95d779184e770d4aa4f6768c90e896e838498c814dd4c2 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.41.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.41.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe260db8f0847abeefa7f3d1dda2e5ea3f7ddd17 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a192613fd4c1d9a54a94526bc1bf9775754f71f580822f25df500f8510e47c89 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.41.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.41.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a6d980f71d0c017320203cc836d4470da67d799 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59b321907a38565f645d8993997654b3675b2524395441f7c86952ba3336c03d +size 36865165 diff --git 
a/global_step243198_universal/zero/model.layers.41.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.41.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.41.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.41.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..368c14a3dab605c9fe96ad8d2f48dee4821fc108 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfe59b7aa9f6f02856259c9a6b65e88cc7e072ea2f4542e30bf562a8098441eb +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.41.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.41.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..13355ca89ea715d2abf57cfeee6333d8afe4127d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d1f04018d8a9cda2afd8343d22533279b48625482988a107fb077f1d0df8d36 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.41.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.41.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc128368322e5b575d4fc9fb951c7fd81327ab0d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:0905cd62192a415b5c63e5ad701884e41106e044c727530e00d0ed8b8edcab88 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.41.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.41.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.41.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.41.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..23aee6f5b821a532a150c627e3d14143135553a6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:806636becb8df06e3fc52fa2530d77a0c8087a874337bc3fe14767cc297a3b77 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.41.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.41.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb14d368890970fbe10fd35e9b6f859097ad30b5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d14fb9e9abb55cdaa18008cfab952afedee7a3b8f73a60cc53974c8ce08207a1 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.41.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.41.mlp.up_proj.weight/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..357f8dd16247c7030f18e2e51d6110c3ab629c15 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50d7d57b3104ea8ad74e465ba3e7f2432a2268f9d69d52687431bc709fd76929 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.41.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.41.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.41.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.41.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff3e06e4e73bf59825c54451ba200fb4f254b53f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7a26b14fb8c36e87ebdced69ca90b0b5fe45365b7abb950e2aa2a2876838561 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.41.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.41.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..066beb7319f2ee5af8c55e09315032d8980756c6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f35c3eacc748f311a6cd647d0487b8d2fb048217e646daa1498d5b79e8d837b +size 8875 
diff --git a/global_step243198_universal/zero/model.layers.41.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.41.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad9f4bacd7ba50884f712f24a475de6482a1ed9e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ae40c365969c18d45b8627103068782671b299fa40e6b5c98feb2499fe272e8 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.41.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.41.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.41.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.41.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3b4e5cabbd4849f923290633bc256c893175d5f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95364621565120d42aeb6112aa45ba09859f7ba7a2aaf23657226ec6600b3260 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.41.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.41.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d03452e17dc5e6dfd47f58f881163cac92626bd4 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7db8caea7e96fbf3cd98cc81ad74e5da285c0252dd65c2ea7f1c6a18f670f143 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.41.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.41.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d847e5dbf23949976dff453df22dc7cd15618797 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5d6cd0dfbf9600c44a7b480953ecf3c677fbab0e93bb8159f586547c17e44fc +size 1165 diff --git a/global_step243198_universal/zero/model.layers.41.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.41.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..53f7d05eb8e134dbb19e41980a01ce9be00b88cf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40cdb4ecd8999e20b76e30fc26fd430b599f046f626078a8073ede66fb636e82 +size 2716 diff --git 
a/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c62be56f212a736eed61fb7777cd7550a637b176 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d664392855a7ca75e86b314398fd519be05c9a865eaad22eeed9bb41c5487ced +size 2731 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5bdfffd7b7172f304f7f673f0a86259a3100e168 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5972c481001729e177b2f73cd7c7bc0d3c2ed878c328353d94d663bf89e1f55 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8f776b8882f3acd105b5693c719ba955cb84026 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.weight/exp_avg.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d35e285b25ca5b6c88f65003e022cc5411e1d64992a091201733b134a86ad11 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec2574c188c2f7e47694fc2f692e18d5664fae90 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:866de426c8f38a39b099fa0150921845126f278a93c5a08b3da4a9aa7ca96b82 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3fd8f0bb35b872f1214a3d623a946001ac79cf55 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9519d42fb53e1f4b8c8baf1f5506af7c85e1aff252edbdd2e761a0ee2510b4d +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..42f1f4f58d88ea8f0ee01cb678f782e80e50d590 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:687963210779b038fc02da8a121c1051b656f82ffff86e3d766c4c4147a5bcf5 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c8c0659ca12eb8b21d5b8daa7b145c9e8d00b09 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef60301b34a2d1a6546dca08ce81a8171aaea34d32bd05485191b703983bf229 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..50789f8f9fca2239b302c0c81f7e09e345ffc94d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10bbbcb62d4e0609a1722108415208af8bdf713e46350b9b7b9d98e98e3ff45f +size 1165 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.41.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.41.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e49a79ee901fd228b53b20c14519267a1c3072c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ae7f2e00f7ae82cc814160e6283d4af4a8d26e42d579326fa4f2842deca5f70 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.41.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe7ac9995f06d024c375d4434b7a45d568ffd53d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f04ee746a7a27bca0af6482b77f508d54a256727031f9c32cb0c09f442404c83 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.41.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..50f025ce8e794ea3ade787b49f133d2398dda84e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc60e7764ff90ea85bd65b665ae1356b42a2eff379410770c963b9d93f4ed815 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.41.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.41.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.41.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..35da42d0156523b0906264fc2d2d3909b9b39045 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38263772f534d620a7e751cdb3312469ba1b15f6a18cddd46d04398b6233a4f0 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.41.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..23866b68f807632b0bb7fdcc5dc713ec6e17c647 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6718585011eeab394b95fb5af061b29faeb42a0e275c09f819a1b822a8f8b276 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.41.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa0e9d932af8584e65ea5a02248870549e5838b7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e0ed308930bfa9836fa76f3c9e9124477f67cd132c1e2a196390b2a4f9f452f +size 1165 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.o_proj_alpha/step.pt 
b/global_step243198_universal/zero/model.layers.41.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..98a200a739ac0e547f9a54653ada3b4687cf1ca8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9f8494759fb5a6d5c066a6c70d7984ff31e7aaac5a7727815f50f530d839927 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..07e78b939dda9ef72876cc4764b04bf6ac6a38f1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23baafb6b001bb34c8f1ff6c17cf2ef356837971983706ba0d8a9f7cdb271834 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..06d4b876cf09222679ac743848ad2f957faa51de --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:77075abf04a9627c34a277f71eda37fdcbef020420594ed01ea5756779105f9b +size 8781 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a770016c4a2f9a3325d5567491b3e8e09a1c5d2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bb2bc322434fda9a033ed9e7335db5b756daa9eb33813ea13224b0eca4831f7 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a5fa90a07ba567865ee6c6ef7f62a343eea2669 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a31529822e636d7e1460f8d8eea18610e4ea1d9a07493492346f933eac518d4 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..84c380d8cddd6be90fc1b33fdd806b9a8ceb4e60 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b87baf2a5daa832fc935ca7fba6685b959de2f37d677ef683cd123391764cc9f +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..20755ec7672e374a5f56a640ac160defb85c69e7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc09fe749c8c8129c898cbaf9261aae2eee3555c8875a7fb43a26a390d2e2e96 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d44861156a60f975e9e9d857ca8a466bd012380b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c50286269c483f07f9a4234d5351d1ed42fa0c1c71a0ca0ac6257e78dd67e6fc +size 1195 diff --git 
a/global_step243198_universal/zero/model.layers.41.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d194bf9f6c107ea060c40e91bd5587a86c642543 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0c030a713fc381e0dd1c18bdcf8e3914320a8cfa9868ab13667c5b5139ad8c5 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..074cbb4f9f45d7ea3dca4ae64ce9b2bfbc516256 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dbdd2471279125b839e5dc6210d328b1d090760a72ff79be6672cf1a2d7cf27 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..16cb34ebdf6fc5c6a670b04eec8e7a14bbfc1497 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.bias/exp_avg_sq.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13aa3e513eb6d37b6885f8a299d0fd6817653ba9c0b7004ce30c4e051012e3b5 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0a45b08c5045d1c7ae1fd4f7369ae132d03ff30 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f83804835493ffc4974688e9badb3c1e81aad724db3adfb047a479fbee2ed17 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e74a2a848861c4cffe663e93fd5cfdb65336162 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d02608912d421b8da081a35d4bf9f0a4b12c24b1fe6f19c18cf7ef5629101759 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..cdfb0ff55d6db4ff60a7c77bda3c6ff0ccccc3ef --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:822c93361e4f0a5765b8340d67775c7d91a0613ddca1ad211fe3a35bc8b72121 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..71e51c03bd5a366463fa617c82a0b6bf0cb4860c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b44509ddb58ab13d84614f8f58e0eefac889cdc64d165d53b39f32596f724c07 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9e8c1fcf55be0ae7ea0c8e97b927f26100609ab --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b262c8210792d536d2f08b2a66e6d578f0709f6d6818b001db496c87bfe3da9 +size 1180 diff --git 
a/global_step243198_universal/zero/model.layers.41.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d9764ecf34f10f4dad81734a57870899c2930056 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b87582d5dfffd1b24467bf8571bf27ebfa2f9b1cda680c0f320e723599c73b9 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d534531b2eedf8a220d2ba5661ccb663972b147 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be56bb0d41410f266bba28307d494b4cd0ef871fb58e24de047d469051c554b2 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.41.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.41.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.42.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.42.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..132254df0305b8dfa55167013ac4362acb7cc202 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:01d5c94478321c14e58812078a767e71017de747da519ef27ceaaa9acbe8be78 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.42.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.42.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7be53298a15c924fd39105b45cd7494e616e6376 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b8553d355f0c56463911491168d0c9fd3d42da19cd62be3f99598e5abfbabb0 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.42.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.42.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d0b1c9f6d4529c15ec45ebc18f1910f95dc0340e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a4936295ccf935060f9147ff24d208ce3b7a7e560da9eff202ca55d3ebded5a +size 1165 diff --git a/global_step243198_universal/zero/model.layers.42.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.42.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.42.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.42.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b4c069a1ac2d00f1e545e89230a4d3f6e5e2f47 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.42.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:624138b1fdc197787a7cbc76869fefc250dff9ef5bc6f605473fed44c725d250 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.42.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.42.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e9d8cfac915a65b0e662509bc99678a298327d8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87820f9527a117bbdd1ff1446e4499be2a31d4cd6e202c53fae6e49b3d2f0869 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.42.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.42.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b93fdd7594a0264f29c95c4782870eff0ee4bc6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b30516c8c010f160b1be22f6d6814926543d7bf1e3755019336f9c02cff49a21 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.42.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.42.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.42.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.42.input_layernorm.weight/exp_avg.pt new file 
mode 100644 index 0000000000000000000000000000000000000000..d6dc34530f540452251db63a1e3b40c730887353 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d16c867ce434e482960a3ff30574150523e593015c95aac2c8bb4ffd4112d277 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.42.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.42.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..85e40b9cf9b150df0292532f707ab854f8763fe4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:197273f25f252b12b7a3407b278b2b515150accc9de1f2deaea665a8964a67ab +size 8875 diff --git a/global_step243198_universal/zero/model.layers.42.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.42.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..828a8c91bc286d20c5965a041ebd905fa4cb675b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44c8967f1d9e93b930ee52f104f6eec9d1a5cc269d7b6569ccce55e02ca6971b +size 8781 diff --git a/global_step243198_universal/zero/model.layers.42.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.42.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.42.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.42.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..17277a259aad7e40df36a1a3f4d71102c7c21de4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:831e269f54c0abc6814678ba301201270a4a792835304c9695ccc1b14e4b21d5 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.42.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.42.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1e2af53c1300eef66869f2962afb5203180626e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fda9247be13b68cf624624a7d4c11e1515e833283348841b70e71c54ccd8ef0 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.42.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.42.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f85a0312e89bc716ea75ea896df9f79823b0087 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45ef642e271401673c088b8a05be0e795879ea33263bd88f24e84d91704ca42b +size 1165 diff --git a/global_step243198_universal/zero/model.layers.42.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.42.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.input_layernorm_alpha/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.42.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.42.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd89e6c5ca80ba06c3c9457a35b3577837da43aa --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33341d5ec06bdbe8ef56635eb7f73dc97505af91bbf0108cddb5d109c89ce300 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.42.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.42.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b82ecb378369876e92565eb577ed10598d1a947 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:678832424878d3745d81ede6fc333b88bca70861cf0bab38118dbd546b463ccb +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.42.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.42.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7988c07036d42e310e4fa47ed3b8a105dd23b51b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97989451c10ec4e8569abb9dc6c4a4cb73ed16ecb3a1b16325b48ab9c9e9d1f4 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.42.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.42.mlp.down_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.42.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.42.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..353f701b243b8faeea660624e6f87d0fd0a58a42 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d006b326cd9b8ae0755d74e4e27228b25f966ac104c3ee700f79893d86101ffb +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.42.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.42.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..60555175944c7b1e0e3c8d995ad9d5cef212f2c2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30c680bd237e25db1e3f4abb56d3624dd579c8dfedfca0ec944ec01f6cb0f6a4 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.42.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.42.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..32f2a83ba5164a0314ecd6fcc07b6180c205ce0d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b2eec253af6726882557383ddd1a38e0c0580dce9786b63df22f7783dc05895 +size 36865165 diff --git 
a/global_step243198_universal/zero/model.layers.42.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.42.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.42.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.42.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..803ccf173fcf99fd17c1675118b2d28e30bc5ae0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7635c5c2116eff2f502bfdde3ec13e709323efcd09597c420f8c3671d286585 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.42.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.42.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d17c1d99c7e8c74b7cb8eecf516f365fc518667 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4323d7a1927fbe4a6082906125782259dff823278e510197639ac7d90bea0f0 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.42.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.42.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba92125f8fa4ed6fe9186fa94cf083454fd4f610 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:9c2925638c2f2d787aefb4786129b6477a68ab70ab3df54874af9343ca5fc2b6 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.42.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.42.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.42.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.42.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1dac1849514a2d66a9c50c5ce41353d5e805096c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80f802d0964d6baf366bc49840d8946f0c743d1cf144f0fa6ac2c86e7ccf098c +size 8860 diff --git a/global_step243198_universal/zero/model.layers.42.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.42.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..921865a83e6a66ae2a5ba9d9bdf652df2c86f930 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa09e8c3315718ee9059bb17680fdd6ca4da04c5524ed83fa4ea23bf8e63aaf7 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.42.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.42.post_attention_layernorm.weight/fp32.pt new file 
mode 100644 index 0000000000000000000000000000000000000000..4cf117a4d976146ef0b9b11deddcc5b6a03ad6c6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9593a7bfddd6872350099979c8db39397e40b756d5ebca6d2906fa0751614a08 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.42.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.42.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.42.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.42.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..66895b2e02d32552587448cffb91863280a8e8f3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e86aedadc55b053310f11aea0d36005bea7fb69b9ca7ca05ff1c6f9a197f305 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.42.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.42.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2bd14890f0f323f98ae33ea6f74df508f889d642 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:08a212c6eb7c618197e2eba10d9d4c9ba1f57853450dbf788e15afc0b8665065 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.42.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.42.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..003a2d92be16ee5700fb633e7c5098a56b42cdbb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2bd25002e35db67adf6dba655cb950eece62300d78736d4216ed72d0aba25c4 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.42.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.42.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f07f3d885f154deadf9b44b53e3424b7ba5fd7c5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be42b8386d7f7ed73364bba32dacdd699473a708c2b7c8425dc10d17fb54385d +size 2716 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..62508a97a4764f1c9f44e1549fa20a47c7737e8d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:265a211f3707104e1c7e42ab331303bc9e0c14a540c47626ae8c3da4fbf59a72 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a47be55c86c09239dd4edc5945ba0f128c862306 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78d87ff4e4aba6663d91f853875fce330458b0edd5a601d2cb7b2ed920892bcf +size 2637 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..183c7b6ddd2cc69b132ed19df1dee342825cf1c6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3faa72a516be6f7bb26e313cb35ce30d8fb5213af128057331198dafa48dfa44 +size 2950364 diff --git 
a/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..aeaf4197a2b4feec2225b223999104a471d4a0a6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:efc262512ecf1bc101eac68bf454630ef64296a6f94f9f638bfab4ae3532d9a8 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..014c4df340ca1c7039896ea1d07e4c9549df2a3e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7604fa9d6795e67d7e226da6b8d54636cf5f9d4b9da292646ef63340fa680e4f +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4299fb02626117afd8c143743851f2c6c8342e84 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1e17f675cc929e31611a2cc606f4f06245fe83575049a6b9a1da080b4a8d7ee +size 1180 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c209d93af00e970b06585639abbc70bb16282cf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb0ad88f1e568efc1d932e3f32985fc2d8eb609532be474a048a081d377e7031 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d65149ce7431500ee99939bcbae6683d53f9e676 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2dc84c9837fbd50c3708568f9c03a547e6d49ac5a32e96968661c2d4ff52318 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.o_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.42.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0574c4084b0fe4706fcf97e585ea7290868beb49 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1d72bada09e5e5354e08e12cce9f33655519b02369c4ed992dc705f40785dbd +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.42.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a9075be686b1ef01d765e5f1161e9841d7c78ba2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:219496977d9272d65a371300b8ccc5317b96b0d7a060215fbe7968ae332d289f +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.42.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0699bb85e92ec07ec578f9577939c36bc33ba953 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:052e79a688f1910ff8877d5a63da8812418b89898136e27b94cdf8c64134d5a8 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.42.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.42.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..417b764168ad3a8e434958f9695c8f821450a2fe --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc2f4ae6c2711b91f9e3abc366efd757c039a5f6c8194df3fa64186c38e208c2 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.42.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..90d4de7ab8081acb30c44c33e5de6e0dd7be1f89 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2fc50e185d713d79648a6a0f72ad7572efcb22e978da6f6048cd6a1b8ba697c +size 1195 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.42.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7ee61701f1550c4762190bc8e24143f2c8527562 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff74bfab769163b1d8f8e275c1eccac510918c8747e51d80c6e5ed53df5fb0fe +size 1165 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.42.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2631a0f81f18e75eb9cda7856e05ff8a13d08164 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f74c5cd519526041b3d1474073207c4e2dfa6e960fb377cde7830560258c8987 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b8c5440fc30d896297098fc248f599c380bbc2d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2098147269be96f08c050e1d8968d4f2aac4baff8af84a4a4771c63a64ff2349 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..eaf3d8596ef56525aae1d74dd6f720db0fb26be1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9f27f45fe81b918645c1438b522c10745d42a495bd48cc72808e31239e1f280 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.bias/step.pt 
b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b13fb91d72d7b2bde427c6758d46b38ee02f5138 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cd4445c1983610bf2d1900a2e9ac703f9cc719c2120b4ee9860beef2044b5d6 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b05fb3eac24486a3aa708a54905a80de49facff3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bff735b317e7a3a489732b2f15c1e4d3c7b81b42ee5248e3cc3a39ef3ad7cf3c +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..91ee734954fc7a8276bfc5a8ba515dbf747f111b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:2d867fc10af6e1a69ebf5b002856c071a374231152b8deb006a9312504a42e1f +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d59c33556c46d415c9a12f014bea04a9e471573 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:016fe0f12f44cd70bf1bb2348ae96f5faa539e3d12eae483fde421434f3837b6 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c46b2a2dadd66efa09489dd761142622693ea76f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56a6ab4aa6d129dd04ac1350bed368e7cbc81fdfe5fb22ca1f0e1eef7cbd1e47 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f5329bd045c66c24a21604cdfe061db55144351 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4660e125990b1109c82b9a7a7329d186d889411b82eae3961b8f16358f1f5729 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9376f5062d8855e9523a0abe0a32ef45abacc4f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b151731900aa63b0ac8417a00a5e95a5c154faba355828f30894a80193ddeae +size 2716 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f35f46f3d3607942962b92bc4d993f709702bb2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11dbb1af29dc664db9e5f241945409ac47dce2d2ec02ac491c40db1883decb1a +size 2731 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.bias/fp32.pt 
b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8bf017fdf59ec4120f1d7f1627a6cfddf19982e2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7aefc702271b59bd7717b843f037d2a2b11c0350a35018399dc498a0d7d5d060 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d713f69f75c4a7ade6c771ab750093f06c9abcde --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e972f197da145697958a205e3afd9f1bda9308a596d310f78bc8be2ae649efb0 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2bb91080937b2a0f1bf0cee543233bac1025a96a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:9b6c97515edcb290cfa8ec380ce71de655ef69d20ad0a540fb2cfcf2fe91cd08 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..423a8c7361e1bc0a64bae9447ef18e2de2f0983a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc05bc8d1156bdd83ece31c761b565ca90501d25bf7164268083198d4fd86e87 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f9b550799c61632a08bc348d25911dbc153d57e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f81aaec4df3459998f21275882b16f4321b63ff234b0ec20830481ecedbf8968 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6188937decfa604ec8cc75393a6fe3f89a6d6c29 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e37dc4aa9eef5c0ad53caf5a48c33ddd5f57e98f87d2142fdaab2d8f1884554 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae77a04e9cbc6f5d6e5643c4cdf4b40dd1d985f2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:972f315b78fe672c209e369a6180885729fce9e8c726e1e70c0137704d5e3f73 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.42.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.42.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.43.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.43.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..233f169325d69b34cfea427221f9f28d5edd34ad --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1569758c360e920db5094bd99102df6079f667f0781eccf8d0756bb79bdc218 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.43.down_proj_alpha/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.43.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..40ae09dde648289d2064185a2b4451bac127bdbc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fe7b2aacbf99d2f8057ad84d779fd1d9056439c2a26c0cc1729188b950b6412 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.43.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.43.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5476b3192fad90951a71c65bce42818b2f6af9a7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d75bc46db65cc19cfc61811058422a97c4a4112631bc34681c615e1ade25c328 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.43.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.43.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.43.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.43.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9bcec8ff300c59bd6e2d295f8335f6128be73bf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d144104d6e7e9cb2003236d091b1677113685f9a1002ba435f7f58984fda89c5 +size 1180 diff 
--git a/global_step243198_universal/zero/model.layers.43.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.43.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd4f65f22be72ff4e63c178073c0b3c592b17306 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54bf400a540e46dcc39ea02b5467fba4f98fbcd52b2867ebcb6cfb46133835ac +size 1195 diff --git a/global_step243198_universal/zero/model.layers.43.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.43.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4753ff904aab2063485f680d2e130d271ef072f5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71e23aee51a680869776ca3d4bb6dfcf2cc25c06c04db44f4651a8817c15ac52 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.43.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.43.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.43.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.43.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..21168df609d5eda8ee08235825db64e406755f11 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:7ff32aadef20efff8d14de9c9d9df70da6596b8ee36ebf315d5593cfa37c25e6 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.43.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.43.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..16051ae3cebbe2602e349ad0bbc84420eed74f14 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94cdf6047dcaf0f08e994c7239532a44fb383b6cb9ff03c1621f0313bba3ddca +size 8875 diff --git a/global_step243198_universal/zero/model.layers.43.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.43.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..71e7e56a2ac214acc3e421a7564e77e2570c4cd5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5e9eccb2a3e90363504f1ac0d058b69116862ad9dfed3974344cb1cd8eaaabe +size 8781 diff --git a/global_step243198_universal/zero/model.layers.43.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.43.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.43.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.43.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..aacf6af5d4dff5bbb380ecbeb6b869ffcd5c287c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05aafae63492ad05afc6eaaa40e6d4313843b89789e0e90472883073934ac618 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.43.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.43.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c9e8d740da5da88d22089a428b32679d98a8e97 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:868af19076d836e46863291290faa630572b7311ebbd502b8e3df55cf0e7d77c +size 1195 diff --git a/global_step243198_universal/zero/model.layers.43.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.43.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9b73135411bc4ec839fa717dcbc6a72295864be6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abb59b7c8638350e6b50ae322cef8d6bf13952db50d28b058478bf82a183fe09 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.43.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.43.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.43.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.43.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a02a2d2b69be19fcf75248a931c495c6fe238102 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94e20059cb91b0a653b8628c5b289a2aa2e897ab48cb20b4e8fbb1cf1a03155c +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.43.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.43.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..fcfebcd4a9dfb0be8cc09c8ff69760ad754deed9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d015e7ca4dfee6ccd67e5c1b8ea9ab9ffb98266ba9b0879b7ba28a07bf519b91 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.43.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.43.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..75aee829ce6da983e1fbf4d9738ecbff4919036e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0d29be5384f553c6c8ff3da7f862ba94e556b94fd46978f229a16b195b33cf4 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.43.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.43.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.mlp.down_proj.weight/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.43.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.43.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..94fdfbf200d18c8a5a82986b8311f80bfacdb221 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e905d05e51df49ec3d992e3062707b9a951f4e07e114cbf6ebacf816fc064580 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.43.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.43.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a593b46f9b59c23e0ed570c27414392196a079c8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77edace6e267abf4c84831d42417c106a9fcf6a160e3191cd77f0d4bbe47799b +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.43.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.43.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d59e0b4a34a60b026d06d7fa832acb67585ca4d6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7f29b02212ac1855750055008b37e9009b9c716c9baf5becb7ab729bee57789 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.43.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.43.mlp.gate_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.43.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.43.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..59babac20634e03e8e928d52078cedffc64a3b23 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:311a7dc18980b6751feec7e3a0eebdb887949bafbeacabcca9ec253d048d58c8 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.43.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.43.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..22d4f185ad5800ebe0484ec65f0e089ad5a5999e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8ea7df006c1fb21387c2aeb0ce759f2d45a38233951e183d3d6a47559d411e8 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.43.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.43.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4cacb582751ca67424cb623cb00bd06206eb327d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35545247d26eebf2258431f206ce79cbdbf040640004eab3f7a1518ad0b6c668 +size 36865165 diff --git 
a/global_step243198_universal/zero/model.layers.43.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.43.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.43.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.43.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..28466dc7802633ba67ed3358dfb4f2e26b5b91c4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa1c93f06aa87997abf467355896e35cd27909aea9591cfa87fe874280f2f3e7 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.43.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.43.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..946ce50ed7a834805f225160ca18ec2f342401d2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:817cd0153e38511050c6782bd2e72eebda8fb5cd5e96a520f0cefbe47241ed71 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.43.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.43.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5320feb59ddb8b82bd8a44f3e8d500736ff4466f --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.43.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c3148085811d71ce103ddfce3b9cb2427093f036b35ac10e7312a1160fa448b +size 8781 diff --git a/global_step243198_universal/zero/model.layers.43.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.43.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.43.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.43.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ea870beeab88f16bf7d3e171d189ef3a97dd7cf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c95297f577356db4eb85636eda34e9fad1ce9d2d9016209b2f96de5de24d7b43 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.43.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.43.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..27bd09431a07e1b4f057b30e5f3d014bb962e949 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7935a10b86b4b555d1866b90eec8d58e867021aebb317935bdd4bd36c37364e1 +size 1195 diff --git 
a/global_step243198_universal/zero/model.layers.43.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.43.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..bbfa7fee5237134803a63b7c22bac43193ac29ec --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baa2d599c934b75a933d28b842cf9580cb035637175f1f0c8d522aab10788305 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.43.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.43.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c15330496d6e0bd0efb1abc58a36308008052e46 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d11103e765a87c734002a391cb0ba34e27700c98e8bb0f09906d4b2b7c69054b +size 2716 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..918a65477a0a12495006d6f7e1a4d8a02f59bff4 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:551be9613a0bc783ef99b307c146e945ee39de6ab9be471e4e3e9e82308466b4 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..298889b0973aebd849d14cda23ad5d240e5479bd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:696d338138e097092313398f598ffdb3fbff726627c7e51c2d32c47da79b415a +size 2637 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f50df8a2b4e8f5e9d1995d455153f6475825795 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1d8a3d28bb03cd853082b2cc0102ff5e3b44498d1b5a63a23b375e0d5b42900 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..58740560fec5c4d9f1ae8e4f0a1c0d3d4340d634 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df365640f18062c1fc8db48ba65362853e5a8dd8204a90480002fe470e709939 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb20c01ff0ec1a6941b179f8f5dbcc56f6a355e9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b50abd5108162bb7e8c48769243923718403570abb8e4ac7ad8676020a9aa037 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..17b59d64dbb3d324311bc554f12ad95e75a5dc51 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:868baa0ce5d01e084d7a19747036cdf0bc459ef69d1b7b9b966b1f5a3942a4cb +size 1180 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2cea0b0f8a1582484993c9eafc6103e06179fb76 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2939ad4dae346b9f72f3eee92c69c33e3f4fe745fc9c858d9083e780e5ef4791 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2bd7564c21aa7ba9d3b184c38cba2be7dcaf6879 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00cceb51112ad7fb62c25c21383f56b82db084d8c1f7619dabfd8039be2f10fd +size 1165 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.43.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..60a622a92d226d85c429f142a2a6a964b6c2e912 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2c3d546a5e655fb444f0724d381e79f4336478e8e5558e648c43cdebc5c7dad +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.43.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a7b7d4b35ca029a0e8c285e58c4c9eddf2adb183 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:620ac01c6dc34f244aab2cf33c94e2eb3b3a4b97d39bb55084cc1a233f579ac7 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.43.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f13d13708a17dd296c42dd3ab690f429f797c3b9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20a1b6702ee8950d8f287fa6862e4250ecc3777308fa79f846c415f9ee7a4a9 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.43.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.o_proj_alpha/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.43.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e91f1c8b1e4572dd9d55e96c688a2c1a718fd57f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74b972b2dbc3b9c5e6279dfa3b0af84c34ab04dbba9eda2cdcd5459db470a342 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.43.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ea341c3b1d4d1d18ee94253259c2285694fb977 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07131abeeae25fcdd740a8913a2d02169312481027e09a5a4d8a71744392937e +size 1195 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.43.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a7d726fe61666e0184f02f0419c561198a983df9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeaad6ed21e04b7bc2498f4d18fac45ea7abfcf5b4d862db47fe4d35033766fc +size 1165 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.43.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f0657d42bc56a79dc391aa8a270fca98d465bc1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a5398be424f8b5f0aaf501992f95124eda85203d00c77661f707d52d1016590 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f4e58f0008cb06167fde492fb1778adb9fea4e24 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58b53cc3e13bd79ba55ee61fd87ffc1b5e65c5da85ebec029f24ce68e2d93bda +size 8875 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2fb608c531cde18c091cdee9552e121fbb9ab515 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cee8ccee79f52ccff995529bf79503843eeecdeecf4de2702a4190dd37b92ee +size 8781 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2374d354580a6da8f12ad4590728a48afbd7dc58 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f073be386718efea24ecd845b7e9ce80b47fab18c4b184bdc2b920fffcc4c6b +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f82774230b08a1e144d59445a66227a97100400d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b2473c45c21c554f6bf18512ca9ed4c66616d4436dc25b64955263ae75d70d8 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fbeac7ebc0de8ac92dc47db63d5e1eb477c82048 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31781d7167bf1a7a8e58f77b56dbecc53eb0b6f4ba7ed60a341906a0fd69f659 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.weight/step.pt 
b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..aba92cd782ff5a325953123933605b77e0b0c402 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27a1b83c755405374069d76756026e5b81399949f6674e0100bc9c8e9a5a9e92 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4fc42c1d7ae00e00670da9006cf22637eba21c08 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f3c3dca6e6901f52837ab1ea74c3607154da0094a1893de5b1d253731c9e28f +size 1195 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf207f13a5d9629646f70991a6cdfbd8bf5fbd9f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c3fc690ba3b09dc2fcab2f3ec2469742286501d6d33a38e55a05cafdac71d8ff +size 1165 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..16e4be24787784ece6a1074c57dfc1dc81839f9c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ebda2b355e6ef12a55b5a9efcfd0445f96e3c48e6bce39aa1c3a5983cb6d9e9 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..be55c43a2fd51091814727e86bb39e4bdfc035a8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa58c2885859f584b2e878224e56669ea659a46c44f66f619f5fa05e92fd1d7d +size 2731 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cfb952f97e3be64ae0f1075485582d25269a8ec7 --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e1e6bc3c50a13a905391360e9062073aaa34898b104c5e33279f69502d95e8 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8f25fde3361a73bd5f3c037e042bcd0c5c83b1a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cbcc49edf14196398828654d7abc4059605c8d5112c4464042bddf288bb38d1 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e1f56b12a32b9838901c245a3587873ebb928e3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70640e2ecdef1bbcb4fbbb4ebb616479430f2649576d01d8c2c31aec839850e2 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3a5e2468cc67922573da6ff1d7b043c995623aa --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:618717e0ee470e64e58ca51e09a4978fb49ef938278155484ebba734c2b40d60 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c179848ac71df2c9157ffbc8f0db2e13f58a345 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:518ce883549ff68e94d272f522cb417ae20fcef68df6bc1735aa9d80787a0b27 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..12e3e5705cbe6af3051d920e6f960c89c65fec21 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b770f45e21f48689be34796cb0ebcf53cb045a1cad7f77ac7df017ef18ffa986 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5119274d6789b71c7efffcfa8f6dfb6a11b50923 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c5db83e3c48e1da2e0ca7ff7530f374514fae211cc79734c607e7f1e751db07 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.43.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.43.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.44.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.44.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9df7bf4632b6d4697a28bffc88ed76151f516ba0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70b53075347895e682ad05a0df3adbe7a66104c6f94df81250af7b423f931d24 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.44.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.44.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..468c8b0f3a39fe49ac69e12de190b668967e4f9c --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.44.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:673bc1a2e4124b5b820328a81fd436b9656c6ac6faaf4244c5894e4833cb3e2a +size 1195 diff --git a/global_step243198_universal/zero/model.layers.44.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.44.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c12d0382072bce5bce4fecc29001f3b0d376e6ef --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4545f298c2316e764d6ffa5ab8c434f1019ff57e91618761e3d04dfb1749bd0 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.44.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.44.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.44.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.44.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9167e90992d9b7544ab69addd246acf86db680b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39604e28daa1b63450b99fda1c942c1c5299b92303fe15d116d03941ad32e534 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.44.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.44.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..ac39735567bbbc8b872f0776bf1291f6f968e14e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e3acecb44f1284c83a560957df39a7e4329b2dd282705f1414c8243458d73b9 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.44.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.44.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..330bd0fe8248028b33993fdc9ba891a68e878e86 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d078ce92eb9228c51989f64c8212b212be1bbc0ae3f442148c494ce011e3fcd +size 1165 diff --git a/global_step243198_universal/zero/model.layers.44.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.44.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.44.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.44.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7258de615f5213d370e5d189319d1295e573fcf8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:851e10712fbb5ee2a494349fbe4aa535778279d512551a919369d7927e481f69 +size 8860 diff --git 
a/global_step243198_universal/zero/model.layers.44.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.44.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..02f56594a08b4246deb3d0dfc461813c77c52bff --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13e841c13f42e59be53e520b719e19610df43bb68d88d9049bf47a334c76f669 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.44.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.44.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fde00b43ad628b83ed4af1ed40af63c90ef211b2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1410f3d088f30f8395bf4770471cbda55f9d85b014e90646cacbe22d0f5bcc2b +size 8781 diff --git a/global_step243198_universal/zero/model.layers.44.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.44.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.44.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.44.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d39576fd7731ff31b27eab26fcf5d95379c1fb7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.input_layernorm_alpha/exp_avg.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2d7fde846d45c650c60b5b56a2730f64b308672ff778948a910b5fc5d1c7713 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.44.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.44.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..bad09d6eb4fb7e2296259d36a47643b456a522d8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8b74b69bb1e5a8dc179fa9faa534132ccc19837a3fc53ee333364941842f8f9 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.44.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.44.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..584f7df618a80eebad47c6f703c12d8e9303f265 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b59d38c365a34d1ab8f1eb4833e08ff2555c76cd648a69315cdf9f97647a3c0 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.44.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.44.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.44.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.44.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..4c795225c261acaf7da0822d675748f3bb3fb7f6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc9ef2aa4cc35132eb47100ffb76e0aef72dbaf5d59ba5193b3c1b5bf268aa0d +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.44.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.44.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ec5a7c808ef583e8718f0512cfe07a5f87a76f1b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:810ac2075c3ab07fad9a0b3e0367684fca8bb09683e72ad484762a3acffbdca4 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.44.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.44.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d249b902c484723cb5fcb661948d0a4547eda622 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4821070f0de5f81d8e5e6e3bda6bbc9ec19c2a33887450ad3c67f5c05bad0d1 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.44.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.44.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.44.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.44.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..44fdfc3877e496d350eb2e9d8796d5d64454fcfd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:408c870332e77e2d20fffd5aa1aade42989d890d5a66ee84b6948f23752e4955 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.44.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.44.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9aa4ef40530327953cef15abec733ff30dec21e6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9078a63785dc763728792fc3320981fa911389428f3be46656273b8fd48e1d5e +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.44.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.44.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..879e548fd03066e2cd8ed1214ca695d6c795dd53 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3b34c9dca55b37017891bd64c6cbdef6902d85ad533601765c963c8660a1362 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.44.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.44.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.mlp.gate_proj.weight/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.44.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.44.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c0bb10feb1095e6fe98326b0052cc05ad855388 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb844d89a917e1163609eee11c3849bbe1d2006de3552bd584936b7cb0e562fd +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.44.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.44.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..230a94501529554b7d0f5500eb93d2c388822042 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1917fe47d866256c5598b319f2b0c1d5bbfd97e2c656129d7fb76bbc0391998e +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.44.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.44.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9503068ae6236cc17b1db5efbf389b819b909e9c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55a131c9fdae26f962f9867b6e401960b07431c5650a3d8882bd4a4bac86d16e +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.44.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.44.mlp.up_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.44.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.44.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d5f8b15f5ac776888cab93003eead0f230f9681e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08c464d014bdb7bdd1cd654cd4346c371a2db4ef8d55eba7ad54024ac35accf1 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.44.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.44.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f34135b94b861727ca81f2ea1f1cfdac7482f342 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b72cee724e14c88a1474de5b7f6a5112b36556aa69b2f14e8f725d5d3334d54 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.44.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.44.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea26d566529c6c29a0405e197fe02bf8d0dc0147 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c22002cae3b13636393ccb32699c9c8bec813df7c8be038c5807386828d08eb9 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.44.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.44.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.44.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.44.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c27f055fed1bed5b52cb15e54d12753284f0a9e0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:516e214c987115f9b6bc116aa9ed03424a945cb7a3d4dcca665d87de3df2b9eb +size 1180 diff --git a/global_step243198_universal/zero/model.layers.44.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.44.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e4c3649556d02df08cd3affd2782bfb7e164884 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:052832b8486dd25e07affe750b40353c4dc3622759ddd15a727443aa20b9de29 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.44.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.44.post_attention_layernorm_alpha/fp32.pt new file mode 100644 
index 0000000000000000000000000000000000000000..f80fc8fe17bfe29eb856bb3274d797dca887f73c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:512a00a2acaeb0e54a1962f847061234c04e5e4e0d86410bf2a12bb5d4e1925a +size 1165 diff --git a/global_step243198_universal/zero/model.layers.44.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.44.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3d2966437631d011b2683026e0f4acaedc7b5e9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:651e879c6ee7b6a1bb9a13eed924d2d5f9ee8d9eb22007cc7d6533125f597942 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..35e181a2be61f068812b03c397c4af556526b33c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb73d6f9e6804532ebaad21b0eb7412c58df6708e6a5e357f2fd86c65d24ff0b +size 2731 diff --git 
a/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c6a61506a4179ee1e128c18823638247224a512b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4d551f60937bcec20a9183f199010b5f27e0fff9b3b5ff5e678e32840f0d969 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..db9ea0840f16e6137036ab74046cfc0a7cc51fbf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97c2e132a1f11335a87b1029e7f0405ef5b1b9bbef0e341a4853c534988dd76b +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7723d5ff0926445ec3531b3735c0223cf897fbff --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52c0939e6c2afdfb8c6a1708b4541ec75e38fb394f9a82a05a43633d002292ca +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..249acfdbcc38ebb5c3216f6b3a1b60667ffd9e33 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dc6e2e5e96e2f57ba91654474e2b0c622faf938b48adcae0fd3172d520354f0 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f02849330355706b8cebb7e2bb9eb5b4ddb3828 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa4bca7cd680534fddb4a89d77dc86646ee800f0ce6e24801029dd068ddc48ee +size 1180 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.k_proj_alpha/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..420a0737fee9c5dffd13e1be6c2aa94c317ac944 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:999ad68c63ccb479d62b5469189ef2058c778fbc85f823e9043b76db45ff4cf5 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..41a671ce28631d0abb901d6ca0433418bf12a930 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:471e075116e415b1e47fd8142e182d3d5097797d82437fcfaab905163324fc73 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.44.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7aeb03335d562334294851ae5a998a4fe749b568 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:c1f83259c20ef4fb2292aa4482d50cc07b21155b25ed17b21b0c3e1cd3dc90aa +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.44.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f59b77864c87c6b58d9999f66239e98d212a6a4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bcf2364225d1393452d3a950787593d8e9d8410ee1d2d0c141c9d5867b80355 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.44.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1b9b6508baa695a44bbda433046a3736f835111 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3f06b6225b4ceb5c0e0df1e6fb0e51cc9bc14ba2ee0fc54be090292440782f9 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.44.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.44.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..ff3aa508ef87a31d44429d04df7659a71358295d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d2f4b9e448904ce33cec806f9e4e4c6e1ca6133e91339474628ba04c345eb60 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.44.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..00e0576029cc8e7811fbf28067034bf3582ad457 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed5cfc0ce9c543d3e98276d6a003de4f3c6d9a5614d6bedd942f1aaa4663b61f +size 1195 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.44.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..437c14d3938629c3a2521d7b7333cfffd6db795c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d9a8cad5fdc747fdc8d595468750107b48663c842bb3eaebfe4f9f9a9f7e329 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.44.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5dd1e854aab0fd31e0888633cfae972ef8072c79 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94eec744cec50f3d13a2b64cf95674c2d2e9abe3f0149be8b23b3f810f77a034 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..95cafd548c70440ddc83bfb9f3f96470cbb53dad --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aaa170b697b33e84ff1da77393b05b29f64bf6144fdd4a54a2e93cac5a859dd +size 8875 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fc726140e902cb0241e0e3da21391c4d5c99efa4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3452baa7f15f49b373984063a594d954a45756d1f0df77411b919acb564b4932 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.bias/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d50de7223d30d4d91511ec2bbea876b07e08e55 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b374df543a29866999034b8bcb96b0a177982d67a76a072d509d136e21e2935f +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d0f11d0430d6a4c7950b0472cdacb75897df7b8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a56c847ed9d1e9024e2201dd9e9e9e793c12749a5591b28da1292137f59b14d1 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b228ea935394396e986c65cb1a2f50a62a9f3848 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19bb57d561bc206e74c3777858686ed39b88cdd39aa18096d0f6ab442b5ea82e +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..36600ca3be7804e750bd6f3e502ee4b90f3dd320 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4032d2d5ca9d27da219570ecac7e5dabcd8753dbbd8d4a208b0b4d508b57780 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d5bf2bca3d4214c0eb426d4e52c6ef47ef06f45 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b83a9cc2b340f755624cefb1ae5979510e819a2964242d454974472643f7ec7a +size 1195 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc090e7cfb8bfdfbcae1d301caf99ba56182f513 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94317bcec810a60c072178a147227b39327ae37dbc65c8699d2173910e0e0f45 +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.44.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..194f5823b0576debeb3edf8cbd18a78c84c0bc73 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f421a255e8c8ddd8010d0d77d6c9de457f4f55fc1805d9a9287c0f64eb44bf07 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3ef3d8726f3a69f9ed6643e9e35981c21abaeab --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bfd488a93afc411664f9e860a17cda3ea7d1bee207117a0b1388e15de57aa9f +size 2731 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d8cc2f93cacc178a4f691683e855c1af84ad03a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.bias/fp32.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abf0dc7dd0e37449203236143537f3dbc384e33e96fd7a51fa4f4a629fdf13a1 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb111dda50f3e6c3268b59e624968543da6ac611 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f04749b096ae9ae8c624c33536f0e35f8be8bb37b10b3614f33b139005922f7d +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..70fe962c7adb8f8364abacdeb219bdb10bf1010f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf2c0f647d09abff89c8a2670b4dd71680a2575f917ebad4d3b88a18f5c38825 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..5555eb8eba7246a48b5d5339c097c833d3b7530e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24028f9c46bb58b143e348521ec833bf9cd86235880a92ed3f0346664cb3f50d +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2276fe8e0a8c284c03660463b94673c40c44b846 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb3fc7841f24d22b962bfdfcb9bde1352fd8467d983cb3c735f175a66a657398 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ba996369820f0b30cc6a73fb8a15fe15a737fa30 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1c2b18ff217795aa5a0271a12ac91bdc210e53c61c05cb78552a3ae44c9cb26 +size 1195 diff --git 
a/global_step243198_universal/zero/model.layers.44.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6937c2ff9f84bbc8a0426d98a61e81862d147704 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c15717690dc518450758b35951b84928174fa06ec0878ed7b0047ae3a00e3a38 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.44.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.44.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.45.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.45.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7d681d7b815565fcf68d08801af954ef1634221 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f309d5fa16ba6aa9cb46506f606dad347f65c9da38d8d63989bac0a72bc53d69 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.45.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.45.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b989966a9bcc698d971e02abe330e42ef96b4fc4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:772fd234af469a5a0676f2e101edd6d0d5a7d67c9f191c0db7b57f7352f9723d +size 1195 diff --git a/global_step243198_universal/zero/model.layers.45.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.45.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a26318ab8017f39c618891316e8d5573edfe3d12 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:101d89b14d5ec67cdcc33547eb6767799eda6b6f3f75d0c2d30cfc4f30cfbce0 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.45.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.45.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.45.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.45.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f7460344a300182d1e59efc48a99e3ab102e3126 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6da490890933bec929ebf5395fa4dad3ae80008c2ea4bb2b824c4a08b9890941 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.45.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.45.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4794e92d6739a0328cfbbccac965745e9d9d9b4c --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.45.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4325b08d2e766fe7bbbf91bb1e85438e774d5e41dd1a7e074169f00c193d6733 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.45.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.45.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b521a3ab56563cf8e98ea45ef7065f9408ba215b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1576e5472970b3b37e1c0059efa48dd47de0062dc8da66b783b42aba9b251bca +size 1165 diff --git a/global_step243198_universal/zero/model.layers.45.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.45.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.45.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.45.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..24728b854d27a9b86d1ef84aa1ca725f0605aa9c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dac7c43c2271c95e45fbbc0ac9656f22e02535653caff58338c79293e70293f8 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.45.input_layernorm.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.45.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d53d33951a9f1ef4e69bce5f5f228be64b50e6ea --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b6c4dfc041396b1680c640f530f8558035952632aff3f15ae0cb6c176af8439 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.45.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.45.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d7683bc0288cf3cb134e3fa8c4954eb9d320caea --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d2ad7d2cd48ad423a4247460cfaae6f5e3ce2479d146d66158d81eb91938296 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.45.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.45.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.45.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.45.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b15ad2bbfee16c99a18c476a5498fb07bdea048d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:68917d198e2d00003409a184c588b2c74194ac49682fbf12c91b8df1b08db6a5 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.45.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.45.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1fcc17c9980e761798c48095c17aef61eb8dacbc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9dff7a34c71bc7e9702ae1bb6eca61d9b8083084d198d131c545a986f74aeed4 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.45.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.45.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b26f5b4170608ed197ba047dfe4355228aba3074 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6049ffa61994cbf435717fcc134282fc1214c05615f0be0bef1d542b216ed0c +size 1165 diff --git a/global_step243198_universal/zero/model.layers.45.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.45.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.45.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.45.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c6a4cc6678159af6e0d45ee6a0c1ec6f5b0ffd9 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.45.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c18d0e65990d0bba6b9655e8884f87c630915cd7109af0e3cdd07823353edcd +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.45.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.45.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9fb367e37ca2cc901fc509166d7c2cfba6b00d0c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f455e520ebd45f71508260a03ef681385cd14183f650a1af76b8bdb12029618d +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.45.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.45.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f801fa4c006b77327b6e9d17d915acf933997aa0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d55bd2e6bdd0c514350d65517d368eabfbc1ff0ffdb7c42b5417f1042543837 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.45.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.45.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.45.mlp.gate_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.45.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..70e30d62e4cbb2e4941dbe61f85e2136929f143c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4af244f8efc11fb14f6fcbeff8c7e561307255536c1a57f2c418b181e03dc1b0 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.45.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.45.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b51389a10e44d49a213e8479ac7da835bccbf953 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5defd69b1902c5f4f1037dbdc89c4cb2aad0fc4d8b6c8ee44990dda6b3061923 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.45.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.45.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f6367b446f1e1fd630f46d748b996a38b824438b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a3eca98ca4637af4c2677fd4301e8e3f9fdbcb32fe2ee8236fc52f79a401677 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.45.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.45.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.45.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.45.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..85de8be497d9b319deb117ee92bbf8fa29004452 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3382470fefd9e2fdf983c5b9b7a75eaf3fcb2a3faf4dfd8fd96f4d5d693de64e +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.45.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.45.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b6779d5e8a470fbe5a74e84233f76a36946fa8a3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33e87a1e05199de7d78cc9b06f80fb175917fcf7b1c1d05427b050ad4385c1d7 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.45.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.45.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c7ff847d578ab83e19fbd76851aa43fc9e8ffddd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:539b8c06cb2a7717c530eeaf186db350233f68a62826eacc093760c32d1af52e +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.45.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.45.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.45.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.45.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.45.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b337e4a41ad178423b5be50d7175c87788ad38fe --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2c1f97149c398ef24e1ee736ef7dadc7b64d2d4d2e490c80350361306950426 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.45.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.45.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..33c20412152ef408f9561d6fff463c47ea13e6e6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1029544f2095533733729dfe33c9fefec19ec6b71f7deed486a5ca57b02272a0 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.45.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.45.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ac637ecf000d9b46de2d5ceeb06b6558d559e09 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50893e8b551afafb0b35900c631f2be49e68b6ceb526ca62e4cc70499369de3f +size 8781 diff --git 
a/global_step243198_universal/zero/model.layers.45.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.45.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.45.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.45.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..33af753d11a6806ab6a79d74e989106b3dd191a8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b888f7a3dd5b067bba64e92ff1063662db47220156c19b3a6ee0f7c81110e8d +size 1180 diff --git a/global_step243198_universal/zero/model.layers.45.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.45.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ffb703e6320273d73dad458df074c2f76aaec5d8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c88c8970e7925a8d4d12c45170a63b558cc97849530ef9e4bf89735c05da7ff5 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.45.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.45.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a0466d7949e1784d75ec69b0837e3dde8860c03 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.45.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf7f9908485c5448fb7a82edad33e9fa05f85b45b4830a798e2373f3264aa0f8 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.45.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.45.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e5cd2aa3fbaf827215f552c21f315ab0a3f3286 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac6bf1492e407f079acbf04d855ccf8e888d83b559fef02698b0341ef9d35f1e +size 2716 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b0ef728943625ef012bed268d09063ffdb43cf5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d502874f1069a7d5310990e824b44f1f378978c0142a98cabb11cb4d49a3a69 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.bias/fp32.pt 
b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e32ad006d83124b2833822cab2b7f18fd727b6f6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:892da614f87f314eefa8bc46e2d4c87479be615c5c104930e7504136f8a0f9f7 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f89cc7e8d08c687fbdef4d6a6150ec36354186c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d7607ab77ae3c41d8dc7b3230fe7592b96fded375ccc4a4724b76e31e5ffbef +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..949a12b62da0d7e3a75909079f6b2c213abdc5fd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f9196a00e890ee7653dbbb93f207724af9846eb782d87683274b48238b2588b8 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b7593645372ecfa14ad9a30785f66d6137c12c93 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69cce2a6cedfa74fc64db327b1cfd508544bea16379b393e57d9619252ba83e1 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9f4e7fa8cbbf547868b4c8ed64ec3be651c33d5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c0bd67834613c87ae12e52890fc8f86490a933ad7fdc6ab0a94c599f8bbf868 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..983c6339990ff6ff76a728746fbc287a9ffd8919 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be2f221179cacc882995d82fb8739c8298ba546f09f7fc09f44bdbffd5afc762 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e27777a72e0fbc347276a90c4f43bbbd9c56f601 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2fda9ba1ecf79b9b425888f6593283ce93a2e3a1588ea348ca386ca8e56411a +size 1165 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.45.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..195862716927ce6044c053ec61baaa2fd9957ced --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbdbf4cbc025c319486ac5169151b5a5622c2c0715cc17f28607f3a347bdc603 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.o_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.45.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1bfea2da834cfe155cbc647bd1c2b7113da34518 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f350ac7b0b44d48a148e698ca60ef580acb4df35b02fc46e27a22240559ffe6a +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.45.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f3e3d409b7cf8700ec85a20659fffcdec4e94d8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93a7325ec580c041eb39e5510083dd60ced8e9a223dc28a5219db945e0a6622f +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.45.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.45.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f540439f6fcf9ebc866c9d88b83b2a650c1d15fa --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f7b03e766b7773ffda756e80fa8d725325cbba8deb61900e2093351e2d7c7f29 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.45.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..23d426c0879bdeec4cc2761d7037d3929f6d4add --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:969d1bf88cd434b5e2d1e79914891eb91a3c32c737887489c30c7456ea358467 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.45.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..880ba497fa4a198a85999216494a06602d27e92e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f7adeaaa3fea2ba392c14f2571dce895617a72c7179031073431e4875aaf4f5 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.45.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe0ca4b97de8765e29d3dd977942a36947be1936 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:625b8fe49718218e16cb1eb93babd68ea4236b310f3a43a92d64f6032ecfd0e5 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..55a0f6f4612b32977685f16906e5937535881481 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd87e915f63a5d060c043e831e07b14175fc664fd41aca81dbc52b203c880b3a +size 8875 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a74880cfb029a4a336029b9be42f59c486d7ea3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ce04d735dcf6cc85adbc10a58d9520ed2eb05423d3645528578bf1c3cb07e5d +size 8781 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..86420b118e6c63adcfe8d73a200e54f9a7a927e4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f55fa2784e60c9b83cdfdc593630c8d59b94adf9ea0844836d2c8e04bb7c542 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbf01a28d2983cb92fe464f1475221154f2ab206 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a51ac8a21e336c7256e290ab9bb374e1265aaacad6dcd28b31a41c8490c02cb0 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9bf6fb7400f166d66692a3be5ec246d01317a2f2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d28dc75ae5cde8cdbd65d54a2cc9f95f16063172937eaa7edff4d720db774372 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d0199559694a77a39c78e4dbb051ba69d56b4c05 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eddf6c9d49e5664007cc6fc9389a0c9c820e62235611c3c79d6ff86487350b95 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a8485a163bb4e762e94b26db20a1c88ba7b7095e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a012b0f1952ef509a71c1d981a963aba33ed6848a403eb9a6248e4e050c6298 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a8e18a4c31b3da11a6d92c2c0b2916e5de526bb4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88ac8682cd4fdacb50b7b3e644d69f6f3a884ef2edfff9ffd9db111797ac0c51 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..15e441a229fe8db03effd5de4551b20bad1a1300 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d520be7de5603d387c4240acc991390b7f207abf91c3af5e2e48d1233f412a4a +size 2716 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f920a58e16c15842af86069cadf63889f582134 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffb8f21ab69a21506a956923490afc44e345f2d87d6719ce5a6bd60477625809 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..255c65753caa10a8ae5b618dd38f2ec9f91466db --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b5ea2cc64941b8b0f9cb197f8ba4ab6ae743a1a8e377371cfedddee4eea4fc8 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.bias/step.pt 
b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..91972c51cdd125008bcf35598c92413e416ff387 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2e93da7687875f0f2b9716f3b9c32cac4ec7df200fb0f6cd7999c5712397c6a +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..abbe6f3f06d93598900f7f888a7f2e3e356646cd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:729da8c43dd6221a9d172e60aad01144f3cb6681f40f40f565df7bffcc1e762f +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f25bf5fa861e60f711a9a6361c3452d31ea5e6d1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:5d4eb0e41bebb8bb23de27573b62d36a760554706d15e5f42435db7f5d6bf2c4 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b9013b3bb3115592f5db18d572135bd2c566476 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38efcdda0f5c28e5b293741ab5cb91238ac823021fb438ea572f5440ba9fcf87 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ccacc8833dd28a999d7678083ca787c71c794b3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ba758acda7dc68e067d47d12f302469d1ee63cd57d477a694b10438c7a1ea83 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..62fccd630434e878d87ba96fdbf73fe2b1a471bb 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eed1ecf314e162d62cd8fa45eab600392b5f2357298794f42c22f21aee315ffd +size 1165 diff --git a/global_step243198_universal/zero/model.layers.45.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.45.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.46.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.46.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ea4c68d7fd15d49bd001b44f42d1fc5233fe0a4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90ddf471d35716a2384024425f45510c019f8d4c525b343f57dc1d5cad788169 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.46.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.46.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e27aadde6f17bb31c129c6f2cb6493e8e25f190d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d819680b4c7a2563fa2bbcbf4374fed7a5bfe6f063cae5ce7f181137bc4e1f92 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.46.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.46.down_proj_alpha/fp32.pt new 
file mode 100644 index 0000000000000000000000000000000000000000..f169794a5451d29a657309a277f2c481131133f2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a04db37a3dcd9440fd3fba8e6b44c2a672fcf925b3446ff464ad596e010d0f57 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.46.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.46.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.46.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.46.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..057a6c19bd84b63803a9eb3703767e582f0ad777 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90a160c588fb57f88756cc922c897d45d97c8524077b6fcbecb994c54b327268 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.46.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.46.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..87d79604246e11bff65ac877b5b9a14d5b76c372 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f0b8f287b15bc858b05b58a8ec25149d016550e9f9b22cca913407584c28d63 +size 1195 diff --git 
a/global_step243198_universal/zero/model.layers.46.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.46.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b40c4bdeb9265d02529fc2e6f44b54927959e348 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1a4cd4614bd21968b2c14b5403b91c3feef891c68623b0cc360493aa392fe6f +size 1165 diff --git a/global_step243198_universal/zero/model.layers.46.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.46.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.46.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.46.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb86cf46593c53baacda71db3c99630794fcaf6d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:803b991cb59883e61e700e5b4369b23b4ad3bdbf38a21ca273c4400f602e674f +size 8860 diff --git a/global_step243198_universal/zero/model.layers.46.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.46.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..49982cb60b99616d96c7b253c145bad8055b9522 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:c81ab6ef1b6d0551780320ccfbe690cf4e451256869db41a6c8bfe97a1905ad8 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.46.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.46.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e21988fbbad9d9fe641b59bac6d6d2d71404b00b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec6b7e56efb769cfb68e64f5e8d2681352a50cbf70d15c284cf8e3bce7d751df +size 8781 diff --git a/global_step243198_universal/zero/model.layers.46.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.46.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.46.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.46.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9eb77e27831bcd5df49f4b78d202906d5613b5dd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77a0d4cababc1a5ae056c44d9975d092f2469314d765f7e37876e3033445dd29 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.46.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.46.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..114088f2add7e2780c7f3e491feb2788addbae86 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3698c0d2ae1742480ff0e43cded06d7ba04de2b359e301f20345d5d308c85792 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.46.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.46.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b41089108e72f788780e3935add6bf34d4d390b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b5831ad26112c671179b4f8c5ed3399d093e11f9a80e00153c0d3da1d3bc09b +size 1165 diff --git a/global_step243198_universal/zero/model.layers.46.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.46.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.46.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.46.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..445289f869db09eae0fa850f20b5f38321fd4f53 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0703ac22b0aca73df5caea356dca6c65faddccc311357715a1cd4a7ad72c3069 +size 36865244 diff --git 
a/global_step243198_universal/zero/model.layers.46.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.46.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..73f87289d8002d20447491aac38ae273b17fae8d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d503d1cda010c78c784f034cf117aa583aa27f07014614fbed07c8e1a6c2bce +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.46.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.46.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..12ae9eb2716c7a88cf5bcb7ba61d21218b369c7c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56c78c3fd7b419f450a7bfac90870386b004bd2f0192f4e737eed99cb47cc9f9 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.46.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.46.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.46.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.46.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab1fe62ff9610b634ae3639e6d29ceb535436c7b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:0c3728a1f448b64a18d92d3d31de80a60bda09a8d710f9cb74fca54101967b46 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.46.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.46.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..128e2db80eed7d2416ddf638db5ee886d860f14e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d186ed2cc3c66c52b90c7e6e38201697ef9b115c57b06359a1373c345fd6a048 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.46.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.46.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c540d70e9f993aa4fbc6e42bfa815aca9928b80 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cad9bfba1493ab395a8c4dcb0c35b8381cd18fb52725574460d8ba9d02c0cc1e +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.46.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.46.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.46.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.46.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d813bedc34543fbae12e93d8d8d32819420a44c3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19a86bce408102a83e67aefdffac7796ce5c8e8f05314aa3186213898c108b31 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.46.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.46.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ce5fca8a8b6e4647b6fb327da0b0a2fa6d00d8d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d920012a5167274a52ac845679d900faa4d8e2241af328dc7fa62a4ba99f680 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.46.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.46.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..748fcace471609603494d682bae62a541cd2c852 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b66c22f5ccd641f6b9d6f669a7194296bf33c28742d34a5ef87999187296fe0a +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.46.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.46.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.46.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.46.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..18304ba3742cf6e2b6b41e9749ffe33b95bdbf53 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eff8cfa63415704a70501ee5785c4b311136e9a1ce3ed847db623c34736ac158 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.46.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.46.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4203daa08365a98296eda9773871fc7b8e7d78de --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ce0ef178cf172c93ea3a7e9a7f2d67e299b6af37b2259c7dbf4ed04eac58918 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.46.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.46.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d1965ff6e9c759756e0ecfccc95308fd2c0a983 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8faaaa41c714c38f7f894181197276b4fa65e2513d5e91bcfc9ed997e9328e3f +size 8781 diff --git a/global_step243198_universal/zero/model.layers.46.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.46.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.46.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.46.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4570daa685100e25aa7e573116f136b633b4ad58 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af6b55003941a1ffacf3bcc3066d9a944e123afb8e445b8238cbc68efbee295d +size 1180 diff --git a/global_step243198_universal/zero/model.layers.46.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.46.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..066257196be3962a794b96f80a01094048c583ed --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e6df36a5f76350a4d4c0128a2ea0dd3e1bfa65bc725a6b8443b3fd927cecae8 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.46.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.46.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4de6dbb518c98a1998efcfd49bf65aa3020c3c9e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9ecf37accb3494e7110602a2371d3c2996a63e2cdea32b1ba75f30a19c45c74 +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.46.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.46.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..89d3335548edec01a09484a4c7cb08322a3b62f9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f09067bbc07640e499912dd8b69910edc45380cc48185512c5bb66c25c05dd7 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..61e7e1a45d698055baccf7080c1bb4d0b55307ba --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05434ed6d70785853047a57fbe66ca1e67964af8e8141819c83d872f17b24f0d +size 2731 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb39f1da355edc1da6754e4118fc7071a69bd738 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa2dbec0ca1ebb4da7fac82eaf647e2068e2e267ed71116dcfccfae5c2e929d7 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d047980c7fad1ed9871a552d61c56f0371863bc4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47ec21ba16d7d3de6287f26cc93351a4740b5484c372066b1ccda1d41f29a78e +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a0f066d1bf51095f51b826908b12560aa09b2ed --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9517ee5d11305ea2e7774521acec7d863bf3151f20c6507ee592cb98e546f3b8 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..bb3f4afc0812017241bd52f47f5863c11409b33b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26429a98b53fe216ad443787d6d5849033deebe7054acbc50cdde3b2d7c0ba46 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..781ffac29ac47388ef6207fdb6340f7fa07cebae --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c60bb9c11784efe34eb4e6ef089ec5930c360c09d1e6497b27a33fbecf4a0106 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0bf782699c0dbc12f5a3414b62b7633cb1bdac47 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:08e2b75d6c540fa2bb0f8e3a1968841d790f5ce7fa78457d8cca83256681d807 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..16da6fe503245125225d9e65698622332da93d7e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88d6566187e43f43a567345cc6cd59e6ea7a03ac810b36082f0772fd3f09f3a2 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.46.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c443064486ae79419fb1b0c9483346f84bc8f68 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dab867e2b92800bda9ddafb37e2b6c40965eda6f048bb8541d9554777880339e +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.46.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..934b02cb7756f7a0c2137a28da251716869e9325 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b260acc2034cddb2d7dca1d21fdeffdf448b912906be1a7027aaa2de82345e30 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.46.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb920aaa708cf2f64e98b61a168e56212c9d2eeb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b6e0a9b3b572f557845e1e691cf100cbf2590fb6a6f1e296e83c888359bddfce +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.46.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.46.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5052e1c4944d91196899871476552052bd35b711 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71c50bf1c41b9c87e16b7ef8788f370ce444a009f2f517d7a1c27db985c23c43 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.o_proj_alpha/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.46.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a562560cd12ce4c8f96d5515b965e8a9f35783cd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b87f59bff6e22860586d7fe8e1f1679d5ca66324618cd96cb98a15d670e103e7 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.46.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7a320efe484b6e330ceea5da8664a6b1d563a8d5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a88fd41a72cb8d00eeb9f49c55418ac932800adf3a1a6bd5e2f885b4b275921 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.46.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..64f117632a4ef326129f0687b215d5115145bfb0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ecba2c751f20b66b8a95e5f4abc848bfa9bb81e79062bcebd72512bcd0e02b2c +size 8860 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b60c1f6c32f143d4179c8c7b06b4d19959eb5654 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e92ba74ecd0a2b10871a6d631fe4d84cac4a8f93642903aa6a70492739f01e70 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b016be313e44c6f469815b34271054c4dfab1f68 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c19596c75e1b7afbefc92f34eb6d931c4208a08359ef2197439ee4740db6d32 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d91192dd653d42c8709fc0542597f8387260a5b --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb4916c14c01a29d5770098184d6828519449628e02b537467c43ec6f5b410fb +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf64f7f2ab980a0a0d1d9a6bbe92e15f2df916d2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6632c767007736b193b17cd918e91a1406331dddce8863cb3c6da4633f2c4867 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3c7aa4218704c2f0f43c0c5bedda6e33bd58826 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8e211621d20c42c15122004d874759d48571711f73d9f14dde5827d0faacec2 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.q_proj_alpha/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..995c40ca8c3c7ae385f348d82b0a69ea0f5fad32 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b80b19dee022d7c4b2a3f5e1322a70bdf059d4dd54143f95abbbd0f59fc1494 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..29a998c3d6fdc99358502676abe7a08cbc57cd57 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3428e8921cd5bb8c396f06af2d8e662c0d4a9894d24eee0872221fd6ccaa955 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1c92e3a0fae17a9461f8eaec9ef26d470143424e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:575dfde29c19f63f61893ab06dc1395ebe10a9c248cdec438a0f8f13ad42bab7 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd8e36674761ee8ece4bb8868788a2874f72adf9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23dd0e31559189a0e899f805cfb12f99dd6f7e1f57f55be32eb459d0ed4572cd +size 2716 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1236bb5750442091a1a1846172d2192aa036225 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7167f98b8abd5b83486dc712dd5b26595352c5e9cb234f0779c710aee86850bc +size 2731 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c30b2d494268d1b5aa5688464b8b2e25d889e93 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6f447f72a82c134232472f6f3a97e715ad1ca1fffa0ca9cf7b7a28677019851 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5ae37d0a7749049a5323ea79f38acca8a35cae9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6e83d3081f3cfae68801f64d6d3e1e3280712d6fedd1128a6a34e92a3d0657 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..001dab45c24996a2351e34c8bc3dcc540f07d712 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:027fdafa1ddc918be6f88d30731ce989a2e116b47b78d6139fbc4ead31b1f3a0 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea45e025d50609fb084c4761ef653a0a1dc36551 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbdae8360141948785fb40acd1c8ce99c2b2e22e36b479cc82acab8081a06264 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.weight/step.pt 
b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..66c6e90665c2530feb106b13d1b8c6d4d90bad77 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ee58a5a5efb914bababe1be8cefab2ab982e4eea4d01355efa53264493d05a5 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..59f29383695ba0868e335c426f173cddd9275a34 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd42d4782ccf9dac40839e95c8d1716947880ad3d28d27ffc1e351ceeffd597e +size 1195 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b5d7a38812de6593dcb9223f85009848c71ecc3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:63c10b77b0f68e47626b0ab83c2cc7304edc2d4ea5e842512f66a3e0adf38233 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.46.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.46.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.47.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.47.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..444793100dbc01b91e2421d4f7e534d8d3d829cb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94fe55e4f4ddbd6cc2ecbe1d8837834747ffb49be1e3bade3f13598c3ba7627a +size 1180 diff --git a/global_step243198_universal/zero/model.layers.47.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.47.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1cfee6026faaadbd4a7baeae977681aadb4261d4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e297896d935aa487dc3d7ecae1286cf8c08594fd1fbb4df0c9f3e37227e6907d +size 1195 diff --git a/global_step243198_universal/zero/model.layers.47.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.47.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..62a9942ea2eb18a0d50986c130a44222e4960da0 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.47.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70aba4137fc61c4a4dd664b9725e8b63db033b71acc33e640b37bd204ec6b330 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.47.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.47.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.47.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.47.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..43330fd2eebe6ddc4a5d32e6fb84c441132730ff --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60036c0c091a7e4391f0bffe9832c97f56893d3f286fc493877e7f1133f5f9f9 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.47.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.47.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..223da79c3c6894264c4b886d4396c3b2541c8ef8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05509f1507a2b26e3a8669e39a00644bf6249cf4ad8e67a076193279cbf8fece +size 1195 diff --git a/global_step243198_universal/zero/model.layers.47.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.47.gate_up_proj_alpha/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..022f2613726be37c4bd0ad32288bbf36cefdc9a5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e84005a02a9447b9b47cfb35ab82271a93f642167a6e4486da7edf79d1797538 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.47.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.47.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.47.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.47.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a59165944b5449c56b7c3f7e7b4457015d6653d1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48e0e83837e30c704564ab79e09f075b308e1a0ab630201356c80b4521a3ce09 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.47.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.47.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7abf2cd18dc15b9032651c064527cef98a50815e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d10fe0982bc79d1dc87f7394222178aae7ea8dc468691e9eefa4c8a2cfd3a06 +size 8875 diff --git 
a/global_step243198_universal/zero/model.layers.47.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.47.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..235a4d2a216e5c5f8df8b381ca2bcdca12ddc8ef --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a39f04e82953c7354e1b91342185fbf7612d1643bbbf9e79e629b8738387750 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.47.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.47.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.47.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.47.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..809783bc0bc48213b84e4a02974b959b868357be --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0771d0af78809080929819690a856cca63d00abceb5d22aa12c959c132ac0b98 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.47.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.47.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..eee3f6efe3d1252ea9ce1b00f37cfb86f65dc428 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.input_layernorm_alpha/exp_avg_sq.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b98d8b4c3e3e34126d19837237f40c06a0fd8ba2701fd25585a62c5c57e93ee +size 1195 diff --git a/global_step243198_universal/zero/model.layers.47.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.47.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..744681dfba32b1b30e4f581b722cca33b62f8c5c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95582513939a4439fa56779e83efca21d6ff9d4a7e24cef7a03475a514d7647d +size 1165 diff --git a/global_step243198_universal/zero/model.layers.47.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.47.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.47.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.47.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..35909de05b264f1c52554cf0d858006405896060 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cae15cd8d47e0718cb3b667ab3e7550a85ddb647103ac8faf0d79f3bf9a6f78b +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.47.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.47.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..cc46f831d99a78863d381fe01bb630b76ae8268d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4780de7510480a432494176c5044b2c31a2d58b17f23524972ce778c1041198e +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.47.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.47.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3adfe446c41ad55c0e7c692fac3239f7b887255 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09d2bae1f7308338aca30f9c825f1ed769c2c68c61099f526127940c933b0400 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.47.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.47.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.47.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.47.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..601679c3a055d838b264c8ac6b726eaf9e1a9938 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cdfa52a1bbd24b2e40d1d8648dd9fe5bc5955f6fa3d791307c668aa22302d77 +size 36865244 diff --git 
a/global_step243198_universal/zero/model.layers.47.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.47.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..29b553c3b4d70b97355fd8a341f0263a35aabdb4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f70477ff1a3220f17715a40302b194b4ee1b51d913ad80122769c95244e327e9 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.47.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.47.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b658fbdafd7cd2c08898ded9d2dee4459bacdae --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a32361688897737bc9626c35aa6342653db54ce1b1ced56646220be000753043 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.47.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.47.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.47.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.47.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..994f1a10a0122cabd4d3af4d1183b3da3b52e82c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:7866fadbed25333e3625547dff97e93e43cced36a07364c652c467c8d5f6e2c4 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.47.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.47.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a7b4e07abf8cc40cb2bc29511398a3a153a0c188 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2afeb3345d9e0b3b92bb4fcbc23d13dcc293f5ab16481a735d9dfefbd7fb7d9c +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.47.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.47.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d5ea885832fe069c64d40c55bcaca390aaa11d75 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e54b2dae3fd0d56e75c7869954535344c200fc53b8053e42138f69db9914aa06 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.47.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.47.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.47.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.47.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..3874f06f36e947916fecd4ac23139bcf1f1198d5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd639450e56827b153c5702c2d8a3843d5608a5a339bb900dd0a444be9a0c723 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.47.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.47.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..dad7497e076889e8451e37a1b667c1f0bff20088 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35177aa24bc691d4b8121d17abd3e8eb0511fd280ff3aec9d083483f314fef1b +size 8875 diff --git a/global_step243198_universal/zero/model.layers.47.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.47.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..847b0a3fe6037820ed8944472c3547d619ec6ec5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb4b09705d9a21b22d36ce3314b51ff36a9d2aeca79bdca3d414f7ed01e894e6 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.47.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.47.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.47.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.47.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..60301921ae9ff0870f4e73331664fe2d0c6141a0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2926f068fd21db9685dcfedb5a72a8c29f92f4416d158def760da0f0d594572f +size 1180 diff --git a/global_step243198_universal/zero/model.layers.47.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.47.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d02092ed06de1242e4f5f7be993ffaa3611369bc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1568286fe8b96a4f1de4319e2e01b4d14e67b66c03100a18d28008a8aa7b581c +size 1195 diff --git a/global_step243198_universal/zero/model.layers.47.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.47.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4248e5962b9497a2afaefe2776f4c8c0a92251f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4da2ca6ce4a95a5ea4927076be3d2a03597eedd49c37624bd30597e39e4be836 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.47.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.47.post_attention_layernorm_alpha/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c680368b07b75e9edbc23d9bd3b342cbe7867bc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed026d4c96c9ce36039890009a23de8aa09c1a8b228e8e99f5fb8c20fb26a884 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..01c735d8f536f8a9b1c5d7f8cd12fc035fecddf4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:095aac78bd9e904687554a946604c4bf35df5d4bf3d31bb5248975106ca77e48 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9f6c1bcc38eb6289ba3f23745085749d4887cb2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f634945f7307aac4956e010fcc78c30404a54d6367a2f0929955f5e8ed03a009 +size 2637 diff --git 
a/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ab3eb11e0bd44951d9d5afa3cb10d0e3a785c9a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daf4b0aa00761b809bad468b846842a0089ab5a2a7909ca7c8097bf13a68ae73 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..91c7b3f36c63255ace257445afd9dbb2b0b24dae --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2ea71b1fbe3794fa2b935b0b47941dcd332f4c1fa1a6f6b1057a1c8fd935bb2 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..56f3d1e999b016e94a5e39640c6e7598967d01e8 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb53614ea2b11242165bcafdec91ea15a50c537805ae4778b48326d1439ecb3e +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1bde8a4443b57cc13dae19876dc2561c46628539 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d453bf28745a267c13864fdab6cbe7c76672efdf75e501ed2f92ba42fb7a0ed1 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e435b0e212e47d49e13ea4792f30257ac2e135ef --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f784bc123632f64d3a3c6413c717d0bef2a409df3a79c64c31ee6ba21a805cd6 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.k_proj_alpha/fp32.pt 
b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e93ad5188b3ed9365dd775ae4dcdd9ac0b72275 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99bc57fe8d85ff6ff622764d5b3e17af672cec42975936fa03866a8a60c86f3f +size 1165 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.47.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8acc1beca96efeba6318cd78505d976c60cd8c74 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fbe1bb78be9bb33ff30e40f0bdfcbb0e18f9a68faa5e1be9ef2d0897ad26479 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.47.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..505b211b47cdee7ee8753ae33a676a65235a9730 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:4fe2b64c0bd6a501257e4ef31d71bed9a98a2b8fd8eb68295badffdac01598bd +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.47.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..bff0d42f8b229e60e37c182dcc0b3c981804a50f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff17cac0917d9560801f53f79c52a6b48b6fb390bdd6d210e73784d00f8f458a +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.47.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.47.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..36bc4032696192a5b173a3048f577a31c6b4782f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cd950af6d2892e2181cd26c084febde2a5520ec3515ebad76237a10b707cdc2 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.47.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..51b9441ec5979e63cbd577d03dfaf552ee3c8dd3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84b5064f305c4e71d874fedaddbdcb01185afbef9707860e74c43d0c987d4c36 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.47.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..81704a42feab4b5c4bd690ccb2f857888ce4f5de --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ec8dd1d1446afdcfa0c70399a781d8f09cf933b380adda3a7379fa01d6c3b3a +size 1165 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.47.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa81f2c15416a5a057ca0b0c6372fa58d13cf2a0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfb5a27483584023c063dd0a57e8652bcb753c16da1673659e6eecacc0625285 +size 8860 diff --git 
a/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8bba33891be1f15d9aab46d1e0f74d0ca10d92f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bdf50dd99d6ddda51e67f2511adabfc5a5cef23786c509a75100ae6bb05edb7 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8823e4f5680abb366244c543d4d7d4ef105a9f20 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5a8b38d2d6abb9c01655383be46cef3e87a84569638c5f432db92d8ebec8987 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..80f443400383cfc553d0da07c317feaf80a735ee --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.weight/exp_avg.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8cb6ecf7e92a4afa80d7672e8e053bac08259e4148f5613f3e1097e460eabfc +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad5be56ce297af87bfbd1272056b028c5ad0e3f7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c42aa4468882835b1df52c38228ab4fe185e5260ec547c7bf7f3b732097946b +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c67ef56495fd8534cada7df2dc45160de511e009 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28a9c85c099054efca85c3b8b14754589c3e4c298be3097bc579166cf0f7bbdf +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..81851e9975876666713ce895be9431e7a69e3dfc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d720f2abaefdd598a67cd7901fc67af86a058bf61957652f2f6a7b53033b35 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..22da994beb36b21b08e51c9f3fd1b041e7ff5072 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8540f37f1e5b171bacdfce81c283918dc16d25d2598d7f9b91d33fe5943effba +size 1195 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..952409cc87f30015146cbbf9b4d54ab3eb2b24ff --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc4acb76c53eb646c9c7a10494c2f39f90e863954abccc62c4407d854f6427c3 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0eee9ae98cd06071735f0636bb6a2c88d6c3c9e7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:536f608ed1065abc1616cb4f4271a5f8c6337093db51f83d613094e07da11235 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f74983c813b7386d750649d32747f854f5af116e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4030a3a28a1a901dc5b84d97a2896ba59b47fa4dfff3cd65009dcdc75fe140f0 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a2dfc42958b270a3c0542d657db935e2b0c1137b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f564dde4c780b6d760d14b70cf309f6395d3696ee1ae2c1887f77ba3cca38081 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.bias/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..46d1a51f5a656033ab11897b7b0b4f90520b1191 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:259476104919d8eef95ca7c6ded29b3afca59a3706fb66646c353c6e84adb51d +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6619304781d1852cc5693693eeaeeec1108b9296 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ab029f57ed5cd676acfdbb7782700c4f40204eef75027eb6e0e04180e26b587 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c83370d23d005a72987e99f0b74c11ecbd343930 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7684d5a20bae70c5597c8521a0478f36871c305cef929d631c849c16e41bde7b +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7b0d8be48f37b7f42245261bf397d2831e1a283b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5cceb86f7406aba08a6d5a4ba89f5a8dddb3e7edacbc8fac21b889dd95b6fb5 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3c49ba1bf06b4187a4ced83716d2e81b9aaa17c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d76156f32b05643551819c7c358f4605daa5f79378e39c418e0bb9a5c8987d +size 1195 diff --git a/global_step243198_universal/zero/model.layers.47.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5b68d1253ac4c9a9c684d308960b1351743f4c1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d29594eb977043cd917f09e341f09a92245047670084d6c95096cf9508a108a +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.47.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.47.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.48.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.48.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..bbd7282d91fa360bb98021fb772ca698647e4646 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d3a56ab4538936a3e6678f18c67574c5ae0eb81f10a347ca2cc50411e5ee88f +size 1180 diff --git a/global_step243198_universal/zero/model.layers.48.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.48.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc66d8b4810416f051cb3e7b55c26f4c0be9417a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:536b683d9c5aa4bc6719eea2f9646d486fd51f50a4a9f252815d45bd62c29367 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.48.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.48.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6720e808b42fdb91039bc23a03970453c770aa72 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:1a0cd2464b404dcda02b14d6df9441859486cd2acdec650dacd66d9b8b0ff59c +size 1165 diff --git a/global_step243198_universal/zero/model.layers.48.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.48.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.48.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.48.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b41e81411e5932c9c5c7284a4a5448fcb08ee68c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa93a777f86eb37334254427844793bf1ea8470da8a36c6f0c670f2c05a5237b +size 1180 diff --git a/global_step243198_universal/zero/model.layers.48.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.48.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d1ef2685be5e645d0d7199e17150bff294aeda3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f1862c42775b5c51b70fb0206d170ad5b7bf37997cb056562836c64c2621e31 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.48.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.48.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7693934ab4e985aeda2d8f01ab4a3038e7d076fe --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.48.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ae2a86c5110d005952809b876a9e74c777cd006bd909eeb98aa6bb0c0ef714f +size 1165 diff --git a/global_step243198_universal/zero/model.layers.48.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.48.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.48.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.48.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9cb8ac1610ea7827908d48ff97379f7fcc423e63 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6ae301b07def8fc5cb1beef9dea183e9079749b2d1dfb33d5c1a6a633730cb0 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.48.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.48.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..767e7e522247bf1337b636af7134ba3c113c4b67 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c831be533192e62131ebc2d6b2efc529ad89ed7084ecc28e91334df9e224ce44 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.48.input_layernorm.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.48.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e181a60db2dff2763ac463471a90b8a57e1f77ec --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:649580b0a77299d2b9106dc722a129c171dac6872dec45f57e05aa5e66e36059 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.48.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.48.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.48.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.48.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..57b53b000c02da91ce2eaa4404af469fcb266061 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ad40239de9929028584709509756dec3e5777b79f545aff143a2976c3726470 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.48.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.48.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9898f3d73d503cf038917b01e1812633b5d0c7d7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b786726adf35869e25a98fecc5a3da4b7f4cc851c74e4735970860862fce8a7b +size 1195 diff --git a/global_step243198_universal/zero/model.layers.48.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.48.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1040a78afd61e1ce5a030442c9a7e6188e6b3074 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed9e0f8902bd98aa34efa8ae75e1f62ac4242db960ed19b99808118f61aab0bf +size 1165 diff --git a/global_step243198_universal/zero/model.layers.48.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.48.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.48.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.48.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3abafbf4519de1ff32900411443cd6c195937f1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbd6970e5f3772c905d1ea648c313dc5067cc8726a05d6d28f0260131fd49f32 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.48.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.48.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7440c708724a28a5bb3fb9cdf9529d5b4391c3bb --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.48.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75585b12c7a35a594ed48909acc34ebbe7a9f9ac81d043a35799e74ef0472ef4 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.48.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.48.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d9eb49efebee4d6125fd9594eacd4d75891cc63 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf0a7d1e8b8eda2b48385f5aa5c2b3d2914165cb0dfbf8e931f2a4cfe8405c2c +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.48.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.48.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.48.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.48.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b89d4770c958b5a480618a6c7097dd77322de7fb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f05249960847a5fb403d877d197da44519906aa7338f1cf8cd60ea9838ee8ec9 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.48.mlp.gate_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.48.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..973a7fbe3eb5e947779e5dcbbf59ad873e7c369f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40ce388f7d7d164193835948138bcef8f42aece7365b49f945220ad292589300 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.48.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.48.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f4b8fd662aefdcf97a77d73ee45b0a4fc46f5bc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3a37b8cd6b331dd764d853ecdf3658d6f1c4eabc628698d5e113a72aa33ebb0 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.48.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.48.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.48.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.48.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..85d4cf5430484435617a6000d2589a3774cee663 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:985efcefd90452b5e4673d85b4465a9533ab25467936afbd7a24b68f92c509f8 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.48.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.48.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f916f7f120806bb2b40a6b024a3b02e5cca347a1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e91a136621eee12c6f691404540518ae04c7e79bc49356a3b1da2d5cc81ea3e2 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.48.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.48.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0306b8494b49600f9e37faf2f01c91a41f8132a8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85c536effb340d3bac634e2cfa7d9afcd556cf5b82b5a0f5f4610c2462478cf1 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.48.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.48.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.48.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.48.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc21a2aa624e6f8e2bc37059be4cb076120a885c --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.48.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:488ad174b097fe9d26d17fc8c8e41f69f5b869e1d8164faa42813fd966c9b6d5 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.48.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.48.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa859ce36691b85a09374ac024f610de06f6588f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab789318ec3a17aaa2de2daa4ec9a3c32ac90d53f8945aca50424e5c274bc24a +size 8875 diff --git a/global_step243198_universal/zero/model.layers.48.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.48.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a3b045b93ae3cf9db172d7ba058c133a9912696 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e684deefc390cc1e9380fb9d4614cb5ca0a555f42a74e95606f3c7b51b9cfda6 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.48.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.48.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.48.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.48.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..af99809c688cc6ef8ef71a6128b406bdbc825038 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55d0f9310f5b3e01daae6e23a442c4c79905cfe3c72af58323c70a25e673c3c1 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.48.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.48.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..be19d9e9031fdcde6d2b2175dcc57cc5fe06c99c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5c2bc5bfea6c48a92b5747502451d3339485132f5d23b37409c4e1b2a8d2e1f +size 1195 diff --git a/global_step243198_universal/zero/model.layers.48.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.48.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..20d64d0027ab081b751d6205c956a0afb76d3700 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10e59868559a9762a4835abdc42d43d5dd107aebaffbce8a342d8cc1b340f483 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.48.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.48.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.48.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f205b9191035527d5a28c886397f562d43995ac9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c65ff95dea93ba03e424f6ed71a32aec0025592a95a03c4631b4fe6aeefbc374 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..77b9e9e3b85e4d788d1ddc777b08bbdf2ed71324 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc29c350fa741924749c0f7bbda59324ee4a83c1e55e76cc723cee8d4f85eac7 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..87b8f02b1b4f7d8a7a32226c31779a6e5e56cc36 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac999f044ceeb84249e06f160445e43cdfc468a8adda367ea35d0ba875edec5b +size 2637 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.bias/step.pt 
b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..227d342bd3366cb7678d2754ddfba8427bb240c6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73f336dc52551f87aae6d470a4277ec895b569042b5312259d6c18c4a745a3be +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..eff5c8c66f29cb5a0a640f5f8824cad90177aac7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:705e4f2f81c4b4d7bcb83ea3f5c722a342341b68831bdfcf2a8bb880146b6342 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ce155473faec7866af25a06ad2402749106759a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:082aa8be769cd467edb1ad45e7811b47a497641df5b34e0607ff3bfef756579e +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3bec64b4e095dbfefcd5af8f6e0dbb7a09908331 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bae96b7ead3ab118cfcc001f2603f025ecbcc57f3623df79c9995fd86d28057 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..04ec460c04cb0e6c11e4dfbbc9e201360ad11dd8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66e6c34898b1822d5afa00581d51ce95db154a4e15c7451542a1d7f63e215f75 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..259cd33b1041bf94485e7299f702301564ce38ca 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12ef1408caeadf8c7b437efcce6f2767bb89f0563103d77ff7a145bcbece9df1 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.48.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb392706f50643c87fb3f77449142d04792958eb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:627ebf2cfa9840505bee29eba65916cdb3e96e270152cf0a38f2651562ef0c69 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.48.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..56cd717fce00c8543697266fbcbe66a4ca1b9111 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f7798fe56b8b8b5f6045194cea9d22f6f4d73473c04fac6ef45497a817cff42 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.o_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.48.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7919e29f04652a300d7c341645de151ce7e0c6eb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f533b0d51b676c2ed99363f8af6861ad2bfb586366b354b60a886be8c4ba398 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.48.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.48.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e70d73a41f3e168a258d038cb3ccdbec58d3aa50 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f965e360690aad861a29ac4bacb58e5130b641918b669f9c660d1b061ecfb55e +size 1180 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.48.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8df07195d8d3ca151ee88b7a061ba090d9210abb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d708218dbc484bc07718a260c31d3e18fba0abc665ae11c21bf3145f254b1d4e +size 1195 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.48.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..924b700f12a2fe2d8cba91c1c68c624758691491 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08dffd454b54b9912c4b9d128e229c1013c2a3c7e287b3bfea161923531e8ca9 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.48.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..12765284f0fe17b5399c9fb78c091727c9784a96 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fcfde0abad79aba5c9f6dc4a12b23dfa7a66a28f8d3b424386ab3c9e10122fb5 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..164c0d0e872084341f4650b44c84377be9f35f19 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36ef3f255b64a6b1f00397f4a18d48179003378cec997f113d00a0bcf83cbdc1 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..bca2bf459902be53fe3e41d4630e6a8a9b3b56ec --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46d6c56e03a0d3c2cbc05b75783bfa7923c213fedab8bc23c7888d02bbd4df38 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..65166d90c6efd6f93f5b87099c08c23928332066 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ae79dbd5a4918ccdd6a38c1d7fd25bcbe49b9442aadcca6d3f4b4a2bf4cecf6 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ecefc7198db94ff47df4ea1cfcb8db824e580c9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cf08ece2ace2e39db93ee6a878438b3dc86838acbe0a5e5a4d3c8dae1ff8a25 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ee249009c8b0303581caf2f5e1743ab8a27c27ef --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23e0f5a594b1e6fb96127ec410d06ebf82113942377a98d2c5cf01c0e78e5a21 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3def9f41428b3b3c0acc7f266409f6d0b90f5e5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:350ee39c1c22fd7808585a3fee54de711a33381a42433bbed6da9987da4098a2 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f7699e5be11f3934546de49ddc28513804d2422b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d8a63ff76a91ea3c6274b83cfb0f46fb0c70af14b02e44f1640c7f6301ca802 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..26d2e11da4b607add91426004540c86ab6bd6f4c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79b7476392f1b043918c5ec54653bf98c044cf3dfa5b178c62746f125786f194 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..68906733e56c71bba6b570d65dee052541005548 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69c1b2ba65fd6bfe3850101216980b5657be2327825195272135fc7e043777fe +size 2716 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3104dc7e8b4d3e837b8d6886c7db85e2759f32de --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21781a37ff05bceffb8882daa2b4a236a4ef8ef23fc6bfda4742340fc8e792f1 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8b791b19cf43cb9766736578f3e5f6b2fb9eb72 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f2163f5340a500098c9985ffc3e842bd46646da6416e86b87d1be691bb09d08 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e99ae3f4edd34b44515fbc417fc0942f3395310b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23c3326d7e42d80e2ad0ea51b269e95444642d4a04b746f6aee6d001c925ffbc +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..66e6af0da2046d10247db14f5453f9153353ff33 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fed75ed36245e7affb2397dd9c1ebf1e3ae38c7b36e00ca565b42ac0779cc5c3 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7296d9dc42de21044675cf1a347f0e3e4779799b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6653a8afc6bf750a08ad984662d25e47ae7f3ede16f411561fcaf3ef985f1591 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..208afe253ed7b3e11e81740cdfc96975b4162807 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0ab433b76197e721ebba5e28bb8e17d50cc932fd87cf2aee1ee5ff1e2f49c53 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d794d7b8b932bb0ace92d6d56336250f6ce3a809 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cead05753fb97a7669e0fd26739ddd901f8d9ca8a7f324b7c29c4fbb8534b903 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd78ec1a3d5d8bfcfb0fcbf21ac32ec68fc99491 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1183550edc9be5fc1a9ca93c7ed79e34bf42e2af4419e96b06a221bae98e41e8 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.48.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.48.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.49.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.49.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7fe816b59ade1200a747a47cf6eb7ed835f64d1d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d51308ae46bc26f5906d6c194755c1036875145b571a781a86497e8da798640 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.49.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.49.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ce17289383aa06e9f5fc14fa9005f593a06463e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae8a503c9dca386e9403cbc71d26ef64cd49c66f8daa0d8c3c0732b9c9c73dae +size 1195 diff --git a/global_step243198_universal/zero/model.layers.49.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.49.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d0b26a410718f81ecab7316caba66676f9209931 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aa81c6b35c5e33d0a94922b06a154c5f05c8c2fb7a253a1f9964d19d50c3921 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.49.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.49.down_proj_alpha/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.49.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.49.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..470e9388bbd1f5c224a9fd6765adfc4daf13e558 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db34e6fac8e33279e1da2f8c36611acd9dc0bfccc6310aab46badd844a9599ff +size 1180 diff --git a/global_step243198_universal/zero/model.layers.49.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.49.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5b48524eb430a02b162a8a38aec815e490bd9765 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44b8db5696bdaa61cca6de2c9d3e0e3135a31f5634d463487a2aaac75d57ec23 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.49.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.49.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b27e0b7119a9541148fe38f3d238430b6f76d8b3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dccc282688d76194d9fb13d1395795045a34ee1f5a677a9d2befd0d389df1898 +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.49.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.49.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.49.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.49.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6dcd56babe06071f352cb97a61a19eb86fd4f2a6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:714e0111bf02af651be8f6ee01ace97a71b821e4571c8c0b283a4c09233aae6f +size 8860 diff --git a/global_step243198_universal/zero/model.layers.49.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.49.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d52d1f8453941e70828597f8e25a51d2f690db91 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2804c3ba36f20d2cbfa677aa124a04b3fdb807af3d7644230e3b871d3d8789b +size 8875 diff --git a/global_step243198_universal/zero/model.layers.49.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.49.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd548ba47fd1915c80913569b646cee323e7a478 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5d1922ea6e2584ee6fc2df4b7d8e17981c752b1e12ec4da8f7ed1313a6aab19 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.49.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.49.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.49.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.49.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0594f3f5162e4c1f8ad26d5954a45fcff6f575a8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75bffc64e28bc184008793160ee346b24f34ebeb983ad81582d73ca9621be287 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.49.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.49.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..02a0b9cacd23955cfbd31b77e66bf39ee2ec0128 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98d97ef047575832092ddc584c42796c9795034a0e57c0f88561450f77e49263 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.49.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.49.input_layernorm_alpha/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..487e6a26a9ec5acef25480559d9c6cf6ea62c8a4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d4f2b03b09141c4a56060f1e65e6ecbeed6b3c7daf11500e836b78aff2e15a3 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.49.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.49.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.49.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.49.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..debe546584c6fd890875366059fb19f9ee860122 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee81434293e469b7e8f1f1de593245d17696cb5d2739d18443654809b31b811d +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.49.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.49.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e43a64ebe120e13b5ff1278173edb6449cbe7c1e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed8c2c6e8de2f9f6e82a614d6faf41cb6c6586e0c8152865d28e86265a829449 +size 36865259 diff --git 
a/global_step243198_universal/zero/model.layers.49.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.49.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3bdb93d2c30515f3152b993cd687b35fbbce97b6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b120bcbd9178dcf5d201ff15f5b6722dfd092c253e3b00790a687ef84e1e6660 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.49.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.49.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.49.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.49.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..43e84c3ee87d6d01e288344de72f9c82dacbcbf7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b09dbb257f5e6f7945e074a2bf9e9bb36425dd849c40f7bf69e7cee21c65ab34 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.49.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.49.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..561565acfab12c24bbdb7bc5daf6accf1a152012 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:f1b98b383abea440a89aa10455a75ce211f628c571448d271e820af37a6baf07 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.49.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.49.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..91ad63b757e2473908ed1a1825d3c408cd89661a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afe3aa0aa0e3b2ae9b32eaafc6606fb83b60835eaf0649265f073893b5857cec +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.49.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.49.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.49.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.49.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..82430a43df6a8643dbc6c63d05752d6751c00390 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b47efeaa32c3a99e25da5c0225a93bccbfbc7a93c7568ba7c2a641579376d9c7 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.49.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.49.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..00f9073cc7ee2168b02789f696ea078f251853b3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:430bbe2cc0ac2e0178cf510f10da702d26b0ddddc3b80576608c9fde84ee3f5e +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.49.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.49.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f2dbae419463241600fc01ccc3ad7cc09298647 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a5e61e9fa567259a3ce34206e6d90598ef5fe84cde15eff6f0f20c9608b068e +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.49.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.49.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.49.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.49.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f6d277f98bf786a37a478692b700e94977765e1a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:042dfd27b6a22a1a68ae5425f4f6a78621925540a61ec1fe50fe399bbb6f7780 +size 8860 diff --git 
a/global_step243198_universal/zero/model.layers.49.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.49.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d5382d082c98b739dfa0d5aaed2e021068b0caff --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76441fbe71c462b1e922a18edd0ea0983f6facfc7386577497f043dcb4f2e38e +size 8875 diff --git a/global_step243198_universal/zero/model.layers.49.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.49.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..52bd180e2b91415d82c5977e5e3cded587338f66 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc225f3817a8072ad95df1c6b3e81266f94486ff3ea617bc9ca78e59de3c2826 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.49.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.49.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.49.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.49.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..67476c286bf83f11b4b077ddeea662376dbe11e0 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.49.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0416d7220f2e703ab909545d41cca8a5b8684850e9bb989f3e115e392a09bd61 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.49.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.49.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a9d233f274603a717a608d0980d2747b1f4aa58c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f43181b68dbdee1b8e1cefb6df9a6013fba4835198a9e3784d92b93c63aeb120 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.49.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.49.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..89e01222a77734a5e1e3bb5fc66d47c8106727f5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7647beddb40714bc15dedda68802bf381d15c5bd4361397bebc53692c918762a +size 1165 diff --git a/global_step243198_universal/zero/model.layers.49.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.49.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed484ab8fb600746a765c604f8568818e658f3e0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1582824ab2e60b8f2623b3c00635339af0f84ace90606171860c23ed3916108 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b5854dd682bb61e024422cf751d19b423791f3b5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c626d4a39d2e12868aa9182a63a2298e63dfeafe97dd4e4f6722b3fe78357731 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..84c32600a3e28193530b55111647ef4ea8ed5110 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6224eb3380c53f4106e08b968f92290d3e9fa04e05ca97a5126d355c61b0847e +size 2637 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.bias/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d63baf948d63c89a424e34f1f65ea0b8a3fee32 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f35915546c8109461f5ca6eea7ad208b7eacd637357bf0f0155bc72798d8191 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..aceff2a046f274b328eae27889caf08aae113561 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76d61fd00c2cecdf6ebc295593c7ee98e5779b899e6774c212a242f749ea158f +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9521ad7378026984cc8db055fcf963336a5e13ac --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81e6d5fabd9e6a041668db64e3b8f2ff55f90541ed559dd5a3a791cdcd3f8f86 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5094a585576344ceb760c2ee909b5e28237177e3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2b524350cef8bc7d2e75f2779740c92148e43cc20df8202792eff8eb7909d41 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f296d94c3c642cf9022639b31f837627d5948021 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c653404f617e6d67df0a2113fa2572a588163f03ab09aa725c627c7bf21a247 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..40f57b31287a0d11d0dae0c35ae600218cdbd2ad --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d707fac2cc302e585c57ec2046c1cfe66e2837ffb2234ca34a82374175c3487b +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.49.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.49.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4dbfd9cb9f860b91dc635ee5b125b9c4eb29f6fc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4ce5c7e1da1d4f39bda9b80641d05479ec07f1aad14ce6eeaf2737784259c0e +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.49.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b79d5ec297df704906001486e772d1491cc499f9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a48208a4341fe56dd3a542d0ce9e0fc6c3c8ad8beab5b864d1586ae6131904e +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.49.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..af900ab6b7a25752fb11608c706e9fa5c8a60da4 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.49.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d428b2c24f738bee020e6f8f584ab93f26b494ba65c5d076bd6030871e11985 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.49.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.49.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d0290049af9c976fee2b76bcbd8a9c7800546f9a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a13c4522df1df0ee9eb31354a61f1dc973061ed9d53930963fda89147781c77 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.49.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..78413914b0c5e85b614a10b113e35030e7d05113 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62d8514f3a63a30bbbd9438584c9d12ab8a1cd2dd1112f37672867fe107a5b73 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.o_proj_alpha/fp32.pt 
b/global_step243198_universal/zero/model.layers.49.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc9fbad7d9bfdab28fb84b06d84a59d2e87e7305 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46242cc8f1b55e830befb60203c2192473a8fe5cac4fed7c1f6c5764cca62844 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.49.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a0612f4a58765d4136007cf5776eae55c20e75f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8cf443957b157f70727c925dc371f987b00a24ec852a8cd8b32780ac314ea06 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0d0bdbe0a1ce713c33c12b4c786d42c468c46a3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:28e29c81faf5536ad15f18d901e0292be906db13b92c811cde4278554941828b +size 8875 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c4beb79a46b42431265fe16f2774d26ea46f388 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ad43df18be25d2ca513b7fbb35e2fbbac4686d851b6c72e3f209711fe62aff0 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..831fbbcbb0ba921124b02ad8c3692d4e40fc81c3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e63c57604540651b50c3c13c2c56b8d3eec374b61576cdd7603710338916d253 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..60efd3a37b5bdd8f36b15a9cd74c909a08c9ee9e --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:beb409b10cd77c2ad27ca7d47af7c2257418a32e4efcaa925f88fd4ed12f650d +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8a6ab1ffcb85a1bf1c6a90a25e1c22e0be064ab --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca66b916a90d68ee0fc493ff65cd3d93894bbf86b94ac8adf602936236a52edc +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d4877dc743920fe548fd74cfc5990477f6d1d4fd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be076a96d0caa58210efb441ad09ed23de2f4ebbb8d421e429d700fee2090573 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.q_proj_alpha/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc91506ce27df276bb5eecc6062766c83a0c1dee --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3e413c725b73a1baf9ab90722739dfcb683902edf4eb200e69c8a20ab68dceb +size 1195 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..15811ffd15115a0429966e65c698823694233c24 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a1f6a356e297eabeca8fe407b215806cbc901d509ca4894e7d6d16052bdbe89 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..22499353f7c6ebae1a246a8f56d149ee1af9a2ee --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:97bc77a07bbc0fc391e32d61b6b35af264935338dc66f765ece93bdd3237508c +size 2716 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ef779b511d0b0d9276197cd7350127f95cb2cf37 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5499e84a910757af27fa94a0d7a9bdb75c74eed732266618635c783ffb9b6e7b +size 2731 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..bcbb4366af61c66e70b1c4c8f9c6df01dc606230 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3ba0eb5bd6962c4bdc1ae6304bfe21d0d9c76f477ef882e3b82ea2bc4e7dd00 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..30e7158327c2e1650879fd51f55ae2200d67cf9e --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cc2543cf477649649980f4f2c35301f13ef6d0efd882ce8197aa432de502579 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a093316006774025d3358e16c60a18e0b136899 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d47097d12c8868af7023bb339afd56d8faa04e00fed468e878c0c99c951fdbfb +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..039354a54d9eca53c977527a832246ca7692b34c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f3604b1da4f1e4cf91be5dcba219b822f36ef3c88f9f84c2f09e3c7894074cd +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.v_proj_alpha/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..efaa67348413f12c68a7bafa20288ada702f0779 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f2304a3ddeb1a6b0daea30b9ce7b5a35b19c81fd944934431d74810fe9e6139 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd85795d2c889a3a94c54d767aec45217ea301eb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:057aac3755f018db416d6f33e9d8392197a0429d9b35063cca49316d3ced1877 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..123b72a7865d89db95580d59b169f4c4d4838ffe --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cbcdc834d95727221c8115145157221f5738ddfc0c16217117ba4c1608b312b +size 1165 diff --git a/global_step243198_universal/zero/model.layers.49.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.49.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.5.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.5.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d38975510a54e841fb23ae64569f6367b0b140b2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99198298efb8d526e42f5a1c53b7dad880fe1aceaad90435748c626170b00daa +size 1180 diff --git a/global_step243198_universal/zero/model.layers.5.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.5.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..714d22548f62350d21fb3ba20afb87e1b15677ec --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11a4e515be077d11d5944fedbb1cd706a731af85e97d1fcbb08ce7d506025929 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.5.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.5.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..00145b558d4b9a8835ab4aed57d60507b1d72851 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac57273114583338a4143ce8243ae53941928fa4323c2b36db787a0552a22c7e +size 1165 diff --git a/global_step243198_universal/zero/model.layers.5.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.5.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.down_proj_alpha/step.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.5.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.5.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2acb3cada6f93a9a0bc087779cbbf4672bfb23ff --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d71ddb5cef16791f1da8e1e77dbacb73e50d4b809b0d44678d3b029ef095477 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.5.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.5.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..86c03e7d780d88bcdf6e543e831076b60ac8cb02 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1cdbda78caca1f332740ef93760342f957dffc3eb384ecb363206f7ce35ddfe6 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.5.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.5.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7381dd2f759b30be7a16d59590c556439b41e23b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:692ec1e016634d1dbe1249398187ab646d7cccbca1d8dce3620368cb31406ad8 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.5.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.5.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.5.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.5.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..711ec64a16d944bc787bd49d1ad0248f157b3b44 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f8bfca402a8b534c1488b6383f0634557096d08cda1df155c50a6ce2e48a6a0 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.5.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.5.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..79dfd6fdb25f16fb065916a614e5ed2e5d67142c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9932d210e56afad800f852082dda74dba893763743df983052f956cfd60d428 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.5.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.5.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d0f015f00943bead503fa568dbd4d995c04bef7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06d765afe930424bbcc7a58cdda6c6579dcb123f5411e8ee3d7befda6e8040b2 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.5.input_layernorm.weight/step.pt 
b/global_step243198_universal/zero/model.layers.5.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.5.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.5.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d4e189301cd5b3017314a367f3863655e27839d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b37339372da5bd41757ed7d29d22b3bf053e7a662772c6e85cd9897a4cbecc3f +size 1180 diff --git a/global_step243198_universal/zero/model.layers.5.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.5.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f643dc777c2b10520d969fa6f78212f9e501d12 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1876ff2eed74fad6645b879f1b7114d8185aa325692cd3cce8984fd5dbb2d53 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.5.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.5.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..563ab5ea6e997364813a0322d2470dd3f2e2e722 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b0ce86af3aa411e16d73652842e048c6ffe0ac1b3746375c31453c192ae2d3be +size 1165 diff --git a/global_step243198_universal/zero/model.layers.5.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.5.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.5.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.5.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd763ae5a892be4f5aec7aa0f7383d524e07d577 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5356748c380b807822387eb5b0cfb2081a14f667ce803ecd83adb471cca37889 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.5.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.5.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1a60ba946b33319be61aaf039eedec8842b738e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d02391f8660b04071400cf0100341b566185721ac4bfbf351c00ba316b7982f +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.5.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.5.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c58cac20fc5e2cd58a74cef8030b3fb22e4357bc --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.5.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c5758205454afee911bb646fe4738bd944599df7c1fe5e52db943328f5c2714 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.5.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.5.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.5.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.5.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..dbdecce469839677b00e241664149398affd0ccc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91d5aa091bdce27c706eea38d771ed4341cb293a5353ef1c0871abc9600f067c +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.5.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.5.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d27ec3d869a7f42e13d455fbae2e2afb4da5f077 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07f8c2b8caa66ac0b02d0d2726044a41a133a716d481e3f8235632993db82032 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.5.mlp.gate_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.5.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..276ff21f889c6d983461b6285333fcc9b75bb214 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec8f016274fb4cab9e2337708c43341cf79cb46fb1b15b4df4a07cd29c2589bc +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.5.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.5.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.5.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.5.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e55631d3c962a4b3c2113778c41a75cd69f21ac --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0adcd446562418a07ea06a88d9fc4c1bcd3641820c936a4801cdb49dc1c7c76e +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.5.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.5.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..53961e2af282524bae1f7f408ba0611f184df1af --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1cf8b1728dc8e1aa52d900b0481383a21c884d205ab3a7a9baab853a23acf825 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.5.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.5.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..47842fb132975a6208e3a530f5deef3597b5f35a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:241edd84cfbf3dc67f4add5fb46aa972f884f03a6308acca5772ef9c750203d7 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.5.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.5.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.5.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.5.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab3c35d404da6c61f00444ac38ea87ce25d81c0a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36e75fd7e7ba8f0c8bed93f6e20adef109540ca5a16196dcb802b7194437f528 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.5.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.5.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..134fcabf1227471d784ed0ebf18a01cf6525db04 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:227f89cb4ce015eddfe166a14cb33c8426f78214535d60f9e0117bd83859fcce +size 8875 diff --git a/global_step243198_universal/zero/model.layers.5.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.5.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9d047287fbda39d28b795be1c77a360aa3c57371 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38c979f8d9424e560b654699679cb8835f8949f070a959a56cc824f8a3d85c07 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.5.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.5.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.5.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.5.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..399bddc066ac0b6bb10774c4ea8d26f9db4e1291 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:8dc44de5fefc1880e9662239425df8b2e74e4789fe9b36089e044a2deef925a6 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.5.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.5.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2fb72c7a94652416474ef9422beeb2059cbc73c6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e88d0e57e1c6d2af8ed53990b01da98d43117b8e47b53157ae5ba098d6adb262 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.5.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.5.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0baaf9619649fdf346c8bb1796c3e22bbee7efef --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e29561b41d85f64172dd7992b6ae6b09b581888aeae2b3b58b792f867030613 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.5.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.5.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..0fe345da648c9dd8490d46683101e67f75198597 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80ca76c21968ff52001f379ecd16fc9544502d146e3694d8d9d20d8c8d4b7cdd +size 2716 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..35e01804d6a6f05399fefce73f6ca4c6aaa1a731 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7332948dc2407c187e38d75df403ee82215a21aa5a1608de6297479b8b37cdf7 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..490c1ca6bb9b7ef9d202a24c477d5ed9c392b497 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c21cac2020e7d502a18a4ea8b2dae38368eb094e804f8993c07bba721521615a +size 2637 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff1e6d5e37fb6c233594193c6257a14ce6788e80 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2e38dbfca4b5dd6c741db11683432807cd33681196c4837e883aa854c7ddb3b +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..40facf2bc243040a36c5e2d93a297f9143adaa05 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07c33bd28c8842c3bf6a783ff0482a8b6f5dd4049efc138a42d5f32035fa647b +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f65dd479e6e9e6a398d0e738d3a10f8a56ca213 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac9fb0ccb700b0f5e2cf4ffc7a00b4c37b9a381bf37836c61831aa28aa47c7ad +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..263756e7e1449005dffbac18e252cedac49ff9e4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02d39fc2892af7a628dee9ca6d774ab50741a8888dc38a966cd557defa7ee62c +size 1180 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b6f4a8b6ea2a6cb05654b648681b794c8b953606 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c2adff0b63fc25f9d082de6c7cd3ef4544f0235aa41d0ea34f0d3be06ce7154 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..64948c21e3e46f5917780eaea153f48c7b3455e7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aadc9021606169e8260a41179a52b0e804e776b851381ea127f8193425118c44 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.k_proj_alpha/step.pt 
b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.5.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..241e310a768e156e0232d58d682092eb768d7f43 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c97b2c3fb245951f6b1346dcaee1620a44a57d5815543357d4c6cb6283d3bd85 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.5.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..71dc9132b3994ff83b8f4d6a55ca09eb081c6db4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfc7c0fd585da26147fb891ebbbc290e94fbe397d77840912a15dc857ace608a +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.5.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7fec9376ae35f911f47bd98faff7ddf6f0ca16e5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:117566fc28efa37bff5473f32391e30ff2e72f7c08a7ca19e576abb9fd6d819d +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.5.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.5.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1636c7f1c8bc83a91a58ed1665fcd58110a11c5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd9b412712f19878c18d390d71f7ef99c681a4d46e6e066c37f8b7e363f54d45 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.5.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..304012d80872bf18ad0b60ee4420c97cc7ae4b25 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73e0674b2c981bd3d0282da5f3ccd5742f52551e21ad2388fecfbb84d6da490e +size 1195 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.5.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..806c644ace98e230f4c82e7952c4bd937ac0e2b2 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38676229b15c6f1437a5e7ce52396c835d005d5a6a23e2ce0f787f810d6bf276 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.5.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..00e95574ca58c7343e4c3e4bcd58a3d9f8a4a910 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2cbf6a1231914a5c5d6e1781210471e4506cabd649dabfe07f8a66a39a51819 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8222ec0cf09c33f30431b0876feeddf4ce773487 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75c5eafb6ac4e43e1f988c071670d648681171842b22f342430caa5e713fde3a +size 8875 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.bias/fp32.pt 
b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4469e9a6b2db4ae300b27f3b014cdaa22c3981b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f53a3dc58f4cfa94efdc72c71cc87ffcaaba17beca7e1566c1b8648eae1721fa +size 8781 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..68657476d75f5287e675d02691d3fec8d082d166 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6170df535ee41a42e13345f4f9d0b80cc194b83988f327dad8addb576dca133d +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..af1893b6552a7a592704639fd7e08a22d3caca43 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:9bb4d69ff09cbff6783b7ac8f845c2ee8de0676be1fd41f6359143c71140f72a +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c81b82401666747954a3807a9599ddaba671333 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c9c91017db81db797b3f238d5d77cab353e1efe13e0a80a5ff2ee7375bbb3db +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a812498ee9917deb8e1e5cc210be17b831ca6a20 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff939ec4eb5c3d24afbf41507a57f89ec1a90fd96090404ed8bcc0248c626281 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..23e2d73fdd2f5f67261a5b62be8f1f45254c43e9 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0ee427f1f606bd4c7323b3bfd2fd88487fcab70e00750949a46fe876a71ec63 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c9c7e12f988f0b337e38d2b0e3990157fda971a4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c03d773780e8ed731058c384524e70d2b8f16018fc210a834edb0eef139279a3 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ac8ecf732f62a6b6756f6fbcb0034140c81caa4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8601c0a6fa0d2137a193f46324dca46ccede3044b583933707cd0e7ba37cc30 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.bias/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd816470c9e2e3f5c153eac24f67f8e534aec54f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8994395f42398f9d78b97528fd8879123a9bb3a4ac4a5ef453dca27672c4e5b +size 2731 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3bc7acdb19c555b9dab76222a0873c98ac701dff --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b025e4bc277292e76c4f187a2c669c90955da9e0541abe00e348ea1bd676647 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..beab50df4961b31440414829e75e393729dc881d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d03e88af0a6bb798a2186080d8991e617aad9fb5f745525e2f86161a60830554 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0786dff08ff8bb160053fad108329170c8a5164 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aca2d5f93262c80913a0971f3b439c457526bafb1812aa71e340c6e8f11de48 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1bd60bc3d7692f3511ae8e65c4be2404dfef42e4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38db29d9a0c1d0e199b536267aba0ea19a23db18c7744611fd019b019b513dee +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c74b3ea46227546ec6c3f82bd5543af946aff919 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c648dfe1e52947b5a0b3503e5014299b44d92b6ffea14cf5394301e34437b26 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9765624641ef9d78f81b3fdcc3ffa5b5b50d450d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecefbd042be18f0dda5668669273ae8498736a2bef6c8d3e749369c76cd43b69 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5a1925114653b2dac5afd63ee5f5c22668ab29d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80316859bbe8f364f910d4b93986a55f17ca85e120ee7b461493be038cbb3a2d +size 1165 diff --git a/global_step243198_universal/zero/model.layers.5.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.5.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.50.down_proj_alpha/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.50.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce7323d72467a2daa3d5b8df186f3fc3f8e1ad92 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5589ad9f4c536526eddd4d52e6b1f6f8aafb7f812ca025a9c660c6f1c9c1c0bb +size 1180 diff --git a/global_step243198_universal/zero/model.layers.50.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.50.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7c87eb8a41b05157cc55542cc8f099ee3aec39d2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01f7cee2ecac50e09acf22a90c7b5dee507d4a9482d0206d057a8483b3707400 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.50.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.50.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a4cb9324f2afd3d66eda09412e64296e606d606 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5cb9ae3ff77b21a398b6ded0c8e872a1772000505a13dacf761e3431756c411 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.50.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.50.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.50.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.50.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2be93255a7f084cbb5edb97e06a93cea392697d7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ddef746809dc15732a553bdb6e85f39700a176d6b83b5d5320d075647a5b90aa +size 1180 diff --git a/global_step243198_universal/zero/model.layers.50.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.50.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a6a34043ff7ea810cce236edeab5a825e6390069 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b69facba2e13e2e16a69b5847dc00fcd253d84fa80118df72de9f2e99e684fc +size 1195 diff --git a/global_step243198_universal/zero/model.layers.50.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.50.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..48ed49b56b73f57cfa18746b0969dd79cdc4da10 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:977b0e878d361c9ed089b117b50d34ea35f20196ecc9b359040479ee10bee4be +size 1165 diff --git a/global_step243198_universal/zero/model.layers.50.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.50.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.50.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.50.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6adb09eea38b9c94167d37035cdf07f52088a25a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c72c2711e2d8d7933056b2612cba0539df751f41cf4b262a275aa69fc7b61d78 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.50.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.50.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3f5d180e9f8db0293d472dc3d4848e37a0393fd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dde16b18278ea3749978361d450591d73ca07fa10a0ea981072f3ff77d9fca9b +size 8875 diff --git a/global_step243198_universal/zero/model.layers.50.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.50.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0bfa1853d9b12c9cfcfa954d5400d4fa62fb098e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b301ae4c5fcaf907593ef36c61a4ff94466473f3f8fd9e6959c3702c1435698 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.50.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.50.input_layernorm.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.50.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.50.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1ed93a0ed0c36e568e97f5eb9e1b8c6ad15d0e88 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ff7acbe0c8073bbf6437b54b87b1dfaba29dfe868187a9c45e2997799e870cd +size 1180 diff --git a/global_step243198_universal/zero/model.layers.50.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.50.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b4e700f91112dbb210750a1b194592eeb91ca38 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53f11331125abd7f9c8d7f5a517f94c89a1ec1bc749d4f220261543382798639 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.50.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.50.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f90efe77a862016d0a6014853869e05b22f4656f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a3aab21ac6f3f6701d51887213d3eb1ecc41333a0da0df01340a4c6a9b344cd +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.50.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.50.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.50.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.50.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d83afd9f849f26634f3bbb0ef68d1ac27913872d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a444f8208255c0f16c1686912bf08e810c73417bb2240b26e4d8dbb6ab4dd43 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.50.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.50.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a39d7e48bbe71899d5ae4f33a4b755921875586 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18ceff019bbf7a0a98101740e70707cc9115e4c748442fc76c1f9424916211a0 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.50.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.50.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..24980e89eadd874d82c7752f449ca60d26ab8092 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 
@@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0db13924f14dcc53cccce1c716d9e3c6de10432892901311f13152a726fe7fd1 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.50.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.50.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.50.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.50.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c7b536f246d79df52dd8d184832cd90359f58125 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9758b0f2bfe238df65e8e01306e33ef6afb140616af51d70a0f0218b46dd18dc +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.50.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.50.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b7fb36ceca97fb3ab6ddf8c6fe0bc80d912b2aa --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14055d348fc4aa3d892ff8ef46e4b3851047fb6dccb5bc5a1c508134353f0640 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.50.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.50.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..3c508f76589c00597699031a8e52ba0fa144c072 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d5887142b343554a86c53d8491c0009702c7c57c5de3cd36a59bf30169f0532 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.50.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.50.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.50.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.50.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d946e6c1295b4f5495dbc285da4fc218fc3ea60 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb527849b253f7418b5ca5b8d73bf4fd79eb62b3b25942aa8b9119616539ff49 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.50.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.50.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6eef82f0c85a73f6c14a8017530521c1b13e19e9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8ac5966308315153119e8f56b3f8fc2ed0d9a24f8a5d1166e1467d3968a5825 +size 36865259 diff --git 
a/global_step243198_universal/zero/model.layers.50.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.50.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..591a34e99db6861c5cd1a77815feb9837b4a5926 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:248442f435a72796648b68dc50c2b330af6a13d74246af79a42f524212b0d96d +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.50.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.50.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.50.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.50.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a04b837e5b57a9bede4c3f2fe6148d5b0c13ede --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18f60331c54679471690c3e32127e770ad358923fbd1b7c463e08b1fc19f97a9 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.50.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.50.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5db486266b8dadd030e070820f3dea8f10f0101 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.50.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:87ebdcabbe0fd359aee5abb7b6c6d25b55f339347605d2cf2eb5272d1fd9a1e6 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.50.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.50.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e17f891def572409bc26f1aa2158fe70c678d163 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1885b265a0f37064ebc22af3cb5360b4d70d7d2377f0587cd797843cc18ced25 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.50.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.50.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.50.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.50.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..bef1a28e7bb30b6371da2c7d9f1e1f9e554a0918 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc00227090aaf7a5551fa460075ee4e9fd49ca09ed3c69a77591e858c68aa679 +size 1180 diff --git 
a/global_step243198_universal/zero/model.layers.50.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.50.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a98cbff711a99bf6773f47e69412fea8f4a10e7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2286816518cdc2bf673992e8b2fa2a463504fe87fbacae012568bace67c0eea1 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.50.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.50.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7620105da96a817e61f162f89866ec00b87f9988 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08cf4e154323f0088e43b0b910421a5bd1391bf648cfa54166b1e0c86ce67fb1 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.50.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.50.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..494fb2e8eb398b82f73fb2fcb542a1f88351f6e2 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6fbf299394e105baab97ca084cfa84e5c7dfbdfb7f14893180872dc3f1ae99c +size 2716 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d673e1b136536d9adee177f07f1c9658a50319d4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e4128ed007695d6e289d3822cb4d3071ee27c0854b7eb3f5fffbbfc5c11d1ad +size 2731 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0449b70c0227a2cf904ac188ca4bdd5357eb6256 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ea81d06979650f766f7007f755689f69c64ddae2d016637e474c5ba1c1e68c8 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b8f901493162168642ea9e4a2faa7fb34e8d1f0d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94c7b11214f9b584b2c31eed838e1021f3d055413cd84841f589eef3ad2cbda6 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..edece594b2b220fdb721b1ae18503f066d5855cb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94f2c8c7990d8a04e6164bf06091d726cc8c546cdacfa01ba668f02a4d95d93a +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6fce61a5976e1f71bf3455ccaddc243e9731315b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:750245f632bb1ee01fb3d39c6bec2c93df5c2b0c23da0dd2292c4dff0ee3df54 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..74055839d468c6559fcdf4af6ae3b1a9d9e20c61 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:848ce5a1fd47884ac9bde174b810f1e73742b27c398269f10aac3564c9f137ba +size 1180 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..de27de68a20b831eb78b1df2b15b8a852912501e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e27e8edc4939f7fe7565ec1f59ca3e8f0c5948baef73cab248866034f2f2c96f +size 1195 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..02eaae4f3ed79929a4516925e6bd785c0b4e8bc8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccdba04d7f59a6a4f8b43d211396fd498f33efe105e4ac2920d16a055caace76 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.50.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..93c20c8555718d3581a6e6d058e5c5b1c258fd71 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2580e08af0ff4684e4c398a52ff9a4671aa6a4e32002e0098ca3b06e47b6170 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.50.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..164f580218d38caa594fb836619329857b071ec7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:892137158fdd5e609310d7496f512638e1a2321dcc4a08e97d8fe27b76a55ec0 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.50.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d018b5b1c03ac4033afb3537c907c8c2e4dbdf67 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0898e86a4e36e9e7fffa06dc5063291828adf9f05d6cdcbc5420bacfb083043c +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.o_proj.weight/step.pt 
b/global_step243198_universal/zero/model.layers.50.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.50.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ffaecc60e74983bd2ba960db3b8bf20e1f8bf10 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4001e7024c6595a1b4e5df7682da93790ea9eb120e466377d5fcfaaec7095e23 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.50.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ec0239c162fc4bc7e7b338bb9eb209e63b6a92c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:301e106487c0b658ddaaba5c58664ea8424810e3b6e9a394e3507767fa49630c +size 1195 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.50.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e07e917e9d82ee2aed85780c74b2e5c28c28916e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:4e6547d4cae5fda7445cf1acdaf9ce450f91de159b47439761fa8a291e634622 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.50.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..03e4266df512092fadc949231de5cdec2fd76faf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7f7c009d671948bf3a08ce20e5439eb377360ec8f53f0f434b41c0905897142 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..72c7a38ef8b42f9b1f605d20eb9bdab94e84e71b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bba05b0c47e7b84056c24bd98f19ffbec4d3b149dec9e581e7400c5c7334b66f +size 8875 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f8dcf26f2c6b5315ed31dd49f8031270af6d9aa8 --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:134b846da3f8a1d48f569b643420c7bc6c0bc606673a43fac64f2ebe8ae7bfca +size 8781 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ce5572b2b3507a6503c9163b2e63a260cdcf8ad --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dc4a2acaa4625fb92c4ca6e29f2b1e54a2b0ed92918a55f068e263a3c3fd073 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a9ffd0b6487bbaeb91120eb66dd540850f52634 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cab7029afca1590e5927884262bdde2f0d710512b71ccf17dee9f01164ea12e0 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1aa730203ebe6c35116b04fafef488a29de4ab1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a4c546b2f6be5d127c096b41591e8d180980ebf62794408b00acf9dbf7d9564 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7a8a1bf6b7f351f62e75a0e1e79ab1f128d85bc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d16d69af0ad892075ffd6456640d39bbb2cdb5ea040146863d24f8013c1f391a +size 1180 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..474250feb9f9904a3e02af7abddc33f5ab2efd4e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f9867df544527d562c44bc739241bcea238cd3549aed164880e74c6641844c57 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..00a8a7f8b75a4accffcaf50050a1d269c809635b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf48272fdfdd8842c599526096a3600e557259da2117434b957a4575acbbb2ad +size 1165 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3dbe2e65ab9d7dfa85adf57693a74ad1a04747d4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61d38eacdca8b6b526c00ffa1329ea9e25e3fd5687f64121208439a1fec636db +size 2716 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8204d0016371838d5edd67928011ee69bab5c148 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5642d5189b7479f4f17634a3abfe1eece90ee5e2ca5900a3b1cdd0d07f5364b +size 2731 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e879dc4f2bef7433d106833a352fd124cc05a05c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44c3e5e6420d07e07f2a6fc53bf15a848e6c1ae4e52741b86727a2ccc64e2b66 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b571e9e649b632baf51c657ef6882a6513a5b502 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a64be1363dc91b1dc5ba0f97d014d6aad0254bef30a9791cd86e5acbc84638aa +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e95fb7720dfe701a9ea0dac268290f21d587b46 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56f3b6b2e79c6d1797538eec357263da084a1205e2d51dfdb3fa97086a25fe41 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b86082bbd68f42ae8de0428a9a36a578bce546ec --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3726b3354bfbd14a655737cce0f068860cd16e3a9b32cb8b9618d8ce7e92cc6d +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..60b0c108dc38586967d69d39cb87ac87304df360 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b080afb5a293495ebd251dd871d1c95acd23942bfab3ff521d8411d74c44e075 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c5f6acdd5986a08cb2334b8da37c7ffcd3b478ce --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61cbfedaf454a3659d4105f8cae7b25d9e42668ce6d147a74dd37d79f203e841 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b7ba0685b452e5663bf68cd31c47e984c1981eef --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f927436d597ec4c1fcbd68fe62dcef27e4c9d94f422a0cb146c61d0e7a99ea0 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.50.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.50.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.51.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.51.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..408087085d9028a61f2acb0a9f14bd6933b74465 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.51.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a185c30129e6f1f707d35dc966fe8fca80cb9adc52de7b73dee95a47380febe +size 1180 diff --git a/global_step243198_universal/zero/model.layers.51.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.51.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5dbfc5f2a2b480570bf19d71bc5c22414ba890b3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94193af02ddcfc4659539cdf61620fd892abb789aa2678539d848f8326564e77 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.51.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.51.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8243f281d336c79d1747865fa2eb184786748ccd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe9ebe66c06ce8ca82d0ec2aaf0b7085ec79743d65627382a7f1e6e9bc47782f +size 1165 diff --git a/global_step243198_universal/zero/model.layers.51.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.51.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.51.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.51.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..f360bdcc5e648ed91613bc09436900f5dc4b3533 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e82916dc59a5f516ee219eec5e92c76dbbc5e667278cf86e9b578f499602db93 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.51.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.51.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5dee84446510a6254130563aad2d20c72992175 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6674ee9061493c77c660ca2ff97ed078724ed7aae920cf60a8aa0e11e64adaf2 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.51.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.51.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4581051a114801f500226a5314ad27530dec7b2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45d63ff06e97a1e3cecd0c8c13189a0d6d7d49dfd59050380bef32f7d0ee96e6 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.51.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.51.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.51.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.51.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..524601b133546b4d150f7497ce981baacac3de5c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d521ad48737df393a2b2238089212b6be9ece9f46159b48d52edc236c9433885 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.51.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.51.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..21a037e50932a611b80332b23019cd3b55f232e6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5bf63276fcc2892c365467194f8aee8f84daf9dc429683f98ba4a4b2af34839c +size 8875 diff --git a/global_step243198_universal/zero/model.layers.51.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.51.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae23680b1b47bb1cdc96a7e49b8ae2ca1409366b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d208e5e070c8178f02fefef9caf17297746ddc87dd53f8c149707045cda948d1 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.51.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.51.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.input_layernorm.weight/step.pt 
@@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.51.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.51.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..54f4a6819a7c0a28fbfccb6181dbc13305a7e495 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c42649ef76a8b9672e9e8e2685affbcfdd33851be6f5088ee8b08523a05e75c +size 1180 diff --git a/global_step243198_universal/zero/model.layers.51.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.51.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..bdd045ce4fbcc7ff32ba709f1f761191c7a48ea6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a46ae83a399bba72ac697246438acc978624a798d4a9c28b58486e56e1983c4 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.51.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.51.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..159505fc65cf07fbf09759817c8f503f492bc20d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af9864f6413900ec4d2c9c5f68b9e42821f1824dc96a7d2c418a5034fd9ed050 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.51.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.51.input_layernorm_alpha/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.51.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.51.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..48e7950c76a4fdd29815116d0bbdc0d5e037ef80 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:667e19e19e42b95896729b2250ad673d8865754c9cfe0b10c9ffe36475866780 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.51.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.51.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6375279fa0956fe9392c490079c6b361428da9d3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:120927c71e294a2fc0dc095e0edfba3f4f2b5035691bbcf59c989f7e181624de +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.51.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.51.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4cd4038ba1a6e3bae4e6c827ce19aa289ccb59fb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c57565348e060fd9816a6f47629a290a626a5465ebf1127eda169e6efd2f163 +size 36865165 diff --git 
a/global_step243198_universal/zero/model.layers.51.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.51.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.51.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.51.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..849af2fee38a6313ff20531b19bcc5dcb0f5273b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9dbec802633288d54035d2b31a52809ee38e43cc7e7d0d5796c33c477c5a7f0 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.51.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.51.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..88840a2e96d9e696b9181af33c549f03a5242c9d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4463313066b09e1339f6a14a9fb21f1c2cdaf7343400912f6a742e50e81353a8 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.51.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.51.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b02400b087cea69e7409e088ef90ccf83dd9bea2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:6e4d657135def1c296b9788c050e1855a215f2f9bbfd96659bfc6624d9ab23db +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.51.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.51.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.51.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.51.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..12b2c2f88a1c4b0b72a934090e58ccc099995feb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85b104fbf4e11678547ccaa958d7b8f3495e9e154c485975cf3d8ff8a957fbeb +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.51.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.51.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b20ad07b9bcec0fc6661d88f55be59077d3dde07 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e76ac02db84523533fbf0d6540730a55da5a13fd8c12c54257d26aa68307f84 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.51.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.51.mlp.up_proj.weight/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..fda342482cb2bf85532ebc2af40ff4a51cb4e0d5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d08a9ab64137c7a37d8beb0f01ee48a1ffd7f20befc3215e15a40b058ba6abf3 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.51.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.51.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.51.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.51.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..310512d8c21d7119380abb06f0d258d7870a732b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5abe82723fd8eb2112892905531ae052efeced5266f3de75c43b1bce3230a84 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.51.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.51.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ec94627792272838b5728bf4edbd9ab40f53c3b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5670065ce34e278306a9996dbd9a1bef98d6a1f9bb9d5d5583d9960030e1a54 +size 8875 
diff --git a/global_step243198_universal/zero/model.layers.51.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.51.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..bec06e98b485a1b40435d2098abd63f7391d6638 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f4e683ec72dfd433ae01c05b3f04062f4cf7fae7a20b134f7fa6030a0c4e972 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.51.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.51.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.51.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.51.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..97efbe0ff4ae20c90627bafc07465748a4522747 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:120f7793074b4e769ff92545746b3e6746cd36b525922c552734a780c9cee93d +size 1180 diff --git a/global_step243198_universal/zero/model.layers.51.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.51.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1325519650db2ff15ffc08190b2ba096a1c3c813 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f794001d6a0407b1f826df739c3989f8871e6df648af62cc7bf581f0ab88d723 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.51.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.51.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..45e65d3c9ded94359f51bb1708ba94ee5abf8dfd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1027a24043c589c10f69d8c332b55947ceeafc16cd75aeef50e778ce9cfa90a +size 1165 diff --git a/global_step243198_universal/zero/model.layers.51.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.51.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c7af26c41baf582b7f6a8e60983dddb938d4a90 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41fa21d92f03d76cd12379fc1c842de069525760357682c54baf093ddf798f03 +size 2716 diff --git 
a/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4715bb26353e287479dab22ecc7c838405ef23d5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccec014421b41280acb5791707c7207f710965eece7207a1113ea7591cad59bf +size 2731 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..34077d787ca24aaa792014dd7c73fd748d7d7362 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a16a661c608b2ea085209961152cc546fcef7b84964ed0e610b1e01aed587c69 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..74b3a901c4899b47d6db0cad830bf95ed0d4e1a9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.weight/exp_avg.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:154e98be629ac0115079b1b67c645174ff7eaab5f74048630dd8a69ec4942897 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..82a4edba531884355cf9e3f7d6db9d7f76a2d131 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c85b43b9700331f8833902a3218c80b20455179699a774142bea6e0bd49eb3c +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..993ab1635631f6e301e34239ae6cf65a9daa47f6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c73221d28aaab6a0d3e1bcf690ecdae542c3ac433bd4aa82092c914a598a5774 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..35c8f23ac4e0d4c8eab4d3628112a339ce4c847c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:603e63382469fa3ea7dcfca9282eb9451e7d2bf18f27145080d8df6114c876d6 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..958281fb496e84a04d71a870c8ca083decbfa903 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed217c9c793a7dae0ea24548a844a5d698e339286ba29ca06d698c87320e1d8c +size 1195 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f338e6b57e7d32dc479593558bdb26bda2bdfd4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:447146fd10d025e123adb2cdb8fa4432c3b193ac7870cd0634f38462d938679d +size 1165 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.51.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.51.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc8556bd4827f60cb7616f583c3807d00f007a54 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e29ce26b867992f62392baf6353f0df433a0c5cb5c814a75b106559b3cfc85a +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.51.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..59802617fd69c4f580f0dbf6602d1c5a6478784b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9da6fbd4d012b67c92eb1b8303fa5c918b4f4b1fdb6664537a15042879b3dd1f +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.51.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b88415d9f87a6f15ae993f5a5194b62ebd80cf24 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cf2ef571229c53b72a73358ec128f3f06df5d2a523050856b1b1021271e5cd4 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.51.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.51.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.51.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..38318f5823c6257e7fd56d39b50ea615b6c5faa2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c860840abb621601510231bfb136b7bab5af3be91e45b14b05b3a00af641ba95 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.51.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..17ba180fadefa4ff8f7b9e7c39e072f689288327 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84f6807ff46bb49981ead1fcb2bf088c359d42cc9aeebc0b31dcc68eb34c665c +size 1195 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.51.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e8babc004176678673111cc78882f21d7b9c9e6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0361d59390eb5f28f909984f819885be459c04bcca1d6540fd69049ba46ad9f +size 1165 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.o_proj_alpha/step.pt 
b/global_step243198_universal/zero/model.layers.51.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6edd3ede1dee27844e26628b09ae6f57b6a3396b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77fd1fe2eef2dad1ec47d6fc9d9055faaf14a629da9b014d9f71f41c35e18a18 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c9e47f47ca6fc0f898af7d0c4693b3d38bc7dd0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45e8638f239de4e12576048815230ae61ec27a11702eafc1511e18b16f9ad4cf +size 8875 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cbea643611580383a70af6f529e5084822b79def --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d3c3b9694ffaf4e3b4e66bdcc320fc99f108de8f5179b3ba9ef111d96ad12ab5 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d163ae026fd2efd08e41821d489f06e6c0355718 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e747daffc660ec8a6bda395e4236cf017456c6a1168210cb1fa154c6eeac5685 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d7f1da00b9316cbbd991a1ac239ae3d7b5aa250 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48645f4222b1980f6a0ffe1df886bd29bb2028e9ab48e857791f524fc96a3f5a +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..4649152a3b382e98fbdc155e78817cd8139e330b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17562535a7a0f0e718166a14f755b4090204ebaee98797f1ee29c252209f4de0 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..eee1e33eefa6f406b53b8166402fb829cd793fb2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2964f65bcdb2b56d2acc634719da627b77320362ecdaf9f3821ae79a4d3b6b98 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..18ea578391453c5b6b52555f4599bda3640eab52 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12dd597c1892917ece610f76858c1925ba5b60a0993c032728246bb5282cf9bd +size 1195 diff --git 
a/global_step243198_universal/zero/model.layers.51.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..93e22bfa6481b6bef3933154ea63ad8ee8650b10 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbdff0d5f12d91ae785f4e78d5839b5586925b7c0ab32499bc49b0b242420d44 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f9c1c2d50b9b26b81caf1938dd9a2b59828d0337 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ec055b9188abafc4bd1f8026745625b8af4c6ee05ba3f9b429e91b39763fc54 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..43ccf46a8957390c030554b46a030013359bf513 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.bias/exp_avg_sq.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dc849a90cec770e5270dd737ff4b22604374f82fab9d81574024856262bd51f +size 2731 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f3fc24a01edd64b0ac3a5a0ef1b327ddf1354216 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3f488ef8810d9e40fefb2e3a6b2566a4bcdc6e17332505e93abf1d5ce7fbe1c +size 2637 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff68345a7aa0e38ef2a9ef000aa102a2cfa66b60 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c35b221861bf07b5f00c75b92979352573c9efce8e7fd6ef919650a67db238a3 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..be923d0c7029931faa516fbcab6664d5a5bc1e78 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:432f82ee46cc9b9a6d4c2ab8a4a5443b85f6353e9a0f086b241605f93143cd1b +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c7e0bac56ece8a3c9d3a49f8422f9d634fed60f4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1949e4bbbcf02399b17d3fcf085a3a31805eafdf9df3f895796e6ba7a3f4a97c +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5fbc72ef01d0baeaebcbf077919123ed47753741 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9921fc2193f5fca45e5f5a726b38e10635824a83ad988a88d216c9578e45d6ff +size 1180 diff --git 
a/global_step243198_universal/zero/model.layers.51.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..63661f27533662abeeb30bb3d76df482eef2db70 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ead07383642ad1fc53ba22a39756a13c9feba8a18392281ceab38f5ba147339 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..387e06bb1eb0d33e19c86ca032bc13e54f6c8faa --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:511e22d396a7f9fa92f18ff5b19d21192ebad42cd9a9b3aa1e5e0b10d825d173 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.51.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.51.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.52.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.52.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e25f5b7871154627f60532554e9c14c4abb2e99c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:d7c7fa01f7b753df2040289097cac9b82432fa26837acc85a4953816ab0af999 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.52.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.52.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ccb664c6c38d5b8b81ca288766ef5969b32afe8e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7cb2831cf83a3b9a53dad226f79d5587be166c72f69687d95fc749abaa55951 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.52.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.52.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..14c7b05ce00269a7f47de8ac47426f8812145163 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:007f764d1b98240abc813dfb42348b284aed984a9d2bf10e9e0ef73a57403b30 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.52.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.52.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.52.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.52.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f9acb1623685d4eed49472aefc041c95fe74c7f0 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.52.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8876b29d951266cc7ebbad510e0aa7286497c124a59309632c75b4f291a3f678 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.52.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.52.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd950ff6b95c58f962de3cc8a1c7479f07f5e998 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591c758f76319c271201fc1b2bbbda752cc2556cf1eb61e4730bbb51b85f5508 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.52.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.52.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..925ccb5fa67fe4dbf34e866479a58df1fd61671f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63547652356fdd5b67fe2f7c0b2e5f6b4f199b70056c43a3b6bf569367b8929a +size 1165 diff --git a/global_step243198_universal/zero/model.layers.52.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.52.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.52.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.52.input_layernorm.weight/exp_avg.pt new file 
mode 100644 index 0000000000000000000000000000000000000000..345ee1ed2f81ae28875b7ba242ccd3b4367cb2ec --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7618c1af4fa367aaf163f6b50aab3b09b39f6155a3dcb1b124955462d788b319 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.52.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.52.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..962862d5f687eade1740cbd4432f8f3df7c3b003 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e8f4dd1e8c379f0b0ecdbee633bde02bcc9f9d937c3081d7eb245e0f1a7e0ef +size 8875 diff --git a/global_step243198_universal/zero/model.layers.52.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.52.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..631973af2d7fa8c5991325bb9847de8a6008e7cf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ab7bef5d7e9e89e11338afe3eb6034cc4aeddf8f1b11f908d393a2ef47b78ce +size 8781 diff --git a/global_step243198_universal/zero/model.layers.52.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.52.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.52.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.52.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..03599466ea66c25953ab3d1701874aaea78de25c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a69d78a545e1fe41716a5fb8d4ae3f839a1b5fef05090c1885408f7a850d8a57 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.52.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.52.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8476ac191b93c664855c1bb7350e6451a8a7e02a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f5457a6ce89a3aa0dc9a0eae10c2ef7d4cad6f411da310bc460d1e167f60b04 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.52.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.52.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..858abb902458ec8ce875864957eea5c2ce277e49 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54c494e78aa983a41a800a61494fe272157cbf625facee96444c6ebb98f658b4 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.52.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.52.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.input_layernorm_alpha/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.52.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.52.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5327141cd7c8ffeb77fc6a2882dcf0e920ad2407 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0117ec2998c323f70b1bdaadb12660340ae5183e67db827416d98ae186d9ab07 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.52.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.52.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..12c848ae333545c1ac7c4bf06d731edcabf69df1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c3be3a4873cefc8bf400ba70e303a254998a12da29c9871290a18a44122ceb7 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.52.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.52.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e832726fc3aef6f7e496c11a86e62185cedb126 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd947b4433117177828fd70facea40672b0a947d1a40bb07f32b3790d407800f +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.52.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.52.mlp.down_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.52.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.52.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..80ed8d810f8eba5c9d76cde01e96c1870f8fd248 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a06c332730d3b1728c11e800808226395f42d37b80b265a5054198e1b6ab2f5 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.52.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.52.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9d0a5a58a57f12cde8091fb2153c9f8598911d9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ccec560c78276085f03a10f411487027daf5bd6f36fa2bd3a01a1b8332384508 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.52.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.52.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..30acaaff76472130b2df1a5de1b4b3aa60032a3c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49f1aa60fb2d586002f37e673d3efabf1086a6aee64f082374f2f7320b3efd77 +size 36865165 diff --git 
a/global_step243198_universal/zero/model.layers.52.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.52.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.52.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.52.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0c69148dd7a1cd285de9cd436f011fe77bf6f1c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f80e0d7b94fe3c5d1caaaecb4a680f58bceda31002218ef965aec869d550a80 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.52.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.52.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..43c64f7b6968ceee9eae77ff2cfe9b168f6d52ba --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c3af7fa97ae3ce2dd3d450ab9e6294ea98574a4f93e65f5fa2991fe0d7ec578 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.52.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.52.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..16112fe6c23d5dfb5668ed0bb214d1e87784e2af --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:7dfefc8d6b3173874b8e907025252821c4f96b43aecbeafb6b165bb4eb96ea40 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.52.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.52.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.52.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.52.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f5169ced8289013ebed207d5818f83610426aace --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0c3a2c1370fd385f082f3a3131081fcaef34682a8cc3452d95ea8cab949e886 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.52.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.52.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3a7cca0664c683a7c186d7102c74d1abb87ec87 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f095e0bf3915e81eb5def20c1a22f68a642780d54cecc6d3306028adb2ba57f +size 8875 diff --git a/global_step243198_universal/zero/model.layers.52.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.52.post_attention_layernorm.weight/fp32.pt new file 
mode 100644 index 0000000000000000000000000000000000000000..5aadc752af98f41ada7e18a789d75bfce9bcabb0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:939a79aa19dc38093a4fb5c5a056f514bbf69e9a8d7478f4a0f9a3dffe63310e +size 8781 diff --git a/global_step243198_universal/zero/model.layers.52.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.52.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.52.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.52.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ddb49b31e65bc36d2e3ae7002c3509799c3045c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6efc2b4f1122f4519c9827d14200c1792071db5f18ee488d966de2c6aacdc23b +size 1180 diff --git a/global_step243198_universal/zero/model.layers.52.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.52.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1cabbf1e6b7584d4840e5abe99966d8021b4f11e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d6159c38537b3dadd0036d940245431030944121648e0c6609bfd65acddd61e2 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.52.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.52.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0794ffa9936aab4a37d22d9e4a454750aa01f7e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82cf6c72dfd337b67f2a418704754ede187605ca98ef30ff9779f4d90d6ba208 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.52.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.52.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..517ab2b5d0314b3d3719b856b68cfe4e45790d30 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96c5b48cbac8c5e8561a8fdea2cf9dceb86fdfacc1df3ff26f16fe3d2e33a4e8 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..5a196db1c2fca3c6d8094650175cff069ce08145 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19540f4fe9a6ad616b2339acb5cdf7323c8b6a433c499498114f07ff3179097c +size 2731 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f0bb0d715ecfdc199c682be87a6e29d54536481 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca61e2d81ac3699b691e79ac3fc226c22b92bfc6ce5a6787ea2eab9276fa58a4 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..114fc76aea83780d5676faa82d643e6ae7dd835b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc08df39acd2ee2fe48a160048f8396b1c5a4ffb7d2e9909682395f35f94c3dd +size 2950364 diff --git 
a/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d656a1d17db7061b3a7e4cff5a4ead8d8def460d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f3815a20d3a14045def210f5235a974018aaf6475808d601273bac6703e329a +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e4ef7c837c156472e51c8b2a8e00aa65884c354 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ffb83efed1b62955bbe83b9684606cb4eec3ffddd56759411dcdde1e5567154 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b07ffa47cdb51f2d896979956fbe967f19bde2ce --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdf7d639d1ed9f239f9ade23dd9d3a1cbf6ba56416c835e68ef1f8dcb9c7c66d +size 1180 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e689534cf0cbb23e581a1f3fb5d4caf4b933e93 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0c8f0df4e10804b823822f1e4623a4aaf3609c1a6a02c3627883be46afc38d4 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4acc50abf2abe3bff3620bd8aa35407c7a2d3a55 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:768c0e41cbe65e9eccafa9b5cf64307da925e28408b964c96b2a07077dbbfce9 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.o_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.52.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..be4f19629a0ae2f45046e4dcacc7af723d7fde07 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:297094bdb04ff58ef16a518050d68777ffca404f6b44cf27de86dbb97db46a8b +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.52.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..fed1e12463bca29b5b53eb8449074271c66ccec9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6daebdb23609caf8a5d3246122562613ffabc829eb251be3716008880188280 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.52.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f850c8b466012c0b19f3607eb91ac06a3183088 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e877cc7d985bd4156f4e19b0341238050b54425c667b6754268db464b72f6ae +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.52.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.52.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4738465c33db5b85700ee1dba4372da79d714bae --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d40e041225263e497e56f2d6543af60a45ac93fa95799cf7b7b2bfb21bd1215 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.52.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f9a3174c4bddaaf06c3cd292122786ccedaebde --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec62c903e211ece30c075037b57e972c525757faf5d96d23e3fc52c7eac02518 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.52.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..41f4ee1c089bcd9b1447f681d9b44a0a9e24039c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7b5bba940a7f236f0ecbc00cb5035daf1eb7dedaf9539fa39399b378cac5c5f +size 1165 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.52.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..21d2eb04156a7cb85313369c1fac81b7070b1cd4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c6a805591539504f4b6c142a2f09dd3d8c8c38de80e3cc263ed712cd571d66f +size 8860 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..46259eaba6b684c00d3f02e31f50c9f6b107bc8f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:407f23b596c13662151e6820d3dc33e00b54672e5eefb6b454182ba695b53bd6 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5d7fe71ae9064d6f9bca9a7e9b249e0d86012d98 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a96d83073efb76a6931eccd0717f434cb49af1956d9294fcc6c1db11c4d8faa9 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.bias/step.pt 
b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..73a9b44563a07b7df499692e2f0c4c01fe510496 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2b4490620a3773d7b86e3824acab3dc58e6d8283ce73e949f3bb9d968e607b7 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0c7aa1b3ca3fdab4d32ec0bb1b7a8b40360ccf0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa5ddb6564f6f11776d118038441208206648cfc282c2e655b60eb14dedeb56f +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae6e338c266fe9c7ae64959ffd5948445d2a4cb9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:973bc00ca04f7a7645a0415f9c79e5686e1f75b85e23847801c81312b07c80e1 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d7d989070a902ddb06c3e3760dc8db13e16e8f4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaea22c87d40e596a5399dcfed1d8f851834a98f85abf631bbcecc222bef7c14 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..21b92aa8d155e8db92d60b922c6d1027e8cb93d4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab22c4ce1102e24dce286ece9876909802a5bfb58b76635fd487b29a5c691368 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c25d02b1592e83cdbc38accd4d03260739cb2cb8 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2283d57e1d606c8cd0d36524bc85416a9c63815af905ee5935b66e08d3237ac9 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd83523a811b71e151bd76ae0261a0c277aefc49 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45aa06612a90be08e1910e85c0b36c53baba4e6fe192b715631395a21928f6b9 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4731cea22bd8bf8067647220f2e23151aeb6f78c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5decc40be8007428da2c61e4c9c539f38cd61acb4e01b15ee2f41c0884220d64 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.bias/fp32.pt 
b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f23ac90164b656562039a6609aa81ce5ddb5b34 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e48e0e902181ad1909e5b621075a7e17d06c6cc2dfb4d96029c5176e355495c +size 2637 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b833a0c42291c474d768e546899f60f3742027e8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4febeacf689cf9a1eb005d0a408627a50d1c65b1c6465b48c4489bb0c66d794a +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1956df2f8b861fa4a42d88fa9bfc5cddc81ac0c7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:59e2f190da50e52433562ef0f6e65403167927f8f4b0ba59b5a0712fa7b86bcd +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e39bf1b0b1a78f4cc9ce33219b471a18565143a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:400136068179e902c9ff2290cf1a78a1753e3a7c3782f34876694ad1191ff514 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ea4e21b87839666546ab04954ecfca3e1db74ec --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:682a119f11133da76cb4a3e7c85259958ddbf8a138b6a74e7631fa736311117c +size 1180 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..304ef9f529331f06d26369d22bc1e830f7aa029a 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47f62158bb97da27ba20dca0935a1e1e942ae2757f8dc2d32906c9f8c585459d +size 1195 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5127f1fc8bab0ff801b4636bc98d14a9da4dd83e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa8f2fef4b2bf65275fb54955303e214d14bffd3536466b15831bb55aec4e973 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.52.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.52.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.53.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.53.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..03c399a885bd906feb424753a66aaf163d4ef6a9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89527434dd2b544430047aa6212345f0827105423ae28c3a435d4669458048d5 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.53.down_proj_alpha/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.53.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9fa12d280dabd99e18bf5b899fa25f634f78605e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cf6574024a92246fe88f079a892123831e3ff7264f24d6416c1432381147c39 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.53.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.53.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc9501cf192b151b280b6c84f4a69d28a970d047 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa60f38208b3ec1b5e63bc2fa7f072c730838e17796efaa33f3c6213a7a9d928 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.53.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.53.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.53.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.53.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..06a6b8a4f63ba2c3e826bbb319959fd5c7c5be64 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f309c30dccff396a9ce1de55a44768b7f7b4b19dc0ca8b094f3259688bf9321 +size 1180 diff 
--git a/global_step243198_universal/zero/model.layers.53.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.53.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6a20825624a026a9bd64ef7b0282deaf6da7b4e4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a30b03213e68da2b40ce10b2c8dd35d118aca50a1445ac951c89d883e4ee9ef7 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.53.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.53.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8863c5e380d4174917cb5aec5998455b6bc483be --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d00fd8e4564252800b65353afb21f4b8eef68320a13860bb4c3ec67d6a694bda +size 1165 diff --git a/global_step243198_universal/zero/model.layers.53.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.53.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.53.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.53.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7055014ba1285cc4ccafcd17374229b5b284d48b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:87fa2544df526705e812261eba5bfb981a0adb2f748fea79f0acea94daa273f4 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.53.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.53.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0548ffe69c761eea14c5ac22dfe47de58ea221f2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6993380be19d715544f2a6a87304792f6e991ccb6de88733f7d0ec7a278397ba +size 8875 diff --git a/global_step243198_universal/zero/model.layers.53.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.53.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d99f45e5064a75e13c7405cf1df5695e3123d4c6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe4660222369a21773347d2671ae8bea298e826b4a2efde00666760a6bad76ad +size 8781 diff --git a/global_step243198_universal/zero/model.layers.53.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.53.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.53.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.53.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..690c12224d0d5b1d0ff41e7127c1adeb326a43c4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35cbf9c1fa12fe9d151401d8022919b29376f381b6dc5c1246ed8ac7c408d627 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.53.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.53.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..dada04463f91d9c7c6668382b2d5182ce222a3a8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7080cec688288879030faf6601f0bdd3fac631e9151bdf4e1f990c340efbe3b +size 1195 diff --git a/global_step243198_universal/zero/model.layers.53.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.53.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f270559e2b634e8eeefd840a3f6345db01cdfc3d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:457e4081f9ca60045eda27b2878075129d6576281b915385722b646224516449 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.53.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.53.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.53.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.53.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..05dd0f54e6eb4c21c45fb5026f4d40802c30813e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:952377d5d90cea742f817c41aa298ebe868ef257b5a036281e7e21153f2bfbc6 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.53.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.53.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ecfcc4363cbf2351cc6de7cbc7158beaba6af080 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7b2d1011d8f2e6e655ca087dd84af888f9fd251e31e0be1061d7b9a7e4ada3d +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.53.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.53.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b3c14873f384ef94c49239f3d32a307528ca95e8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5c9818e5bac8cdaf835b6854c9dfccd50accb51eda019fba52d00058dc767e +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.53.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.53.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.mlp.down_proj.weight/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.53.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.53.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc281b1c7bb818693b35e573adc5427f5bd84bd4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51c3623b0f401acf58cd4a4f80edc2aada1d6154df56beda952951b7f7d95fa1 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.53.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.53.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3bff21e714b8295e3cc6ff4a232b3c0341a12bbc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac393ad56ea3e203539be84127c3f9dd1b664322d0d0999025ef6f8ff3582a19 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.53.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.53.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a7af3ce10582d8c7c29ae879a3794551e925a9d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab963f20212aa1fd519b90e257243ef0be7bf2d74e943c5a5d25befac3c1e304 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.53.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.53.mlp.gate_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.53.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.53.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8dbd90f72a78639cab62271fdb2cc614eac7058 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d7aa3e9dd09b7a52c60a1c3b4e02317c0f1d12cab3fe1cbb08e320c3add19be +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.53.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.53.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4b61f1b82143f7212abfd9b4ac9f11f896e54e38 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eca40b31f0cbf1748317a470c86e4884f4190b812e90aadf72174588723ebd27 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.53.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.53.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4d60ab5366431960d402af2396c0c005d9030d1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01952527366388a8228be21adefbd53634379de6accc7058e37ff15b9f2035d6 +size 36865165 diff --git 
a/global_step243198_universal/zero/model.layers.53.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.53.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.53.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.53.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3850673eabc23eaa63b3d0ed56ddd71f92e2f54b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a72f6026b1d88bdc4aad1e5f210088db077ddca45f71be5e63474e1314052f1 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.53.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.53.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e1dccf37e1d865780e855a2727d32b2449bb3ce9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69dd6076c3c8dcc20524e1b593d160dde116bb12335a1aeca951a9b979b4e8dc +size 8875 diff --git a/global_step243198_universal/zero/model.layers.53.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.53.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f39c4331acff4583cd7304bb706b8bf53426563 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.53.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef23f83b4b9e63ad7ad1cb997c5fe55b98a8f7d75558750c9b19bd77db1a49d0 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.53.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.53.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.53.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.53.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1a9ccdb62763c35db76808f99abee4eeed78908 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:746023328e021f85dd80341d37d1675053ba34f11aa6b48db7287ca5cdebdbf4 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.53.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.53.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d47f7e6995da24c550e2cdf8f8d04d4397f0aab8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f7841c059def2c9c8389aa8b7bfc35086c96a88a7a9a171863fb04af8f9a740 +size 1195 diff --git 
a/global_step243198_universal/zero/model.layers.53.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.53.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b26a4b1230faedc0b563862f83438a0442aae75 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bb900a2273459bb6d2df1c8a72ff1fef6372832d3377c663a973642c6eb6e95 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.53.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.53.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9076d20bab442defc7fb0aea7eae11c65e3ee9a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6465f3da71e7e66d087ee0ce5c39e9f5ad03ae7a4ac634fc4f15e1fd4cacf086 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a86b1709392776695e73a129ae544ee917a72f7a --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78214224564ba4da661c2cfefb2302012e12d79a7e4c5a04f496754da15b9726 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3cf25a5412f674b49a4bcd8b9bb0bca691e531ab --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0321b5f39245f85737730d90edf404a092d73c2d6dd2c93ed450e365edc170c1 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..88a5b39aa60d971fa045d92c23b54543867332bb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7168802f7b5f2e478980d9c1acb31f4e7d43f29c6e77416b08db7aa0ac07959 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7fc1e246f900e821a3a63e8347647e0e88adcaa0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48abf2ae2f06f19595e3bcf0aeee3defa40b45160a3947e65a196f447380f7d0 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..018eb8246be4fc8fc32e12a4f6031ff350a2c24d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e51731b3bfd488a3349a5e6c3b7d6aeae37b13a258db88bb5e0a638258050f8d +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c63c9cb9445d7f5f0f9a9e4f8995e39cf327027 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:a56c817bd6fe33d7fc87de8d5c9b2b9f2d902f60f8608ae419c0c9033d846c1e +size 1180 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b364c893bb7232c592ffa83db67e15389686554 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b6afefa3a8b974013b2233ef9af4559b26a1bbabca33345380a920ae277f657 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f36a4c346a57506731d00073c8e605b3042cce51 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1421fc5e9443d8e253c30aa3fee4149d0509af4a00a43ddb2174ca4e6fc3c184 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.53.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b87973ef1c75336252957b65526f0b6179b7395b --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8253c56ca1912358b54c1627d40c28b4583657d7eae7f07aff258ab594150fa9 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.53.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f7f5008e6b416fb5af4d17f672a8a2a14d509c7f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d1981b34fbc333821ff8bda67339f44b8a737e914df0559da549bd346a9d276 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.53.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..50711657349fd00f970e8338aa9f2b3af86db482 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab93ab1fba4894003dfe2194847090b2fb08b82c08f5c8e605019ed11992ad5c +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.53.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.o_proj_alpha/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.53.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..616c38b0892908189cf27fe0ff1988317c93d1ac --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa445d987e8338d16c4999f8eb253e7d0c7491562dbbe80506dacd7984a80559 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.53.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ddec048c1ff39cc678ec6fc79311264049aad413 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71bce441594dc0f85e04bdb22fbbd436a9b33c9bbfbc982a0dbaa52895f6ba93 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.53.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..916a89856b9df7c3ef9a8f710318843b036c72c0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9d759da69867c73f42690ee0c78c3a65c4188af0d0ab281ab5c5e74a92843a4 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.53.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ae4aac758a1106a8db948a6125f5c1b702e9ddd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53d8ac0a4b9951aa498b5bdb64e52d4302e5c537ecbfd39796222ebf6fbe07ce +size 8860 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..afebaa8ba4bc8dc514267eb6acd289b85493266d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e7c8e3eb2d6d9062bb8c6f9fd295d9f49157298acd59f4a095a9a46628cd625 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0c25f332d7a99cf3a3b004345d80a4c0c729f64 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a383d9fd39d4f807ae32ba5c5b7ae245901a1e6497cd40a0566043bd1e6df70 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd8278a82f87bf81a352039d586005e66acad23d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfb325eb7da4ada5c891ce31755b024267e5b262d61633a89becacf5e53ccd28 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f2c949dd4f1f1d6632ee49af8cb5a096cf078c2f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:185d4834d5b3c15933175eb587243b920d4aec3181c6d123157e7676114315fd +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2dff8d08178c28b96857ad190eb208bea890ded1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:923fdf08e91844ad4f3e7df161f9171bd70bfcf1f5f881761dd33c00255ab15b +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.weight/step.pt 
b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ccc631edf75bdc8097b9358c378d933ba1b4875 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59610d56e7965d7c1263a3c975e4505134de46a7db280a93504190019871cd7f +size 1180 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0bdb08f15b56baa3164c72d5fd8c0858e3acc04c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc847c558a665d5c18374a01f6f443b48fe7fd370df8fa8072afa62ad5326589 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ca76b2f97a5e5183313636a5958adee60aa3e2c5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3efdd39303a18285c1d149802c651111c41fafa9b2101783309ad18841f54da9 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..74909036c3a4327a150e91b085c8a43973404212 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e06455c208a64c85cb7edfe4a15224ef84bac885aec2b4edeb24b68cd175a239 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..912dcdcc9b408335258cbf36675502e05a7faa0f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfb1e34b1b2d8d37ecbcbf97cd629f438ed125fc29de668d3f2a13d6e129b35d +size 2731 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e20760523f3a7c89221def95214e88bf5f40775 --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c8f080026678c074b1d39627a536cf06f5918693a78cd4b2a1d6e41acf96789 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e03029e27c150810eac70c6142372b761ff3c1d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:490012cb8293f93c66b63f9501423c9a2decad1fbe6f4e0a47f10c57281dbd6f +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..20599ace27848dd0783e41601733aacb1b8135b9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2a291f09c0433555b02ffb4fc81a23a875d46f14f3a9c7ce5429044126c05cc +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8505afd60d5524a48b62c90101d157e273f8e686 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64c0ef8fcbb0b1d88dcaa741190fe81fef8fce64e70013e05d748a9d7d25c548 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c674b9fc4691e8c1ff1d0bc29f49902f8907dbc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e18914fbddf91919fb902834d0a598adc59cdf85a35badf08c59af2d44cc10e +size 1180 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b1b883740f93dc1945945076790d3aca8868e8c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:4894a76536cc0e7d6b64e8a209b9d342477c59949fc0d293c278bfe39113d6af +size 1195 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..00874caac694df2fc141c3468e60b81b9ef4c9bf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d96c8fdb3770a6b51c8feb53bf2737af2c183dfc2fe76b621e90ad62da308d9c +size 1165 diff --git a/global_step243198_universal/zero/model.layers.53.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.53.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.54.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.54.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d9f6d82bd45e38e4211ea94947fd03f43ca90ec --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17dee2be9c267c0329c6f361efad30d677193df018d08f3f5f8eed467395e3e4 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.54.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.54.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..45f34ac9f48619dce5da0b20fabd24aa9e1b5fd5 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.54.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e401950698a1f10a19d6ebfedc16995a808ff76fbbba51acde9fdc378bf5eda +size 1195 diff --git a/global_step243198_universal/zero/model.layers.54.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.54.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..044bab5129bb9b430373fd4da010f9829badb61c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:302c26f82ddf8db1870da6d2398c63955002b0b89436be0a9462e4003d072621 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.54.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.54.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.54.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.54.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9120f3b94f17ca18c3b557c5ae4058080169df49 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ed743492445a1cbfb9be58477a8f90ff808de6af53b3c55e61abf11d8af60d8 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.54.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.54.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..ce8352fa579346ac01e4437a2e6fd18252973c85 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0deda2593262d88515c41d15321a72a8ac7ea47cf46c4c09b50b4ed92ef84c7 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.54.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.54.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..827edf364971f992c20d30d86b11eec1864dfed0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8f05fc356bdcf4a18d8f360ffe20b68b3c9101db53b890801f9a943b7400eee +size 1165 diff --git a/global_step243198_universal/zero/model.layers.54.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.54.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.54.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.54.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..178ce9e1da31bd264e14770bd227a4f2eb6b58ed --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6cfc9e36987ac469d53dd1d8acabbc1a4bbe88783c0c0a970e65d0fdcc58cad +size 8860 diff --git 
a/global_step243198_universal/zero/model.layers.54.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.54.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..dc33a1d320b9213ac2892c616ccc5447e3c4e22f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:727668ffb6e30162af9b2b3fee6dc1789b0b4955323f49aecf209ab7a191ec65 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.54.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.54.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3f252459149c3e4edb02abdf9e2d48bee6a6b26 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b22573bfe6288e9e378314fbf83c670506f078037df2e12cbb570b484f3e193a +size 8781 diff --git a/global_step243198_universal/zero/model.layers.54.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.54.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.54.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.54.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..32e526b1d7e16e6be97075225e0aa807d1f7a531 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.input_layernorm_alpha/exp_avg.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9a3219f2b95625a9ebb23194a6d1ed1c09dd1362112c74a094b087d1b41f494 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.54.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.54.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e88736341d40dd98e928ac15adc58ecd7d843f5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e1b0302eae669ffe28a5ab75edb272c795ecf30a070a2352727c6d0e7806f2b +size 1195 diff --git a/global_step243198_universal/zero/model.layers.54.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.54.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..14415c9da28c6f873a16a8bfc0746313c75d59b6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4d2f00bf3c87a34b316e72248721c52de464e9c46620b605b910ff914990045 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.54.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.54.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.54.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.54.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..81c3e79e952bc624ca3ee2ff482c1f3b6b029d80 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e9db96787bfdc57c1b8739e9a9ca3cd42d4496a847d5582c816bb6aa81aef68 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.54.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.54.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6af6e30a9304dc96a8a54b71986bcaaeda78e6c8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e23217a6dec8d8e66f924631ea981c7610ca38fb05be90dc5478995ca782915f +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.54.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.54.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..48f48156724083c4f082655566a1278a80c73b17 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88e1081d0e4464291f49481a237583c68a604d9ba0c82596e361e59f0fb9f902 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.54.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.54.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.54.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.54.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd2c8b78a746f4d2939e0a382fe5bf59e21e5376 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa1e7414d88b83a7b3675c837014d72633ff3dfc300f292d2188004fa33d54aa +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.54.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.54.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..57f67fd7e0407f04a993a54ec7502f1963dcacef --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3fc95b69b7bc9adcb51e5582e0373ee5f3ef9f042ff6f80372b5943199765fb +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.54.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.54.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1be1c36a8ca8f2414460f7485584bcde3caa7ff5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66642fbc92c02a73c70a53a464f1743028d73a6ba8698ba238d45d347d2402db +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.54.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.54.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.mlp.gate_proj.weight/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.54.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.54.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d647bb060db428b27aded58bff51b197608a041c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b89d9891f8f6a77be12f3945386dae26460bad1b4ecdf941797499cdd60f483 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.54.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.54.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0184f313c517cb52c6bebc5146e58c0409a64b6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e160b3685fdc5fd0bf18e1f83aedaaf3e6b2a510d4b529b258031fd105d8100 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.54.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.54.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e77aafa09fecdc104e63421317ea570eff48a00 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:075c433c3b34f1cd136857c20b60bed2535797acc2b3e84802d2fce80d9ae59b +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.54.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.54.mlp.up_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.54.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.54.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..03945d24b3e1ac011f5b6b3047e415cabf417a85 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dccda4e739d3b5f31ad10fc1097f90e3236d4d4963aec7f73a4c35c952fa2c4c +size 8860 diff --git a/global_step243198_universal/zero/model.layers.54.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.54.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f8f21e1b5037bebe3bc732356ade8180333375b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2e19041a0c199c57c87deb2d2b6b559c59caac0a2421f5e78e58dc97f762cc9 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.54.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.54.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..3de653c83ac0b2aad7d29df8d4f713bc05c7f5fd --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:09027f0955b5b47b1b7353abaf3d2f14c3f28de994fde832f8f6170d08d784a1 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.54.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.54.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.54.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.54.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4973cf2df982a3ade47641fdcc6a146c60966e40 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb48c5b0f1afc2a4d3efbc52ef014cbbdda7b8360ee0db482b2d3395b3b02a9c +size 1180 diff --git a/global_step243198_universal/zero/model.layers.54.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.54.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3309d30cfff4658d313472c61134c52ae6ef5053 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4586e62d425df58a43845af7fa17cf0f71af732b57f999fc870cdb19571ab34 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.54.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.54.post_attention_layernorm_alpha/fp32.pt new file mode 100644 
index 0000000000000000000000000000000000000000..8a8b2453dc24992edb02989be6bc4024c163932e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa04dd9b927e28598bb461aa7dee56c0978f0f3eb5928fe959ced6ce841ea535 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.54.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.54.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..26afab5c6f93ef97191f944970da3a6dfd63efda --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8327c404ef34fc8aa40f56b52bc8f9f1934b6e5b8ed1fd771f916b0608ed44eb +size 2716 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d52ffdd827325c8ad270170a2f2f72d5f74e4dc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53d906e6219faac8b3650ba6fba5862bcee3de44c6bf3e57a1ee558f9f233c6c +size 2731 diff --git 
a/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..489aae611be85e039e1c2eea230656ecad985cb9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a13e7b876b702c9cc8a1827e1234aa69ed06a4a8ca2e1469ca7f9019f7a579c8 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f82f8e915eae8fbad1677a417b1902dbaf2d50a0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fb40865d0eb082ad99663389c8b4bfbaa786890be184a714fe3508c419deb06 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..fd2bdf6d59aee309e77e261b328b4d110a7771b4 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1338a151b90e923a692854106a0f901f11ecbce35e3e05acb6fe7dff0db1a47b +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f97fab25c5cae09bc6a0817e78136b586ad087d6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19d48e66c1f82675bb116061f16b68cb62667ae475e3a1db898f52e9814f9a92 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f0f89bbbc451e935c8b15a4fdea7a6b4ff233034 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:737fb582a0bf4146f779adc73c7b84ad2c063039cfcd1fd6940d7b3b25257b7e +size 1180 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.k_proj_alpha/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb4d6c7684f8c3a65b89803f6b6558246b163bae --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f4f69d66d998053696437dc13b18100ba041752d733f164442bac7faf048aea +size 1195 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..807e6ff0205d3d1d9064032c8302d054430900e9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9784df0f26f77dc5d1d041e351ff9cfae31657dbb8aab6eabefb067536f19e80 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.54.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a67647ceaf475606b0058bbc80f85dcf438e94c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:08f8e727d2931486d6a0334b09243f0f18bf5a6f530fbf00e4e5379fd2c60a8d +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.54.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..99c6091c912e967d1ab566d9e21ae48575433a81 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee07bb748579c81a5f770bbaacbc9b590950a9d6f0ce35f43436f7dfa431a351 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.54.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..50974fbefade2ea6cf79218062da983279eaaf2c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ade742c5b561128f1b95715bfd37b119e758775a926525eb0d8ee9dd0bb2d248 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.54.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.54.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..b87bea75399698a5153cca01b4ecb0702dec9006 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:929742ef2608a187a20c229e95449dd49fabf55571d8536be0738e8437327110 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.54.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa385e257897a84505b0ea6217ea6b84542383d0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bbae9f505b49644e77b40ec3cd01fb8f92e9302f85cb5b0aa219f2cab9c182c +size 1195 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.54.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7d6ad66eaff7036f2ab20a7bd699940767d8ccb4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd86e997e7b297980556cb6b026a1f7a430d9d11d45df1e7000ec5506f52cb9e +size 1165 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.54.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5978a273cf68969656565f4c5ee5ce05e12d406e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29b9ec28dc19c31dfc02c043d0a2e46202aad2d3b3bd2b2c543a80d321603bf1 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..200fb33e415b98035beb5702d80f79efa044905f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29e9edea99e7d6e095dc562a82f362b70b2f42e30385624d45b27273ecd55459 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6889cba1418443a43c3cdeb2bbfefb004f5b3c6b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc0e9aa610703868c3e520d59390e0265ba827db6dd0df35a0eb59ec115315e8 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.bias/step.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9f4464bc39de8062eaaac2d29f33e546c0646786 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22e956ceeda202e77eb591c3e8f6fbb663cfe37d7571271d1aa168d857525740 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..95ba97935b0a93849e7d2f849b44c4920c63c424 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71a56d5b7eba77c131263b9bd102597255507dd66382438812c169b8f7cc866d +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8560b651845827a8de2eddc0ec4f104c360087d4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62d4d5e6136b174d810059f9d4ab65ae1c1fc440bce66fa171726144e3bac6d4 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1808d6f2ff0452d3a33c55396d3e40192c0d8bc0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51ce486368892ec99ae55a386bc2515f127289b5ee8ee6bd6aa4d8de12226c57 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e05856ab64a392c4f85c87a6c4729387275eaad5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b394f80094836ea5ed4a233ee06e2871b737530bf8b9cd7f2efc5064c82f123d +size 1195 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f901e78d1cdfbe701cdd6dccc1f36e6de46c4713 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf37eed13e71f054a41274da2dece41242b77be2bad779f85048664df65a483b +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.54.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb146a538d0ca732e706fca52faafd4b28867fd9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9116c30f0e7313882ebe2c98efa566838934dc73b41b039ec59c59ee0d317b5e +size 2716 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f0a90cc931dc876576a788e0ab7316720330772 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f252c08b27cef19bf80f3fd67a646070c952ea2b77dcb8638ad349be2ef141c3 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..83daa9764b77b32354d42b2b385c6dcf6f9b1909 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.bias/fp32.pt @@ -0,0 
+1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1ef94c76a303e42bace9d2762cee253872f183756eb0d7638499ede27491259 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fa73dd6e365a105a35a04b882dbdb8e0c9ba79b0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6d8f6bba1f1f3420f3eb197442e8e08f81260118742b48a5fcd00e75aff693e +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a19a50eff328f02e4edbcb176144d522912802f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aafeac3e6b911a67637962830b9f6f62abf4053e08ca0a6ccd3e8ded3227d16e +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..fd15256560806732800cf06e4fdd5f6d840a8757 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b36bb9cb8babca61a844299d79906e4aff0cd800c49d095966a2afdc064c11a +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f23a6b8c18baa9352c575196a911c406a0a61329 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:725b8df07dc535759e181384fb41d4c64f8c6ee71448dd89a550c3862adb2bf5 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..df6b0bf778a1d9f989488bafe69e2d859299cefe --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc96af46499e3b015036ba368b198f841994daa041a17a079df33680325db4c5 +size 1195 diff --git 
a/global_step243198_universal/zero/model.layers.54.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b05e1ebfb8315ece694e7c4be8cfda9ece2f49b3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18e1c3251cac307275b13210e584b96fed027e4c38fb324668dd36ae00a48b5d +size 1165 diff --git a/global_step243198_universal/zero/model.layers.54.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.54.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.55.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.55.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0948336e4f10c7e23913eee0cfeced364c1f07a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74416b05362cae79e468a9ef83d249703c46733b30e8f60d5039fa190a993112 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.55.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.55.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff3f327f05755f0c32cc8367dc87c3064e2ff98b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:99b9f45ff3ee8c5583766efbf83fa16cbf8af226cb9d33a5b95a35beb5f6ae13 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.55.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.55.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a44b64dae7e3fd09f4bd98871f129afd004682ef --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c801456f34164128f6611b4b7baae67b596ddb765b8b76aabda87176248e980f +size 1165 diff --git a/global_step243198_universal/zero/model.layers.55.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.55.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.55.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.55.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9825ffccd6cb70045d183aeaf822595f5ced97a0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da6b7b33dc1ea409515cadaff4d785c021c5d9f04ccab3521e03c04c7ef836df +size 1180 diff --git a/global_step243198_universal/zero/model.layers.55.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.55.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d831f45cd4507a6389df915720b9b4bc378eda12 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.55.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7608d883190c9ba91b6aa8161133d63a806e99b8dcb28d2ed42d44edb373ee8b +size 1195 diff --git a/global_step243198_universal/zero/model.layers.55.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.55.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8a09798d723a0bd058fb87c66c5710900c5b5b7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78936e0779d20777d4de249ac416e4107fe3a8170548cf101057d10fcd9f2ef7 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.55.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.55.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.55.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.55.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..aee7f249c0e648cb6662644da78c8808bdb931bc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:428594d065212fae2739349d3b526c10db777ca87dd739e3ec2c99ab66385711 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.55.input_layernorm.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.55.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..1413e68f6a9f3445b12b20adf1f5937d25c3041e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faec3988c3ff387a7bcbd22abb460b23ddb12d65a015cbe27c84147551c40dff +size 8875 diff --git a/global_step243198_universal/zero/model.layers.55.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.55.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..71ea886b59c4406019fdbe2685e20350bd358ebb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6855481d1fd32657b4fc4ad5eb145e544aa890cb24f10b8cffd114e8c9d63bf +size 8781 diff --git a/global_step243198_universal/zero/model.layers.55.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.55.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.55.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.55.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e17e7db0cd30af9be34268154b35a3ce2b65396 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f33e25c6f5e334af2cb305f1a251be29f200cfcf3fb64e21cccb6f1636a344ea +size 1180 diff --git a/global_step243198_universal/zero/model.layers.55.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.55.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8b4e887b9ebba3772f65912998358c11a2eb921b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeaa16734151d355484f399604c37cd97bf097e5bb5b18ece154ec05b715052f +size 1195 diff --git a/global_step243198_universal/zero/model.layers.55.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.55.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..76b33557a330e610860b6c261ec17f8ae472757c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8b4fdb6f9b63bd44f4a8fa8a39fd579a04792d8c9bd106a11f08872b5fb9eba +size 1165 diff --git a/global_step243198_universal/zero/model.layers.55.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.55.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.55.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.55.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..978f5d0756f2cd519df7f52420bd39f738cec913 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.55.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de4528485c1af298891c2169596d8bc81165420b73dc47f8de6e4caebe06bb8b +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.55.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.55.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..673953b2532604b4cdb671aefc5377e379f2b2cb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b23e612505f4626c9fca3420c6766f0d53c5893417b04ee51c10ffe9db8ccfe +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.55.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.55.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8dc3c8f89600b42de1720a4640fd7aaba9c4918e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dd4589939e0a7ceb0f85e828dbe83e4eb5c6af653f804b2964708dc4b750d5d +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.55.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.55.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.55.mlp.gate_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.55.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..00261b44abee20f792c830a62b7d9f77facc3f34 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c0ca209e833bf8eaba00c66305f3ec40bf1f605a9b5673169b833ce34345a08 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.55.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.55.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..133d5711ed4722b16553c5342da02533b48a5f3e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77572f452b032181cadad56a11f8ffc3b4134ec899ea700d3b44fda1566bff88 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.55.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.55.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae9c874500396b301df3d65cd892bb2df943f9d8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ab733ba5036deac8921cc62c6a3eddb706cc04db6acbfed09e5a9bad2328fbb +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.55.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.55.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.55.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.55.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d9b9b52758ab1b7b5c23894a43a56352d384661 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1df9eeb033d5b6dbebf2fd884bb7a302122d20b88c00eeff6bca8a2d4e866433 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.55.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.55.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f723deb90d752444ddeb810675a579050541f33 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09fae33f81a4002b6441cc88b74104890ca82a8f5d8c6ce492836c619f071f93 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.55.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.55.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e6a8933db23c2123f9cd5674e6ddca79a934161a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca66a5ad7c462e819c1f4d4943641eb7e004c4e0e6931e8bbacaceec543ec8d3 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.55.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.55.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.55.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.55.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.55.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..711de53509ae85cdaa6218e23334b184cf5e1fb5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44da1d7eca3c52494b32a9dbb8add81a81b7ad3dcada49a89ee509bf2fcd6526 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.55.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.55.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..467b760f82b2ae4548ece60390cbbedec9e949e8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44fb55d7cbb61967d627c3254fe547b8813b71cccf8b24c0d6b5845821cecea9 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.55.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.55.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6fea3b6b1c008373433fea2102e6bbed95e23f46 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3843f21f9cdb10e2df654285660c797aa4e52df8665b02bc4f485e0d98661df +size 8781 diff --git 
a/global_step243198_universal/zero/model.layers.55.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.55.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.55.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.55.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..65d178ed817ebff5f462f87a898ab105d8534763 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7d6749e0bb14b65f38fcefaec09e4e04d2e0021b4c777eef07a73db58c58d41 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.55.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.55.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..35622a94debc0daa26d01bb4032a8f5e30a6da5a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cb3562e42c0a3280f1a66d5f663fa0a643f2e2956c84b453053ec15af31ec2f +size 1195 diff --git a/global_step243198_universal/zero/model.layers.55.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.55.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6af820012bb7399c254aff48379ec112f6fe3e2a --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.55.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ad0472084c7c07c646ccc509e840df312ce3c25615455bc771ae1da2e884aae +size 1165 diff --git a/global_step243198_universal/zero/model.layers.55.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.55.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3bc13b1b827cc668546a2d3ef4e8518caa3e837 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c43fc4463674be02defa8387f5b499c62337e020e3b18af6edd7a7d79ac0816 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ebffd2887f63804f9b0818eb7b3a34d122cdb130 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72438470cd13201abd143d78105e750c9e73fe4f38e733c8490878b6f3d0a476 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.bias/fp32.pt 
b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..34c7c099b9f95acac86961df9af6177a1e0936f0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c5f20e1725dc33c6445050d6727de1dea3250b2f491624abe694d67fa49e696 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0084aafb3e24c43d2f2d17daa27a5b0d1727a2da --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71cb81cb76a3ab04bc0eb0ddca5efad5a8b0ba3257f39783e6661ad01d564403 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..235948beacf9e673390ef94659cad574db78fa1d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3b45243ad6b7ae7c0cef53ba2b172c4b4b921e91831c9057809d37eaef520712 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..67439219b6f6c5c3023f3d7edfd52a00be86c8a0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:637afc28398fd7018d5a99804758e4bbc84486d4e9f4bd7fa41b614b398b1e98 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a3374b8290d3573f542433d0b42f73a76b52f5d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7088e8d6f2ec33aa978fd5adc80107bf29b5530e5243eca59210c03c98db658b +size 1180 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..620f58bccf9964287327ed39eb9e684bbc6b2f11 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4c46385f72330dfc75679e65696057b4d0e623c92c0bdee7909aac68dbb3e69 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..bfb23b1c349d84a201bd09cadd23d1bc755d54f3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:020063125e7cee8950cff72b5b812a44926261c8a0e413b15adcd5fb84ea611c +size 1165 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.55.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a07010fff776af5bff824493693d7a545a3504a3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8e853b85e1aa94382bb6470d86211a60f788d7848e16f42be0df3912f916939 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.o_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.55.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..54ef7fd548d57115cdd395f68c5767c916f1388a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:449c17f2252de9d5c3d0c80aeca3741b7696b022d5e4c8f8c7ee7f03d6e98093 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.55.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2200b1c3ea6f772a1d21c771367d46dca6fb6cfc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3153ef8c352e8b4cf4163ac3d140cfa19b9b7c96951ffc5f5457b47efd95077b +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.55.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.55.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3701744e49867a4d57f5924e59babdbb75f24498 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:e83c71fb4b69f2a650a4927495a2bbe1f38629bc738ea39ceb61ce3050a97ff1 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.55.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4638d89b56861b4b46f89524eba07409dea73fbf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2e560d37e9315e180326b1ea38f80d93fbfd8a500208380318011be5dfa0a71 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.55.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..876a42d900089041f052d96c7adc6e479d71fdb8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc6addfbb6f413df20256a782157c82de415bd47bb8809d6a0b19229fbcafc66 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.55.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..7840f10e6da0e834e1f2196dd00b2eb2cf1b131e --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cac86f90a004914b723c557bd9d375fe8cd46905f615445b49b934654038060 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4bd18db225a7af019749fb6a9e97c6c58c303e20 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83dcd8d427a961ba68978a10cfc211879db8850ba575f8cb7438e6d0d4f3efa8 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0ed977a6ac7684868124afd3c68b85fe86e0acb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58f01ad7e9ceb79600d8cd3da668a49858e1417e6987561080cea3ae1a03a9b7 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0258acdac838b21c982623781b1a40a8cf7d9f40 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a1de93b76dd3cf1dce5817d30fd7cba1216539a256cac3b728aef9dc969f760 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2b34f8dec6cfa62c2be5e9ee474769781cac9a9e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd4aac0c560fe9504aa09ab4e67d09f8fef2984a0cdf3655050926da9a9ac011 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..50c1a2c52bd4f9f70956d4bf5b7550f2c71ec31d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d2fa61857b363112ef7177c56fd0626f258f739252cf1ae97002dc7beaeec22 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ecd7eb34eddeebd99a50d231b98d8f5b273819e8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ae9d861af3e2ee13aac7f774f7596c1dfbebfd7d3877e5cafaef3be5ac92a9f +size 1180 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f4f9ca72d54a68cfff208bd697c504471b300d2d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db2607d12ec49f7c4909ad76b3a13a2bbd1fdf4a3a813ec08dbab02253c20ed5 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cf3f7f4064628b384ec20a2d2ad65b18c458c666 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db71092ce6ce2ead648fa3dab1136dbbeff95203483873f262b5fcca66b43d3a +size 1165 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2a7996bfcb60ede55222bf4b8ab660d1ba91b0a0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29a6d3bb52c2e0e3747c1d7294c76902ed71b3b2b60098c3471683667e0cff89 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..afb87547201a63b8c7f456e0fccee4f3b4b6fafb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:448c51cd8911f0c78d295f86093284af26dd48e89e50a7d032e6ab705f2e2e4f +size 2731 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4828cec3647ec113b0dd08dbb20929f58a82ed8d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26c99a0410e9721e1bd774bc97176126f6d4abd1d666cb830557f833292e1015 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.bias/step.pt 
b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b8a7888c451e3c9067ba7ccf6a6ba5cddc4271ca --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb9e551161adbed59b94631b3eac28400451815b6013b2ab28b708d0a55a1b32 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1c31953e9210f0faae31ff5192e975bcce06f13 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca173e18bb51c7add5c14a13afb9ab944014f23d0a4d67896767cc333543c0cb +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b634d91c5aebba3016f22571c3842321a4a5cb25 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:3dc6eb6e29642b9998ca0d27ca9a968809cf9014dae014b3779d6d1dc3eefc0d +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a9c78881c4cd293a4c0a8d6742ca376d526916d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bb4b5793ca2a34842fb17fb7c384a1120c5a701945b4cb1eb84325f1bd64e90 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..00f859c8b38c5408c1fb4f2f566c69510c335d8e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07e28a0bef9a0300f8188b30a1ba23104a9a5befac8c3c379838fc279c38c298 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6370cc3c1f7e777efcdb153829692242294b1736 
--- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2f92e20bc66f00254554c059bfcaabe4fcd5b62ae30e4fde1db09c578849be0 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.55.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.55.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.6.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.6.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..98958141422f9c7eb4946eb4616ac5e62aa488db --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:829597ff6dbc12706af211967def030057ca7e593d4844086b39fdfa744217d4 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.6.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.6.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2db497a5f45b6c4aac9466c6ad223e5ae248c17b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a2aac8075707351bc604b78dbb7d0f53f75fdad16f89b5f0dffa7f74995525a +size 1195 diff --git a/global_step243198_universal/zero/model.layers.6.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.6.down_proj_alpha/fp32.pt new file mode 
100644 index 0000000000000000000000000000000000000000..b29d1a4571b41539857c91ac4ef873b7500bf904 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db9409a6ca211a15039f8cc91e45305b58e66fc52b2bd4b983b3dfb24d39cca2 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.6.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.6.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.6.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.6.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..fc0c35e7ab42f35669ba8b91d3ac69ec05600e7c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8feeddd3a50f14310b57e377dd00597e2e1c0c4725fa36ed7b58426ae892c85 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.6.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.6.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..df2c152b5973131eeb6e679d9cb147c22a05e9cc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c041b7451aae4caed120d1986f9e1cf3046fd6a559fa68a80f7c92e317ca6670 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.6.gate_up_proj_alpha/fp32.pt 
b/global_step243198_universal/zero/model.layers.6.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8019cce89530e9dabaf5e938deadeee4d84b4efb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45bc9450dffed171eb9ca352e593a25609f45824451bce0a259a7d185c39e430 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.6.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.6.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.6.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.6.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c0826da0619bb35b29b8123662f341a3edf2083 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e16fbbe42b02b3fc2296eb25dce6f69a33ffd51bd3d860734c5ee7c416728eb +size 8860 diff --git a/global_step243198_universal/zero/model.layers.6.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.6.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7f95c94737cfcdf3c49a3c09080d509d7dcac748 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:bedc22d581cdeecbf467ac59f0bcc572fe42f6c70dfbe9aafba2e2b3d2ff06be +size 8875 diff --git a/global_step243198_universal/zero/model.layers.6.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.6.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..993415258a07eb6c5056d936ed0dade564513483 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:567fb168bed59dd1e814477bc1118e9cb25422a9ae14abe8ad4a3f4d8d88a57e +size 8781 diff --git a/global_step243198_universal/zero/model.layers.6.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.6.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.6.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.6.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f36c6253973f52044c5b39ef5b26cc6b20ce2e94 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:abecd9eea59ade68ffbfe7579744272046c89ed68d127ffce28dc771794400af +size 1180 diff --git a/global_step243198_universal/zero/model.layers.6.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.6.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d62a5373676d42297d4807d8a0d302a2892991f --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.6.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ce0d19accdb4379a548e021a1ed4c7ee6dfd41d441184189093ad8eba10694e +size 1195 diff --git a/global_step243198_universal/zero/model.layers.6.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.6.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1617e1c83d3cc4f9529d8cb5a09ed8bc12ad3a0f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5ed4d0da819035ba6a34d3b3f1e06801d0e059ead49573eb4007ab216ad0c72 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.6.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.6.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.6.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.6.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e00ae4139d2e7eb463a0e854eae323492061feb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2344845b952671da2d064d9db0734013056b90a95026da37bd425323450d3d2b +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.6.mlp.down_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.6.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c3dac2e4d11717a53a00b3188aa4fc0ad4cd7ef1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce79b360b7caf25d6dbb4a6e08cce1ea61cc950e5daaabde7ad173dfced784d6 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.6.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.6.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b49f48616146badd88dc7093b2e01c16c5e9dfb3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23584091305e1e66ee630b65ce62c2bedbb0da8d011bc805807c3b7d37d922f0 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.6.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.6.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.6.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.6.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0422df010114f2ae6428498a5db53debdf33ed3c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:d69a3f18d8347f4057f84878c7f29d1de79a3ed94f70cb94e25921b2bbc2577c +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.6.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.6.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5edb24a7d1ce6367135fa4ddb938a3f267e0ffc8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29a46cf5b8289c27e12369f67d1af122f876559b5fc758de7572311488847e7d +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.6.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.6.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8a69ce1d817e06e4c792f90704bf1ec038112468 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d977c76775cef0e9a25182e182aa5ac116b236f2dba2f939436e59f3762b8df5 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.6.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.6.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.6.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.6.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2beaf68e591a1024e2724174d584ba0f956633ea --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.6.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c895996a5f299c704938cb322010081528f3242e3eb40d8fdc06f8172d761ce3 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.6.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.6.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c8ed818e7c5860754380bff2f6b34d8ce696188c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:802c2e50e7601651b0e656f587b1227e08aed9209b9b7503e5d000ad88207588 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.6.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.6.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c6903dd00676934cca8a2e1f49d94f60aff82e77 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1eebe637fe5acc027f72f0e0dda97124de1010b9cd3b0c5d7146c9334657066a +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.6.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.6.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.6.post_attention_layernorm.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.6.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1ab40247c7cc8d0f73709f5540931bc119453fe2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7826c965b7fdc8e31a279fade2e678df6a781df77e01e30c50c048c46858707b +size 8860 diff --git a/global_step243198_universal/zero/model.layers.6.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.6.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..814cb621b30b24a9c544410592d1a916a188c7e7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f80e582f991af1b361a7b720111773bbe8c1614f189944e601cf6cb768b89f3 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.6.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.6.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0442c70e504b9733555b3a0846a792242adc4cff --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6edc5f6dd167b6d6c5da110ee923670bb6ea610cca1bd1b3946ce102f194de8 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.6.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.6.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.6.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.6.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.6.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9c210d171d580073bbdc92f39a61d1290e28779b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0739a5fc4c22fbfdd071555ac415207d96bb7ce8c53b1b29e5fda1f7c359d1fc +size 1180 diff --git a/global_step243198_universal/zero/model.layers.6.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.6.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3d945e95613b5c0f962bdda245405f785b3971e1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f820f445d747428694c31f81c8ec5b6a433aff85e75e86d641cfa2ae332bba5f +size 1195 diff --git a/global_step243198_universal/zero/model.layers.6.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.6.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..eb40ca84b540b3e0199c388a0cfb499742cdd4c4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6438e9e0e498021a37d2e03ca2a80233c9156e7e293b51d63bff22da0bfa16f +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.6.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.6.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8cac25c840300a8728e2e93df3a74826aa112b2f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9da39e6848be5f030f5308bdc58aeb53230b907c7ff1ce46d62e426f4acab80 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c2c6f0783988a8ae66c400b9a67db5d77da0233 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cee66e6e54811d1cdaa0a2cf5e663e99399a240d3694a4fc3759a431892aff8d +size 2731 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..81908ea8bd791cc83a31ce7d3741dcf5583854e9 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ee3cea04d91b664eaf030a6783d4875d8b00afefa99876f10c5f59e66633a19 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e83109585ff00253915b33099be5db910de12b8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7332d6d38d664292dba4929c1ffffa1e107b23faddf721e531f83a617b636188 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..eed2ecb543d5c5eb116b81bd03350aa812f09537 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad2bbb71a273e45bc45ff04759ad21238d5df66358a4fff01c5da5e3d32a1a1d +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..51eb25ec9be0a5bc9dc487eea8d46ca19516f7d8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18d22e5a02bbb140143d27e093333923047f18bcdf1c676e6d29d1302c9a10de +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..47eb7b75b2c3720ab687d33f8f5bca7990c710a7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e4e5fa630bd8cc701751162178b6773b7a68da82f073f8691f1c48eed89b4e9 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f976fd07780f78a0df0c406430b8be2df4bc96ec --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:aeb926a2eada235330647b1b4b7409d6a458f3e36704f9a829463ed5ebea6062 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ff2961d36ca497474ea7c084f4010231125202e4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50ebee1857eecfb35d1c8638b01c6afcda5c5f3a5741ea898089bba938bd6f8d +size 1165 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.6.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c21658483dc5a330f946e1d67739c76c4b609ac --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aabae6ce462b615084cb82b3d4011d3f1894c5dcb1775a37d63c159a1057db6a +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.6.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2617a22837d3b667b93390218213f69346da235d --- /dev/null 
+++ b/global_step243198_universal/zero/model.layers.6.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3656601c377e15197d2985008f77c371e8e5bc8171e97046941cfc6afbde8ae +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.6.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb91862bd3e42050828d40539de21f0057e71241 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eefc6cd2a7e600a0e9491ceee71c30a7a77f369364f93ffdfdcbace137071d5d +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.6.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.6.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1776484414f46529b1bcfd275d50a89248157cf5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf0e3aa10f4eaad92fdb2b5a6e3a183bd45ecc2f2dea2a0e6887dd70e89fe04e +size 1180 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.o_proj_alpha/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.6.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..04a8417c3361e0b2dfa1206a14783f6062f109fb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b8667aa94914fb72633ee274fae264451759dbaad89e25910e232ed0b8f158b +size 1195 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.6.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f47e2e901a257d8569282ccee6ed10493b423a22 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:294345bba1336da538c585d2f9dc336bbcd310218ca3a7254ab8282850d74c04 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.6.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5d15a3832188e73207596ca9dd94bbcd49a5867 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:70d4d73cc334d0abcd791d8a7a582a3f3feb5f7c939e0f671b40e94cae2db06e +size 8860 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..a8f4968c1dfae2abd4138d210dcdb224a83d95fb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9641527dba7e0bf6cb2cc02feca68b5cdcc1cdc22880f5d4bdbcd824c34a8d5f +size 8875 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c8cf5c839d1c9abab7a0dd53325228fd23d45f09 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d73eb0dc2712c46d251dd5b4d1efd9f0db329294a792f1ffff278d51b1c72d8 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f9df00325e938cc85b930996a49b64565ae6344 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:552324182409f9a9945e15f5cea020ea549f4913f5eea1121d1ffc9aabea5536 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b2a02b5f66c03aa00a93aa1d4464e4dee51487f4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2e9cd899e56155159cc6fb26cdb6bc071b240a1453a91aca47ecb1f3bde9fed +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e473acc9fb9c592806f43fed12f5fb413ac5e0ed --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ef9268902e63d2ec639dc37d8912e5b6e6af465ecf1f6e4b1f0d817f918bb03 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.q_proj_alpha/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c866a9f0d83145a56cb6144cf8423b0c3edf152 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed9f47a849cb2bd0ed686ae875abaf3894a2117a227e885f098167539b24be35 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..ebb20190006be9bb1b2d0bfea1ce9873cfb2a62f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caf08d8f9a774787fd75ecdbd87ba80af930678d2751945c5f4eaaae45b3bc6c +size 1195 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..bfffdb31805680e040f673eac2036267ec47bd0f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09ac386f447a15a3aa0bc52d3355d44f054ccce75fabca843a9a812568ab224a +size 1165 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9dadee741ae5130e2fe1046b18d628b939ea3b3c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f178756d9298a036ffede1497977c3d2d97355836becb023440cc3abfba64cd +size 2716 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..62bca3656d830621a5bf46f5a268a5613cb07164 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e43c8c76ad7e201a32badd62c24296def5b558ad628248edd2a7d4ca2274f328 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8bce1803e56569bba498f1e6262a09f79381cf6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1125c67a48c06ba1df2842a6da77ed7865d439ec5404e7369f20549e269cc460 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..01ff92d916ce99b0016687838957270d0b6e1e7c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b63ef89272fd53cd4f57735e9dae9ce2e0babf12855d026b692012b4cb0b2748 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5243a11084d830b3b6f4b9a20982abea01f15f0b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df948f2d7d6722ad9eba68667589447de56fcb7e40437146719795354ecc9100 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..bdb7cd80f1ff60230ac983970f32aa96722f2c12 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5734ceb3be3859bc19c994bb8ec604d1f9eca7dbc711abbfd6b79fe23efc2b0 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.weight/step.pt 
b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..096a8f7b1e893573b15d78d15ca615a75ac08e8f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69f7326e8187cc14d0347508b2bc1769eddc50d2eb0b5d08c105850333ac94de +size 1180 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..885aedc70fc461a1264c128f797d04ca8f9635d0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec63ec0d919f3b1d8873473f53b73db051cf402671656cd3d71aa59cdf63a731 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..99dff0ae264e34f236322926c5dc8fcbe81ae5f1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:571989bbcec29133388324cd6c9d003839dee5f25c13dfd4f8208c806c99bc96 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.6.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.6.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.7.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.7.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1147cba3546c59c4f324a5f0259a948b29cb9785 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7bd848df0d13295a5c70bcb4adf07f655d46ba0793eb7de3dac88fe308fdbed +size 1180 diff --git a/global_step243198_universal/zero/model.layers.7.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.7.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b727917912b26d1fcc232b05c10bb0ec4d18e9ac --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:513653eb395c55afec2cb7a49317076a1b1ff2638e8a4ee8dba86b1bf04e2f6b +size 1195 diff --git a/global_step243198_universal/zero/model.layers.7.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.7.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ef0277af6a5af8cd60ea410927130c8266175630 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.7.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5615906d813d1e3cc80faffe215af31d916c246f7905110062d69bc65ecf80a8 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.7.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.7.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.7.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.7.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab755c423c355484a82365816ebdf0aa8517f289 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08e7729eeaeb028194f64cc38b535a2f776ccb5414a0eb22514a2cf0def87933 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.7.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.7.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..03a9f7da3bce70d73a6cceaec94f202febd23043 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8e00a4ed65f2bbf9bd7be2148069fd7c4a7e3a78c03c85c148e1e46636415dc +size 1195 diff --git a/global_step243198_universal/zero/model.layers.7.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.7.gate_up_proj_alpha/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..1da5c8544471fdd3ec65667ef6fbd77734af3e1e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:884286c4b2c7ecb05a51042ea7f60ca7b97ba446ac241138d77b3a7d7e7b24ed +size 1165 diff --git a/global_step243198_universal/zero/model.layers.7.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.7.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.7.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.7.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..71bad3602faf725b17e5eaff6cae762c42f8c95c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d520b2e270f8974be580876e689efacecf92527b4b3d174c4aceae15aa669cba +size 8860 diff --git a/global_step243198_universal/zero/model.layers.7.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.7.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d38c4586bc159688dfa86e36a85c84d775654ca7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:333225bae7f2be9fc3a114b1d24877c470929c2d4ca2fabc5e649343dc451ade +size 8875 diff --git 
a/global_step243198_universal/zero/model.layers.7.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.7.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4751abd13298f84fbabe0a8efc3bb4183f96b63e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5936bef4326a6a6b650b96515272601f963124074e8096a00a090d23d4c13d64 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.7.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.7.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.7.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.7.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..436a3e142120639098f85f6f83ee10eb3a978a43 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2e51a5e07b36db37cb0ebaa6e025fc23f552f87e963fb516a77fa1af55d4781 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.7.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.7.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..908513beece508c5ce8535ff87652ded823041fc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ 
+version https://git-lfs.github.com/spec/v1 +oid sha256:47520d8ef9f8d51802e955807233a1099f1b0d68a84b8906e7d7d6d0f4727651 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.7.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.7.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9a8e927b66b1ac6be1976882c315a0eb433024c9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c83e362fe4d18e0a5bee8d1dfdd4d25e5fdd4c9a44d11dbffc6d149f8dda54d +size 1165 diff --git a/global_step243198_universal/zero/model.layers.7.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.7.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.7.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.7.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..98aba62e9c8326d6dd69f77d21d4adc24ce26e47 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3110b909cb35a91f4d3ae71f869ba7ad435a5417f378fa2a26580e021d4f25f +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.7.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.7.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..153e6d9a12e6dc6a08ae1770461b23b221af61e4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b309ea6e3fb98c98c4e6a4e43bb2129b7a26b5fcef3e64b095251c56f16285d5 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.7.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.7.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c749db810fab82b46b38d7264519b7fb21b397d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d064e97354f9f62ce7d57015e9128bf852e40c6e0d6a9a3d374ea7107fd09699 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.7.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.7.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.7.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.7.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d80557b5c38386d1170070e14fba4f51bf13458b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c33a0f1378190f1603e340e6de09937a397cd04e767233f7a762da02aa0e519 +size 36865244 diff --git 
a/global_step243198_universal/zero/model.layers.7.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.7.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1c3b175437e1f26e1b97a53c9631aadeead6b3b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14606883a26b202c5801ef35b7ea4d4194c44cbc13083adf16815afda81941d3 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.7.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.7.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ce2c5779f8a98484511b76cb36c7c754127f6236 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17827326c8cecf5aa6840736ba0b89cffd76180d17812255792618d89b94ae4e +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.7.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.7.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.7.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.7.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d8118cc3b486fdef8b47ce80260a992a47576e93 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:258ef3c2987d0391c2b3d543cb80bea20c3ee524916af9789fdf1f39bd21de2f +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.7.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.7.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..fc38412fe0c32820559ed84c438657e8852ac71a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af5bc72a1b4be95544c358976dedb61591a70be170a1ca0d2bf23f5230c0dbf0 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.7.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.7.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8886a71120293ed48d847ace73438705a6516f66 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b861c8d71196890a1fa6adfe4748f5d338ccb92af3d6ca1c50e084865ddaf34 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.7.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.7.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.7.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.7.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 
0000000000000000000000000000000000000000..7c250c3fc410e848c704f3aa913b39e06ccabb9d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7fc20389b6ff79d656d8b915662985d6901b97ed726c16c0050562db564e5a9 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.7.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.7.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..de59422ea18c4c2e7fa235d3594186f4d6802f73 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65332c59a05e9b2ea6bd5efad82e041281cc738caaaca04ef28787170f26da11 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.7.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.7.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..aa7d520c2834c789cf64395608b8ae360afc2d9e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df55b583649fc19985a063dbdac424a1f3f279abec55e39fe7c17d10efeff8c8 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.7.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.7.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.7.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.7.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..85c15ed00179025ffbec1e67131bc068b4ebf040 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e59072fe21ce57468876bcc65b21ce9937d229620a9ac3d6a7573067196b78c +size 1180 diff --git a/global_step243198_universal/zero/model.layers.7.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.7.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2c5944b1174a5e7b15c97e4845cacfbdbef0591f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3271c3234d64c9f74bc39c680d409f26469c28ae82f42bda47413ece3394863 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.7.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.7.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..117dc5bf99cc991a2f49c010d3d87a0cfa1a264f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4803572f6c857b3a45349e86f985d881283032a822c4738025975e358fc71e4b +size 1165 diff --git a/global_step243198_universal/zero/model.layers.7.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.7.post_attention_layernorm_alpha/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..12c05938518e48c9557cdf208b0b4d9abd073e5b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d3433c7377e9f7bb8ae15d692936af03976941ff84a469f97addbc1df23925a +size 2716 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..385aca66d758bd5cb5451f49bdeeff1ef4a0d8e2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:412827565e23f105c923c1098bd145a815296e40a4ca1def7745c689839bcc97 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..32433543f0413e8a28d169003c09c5a2c88134be --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51ec9dcf9a466c8dd27ae51686d131ad9d6c43bb61bb7243a70cf2ce7d937c07 +size 2637 diff --git 
a/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e0cd45c16b8409fd0999eaadf46ce6fad9fefd2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a177e6906496758ebc490a63936d07e563870830659567cf88b215176cc11eae +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a61fd09e169c99466ac0e926babdeb667a8bb9a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:959344e75e5a943cfb38541d987d971bc365c2aab527b5a11652eabd7efeb648 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d338d4acfb6c13c2415060912f3025888c3b706d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.weight/fp32.pt 
@@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b39b0e08205b6578a378da2c51816e4dc69201bc5032f5e6e6db77497b07fb4e +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..28c2d789f0abfed498a9f3f47e214e93e3339712 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ea5491a1a596ea6695c13d151998a49c2ef7ed7865e2e9a97d7f1122b756405 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e66ed4df5c1edd39a838da40320fc7af6bb49f6e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a548cfe14961996a402c1ac52eb3d20d866372fb2a7d3077ce68dedbad7fd6e0 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 
0000000000000000000000000000000000000000..2198ab45ed2a9e331d3d8f4f0324d515f173d353 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59dc1ff7f0d233142e86a96f46f03b8a512288f5e92d335789e2cddb61106145 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.7.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..706061a1b3240c82dd5663c5280ba30fb0192ff3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f17c089e51c144e51163949d4f703191a0d5ff65ad1beedb614314692ce6214e +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.7.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..69a668c4d0191277ac566d8efcd53fd333b6e12d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3875ff95481904459961dfb5309a64b134fd5c41a94fc1726b633b5945d4dc85 +size 14746859 diff --git 
a/global_step243198_universal/zero/model.layers.7.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.7.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9e53a4696c4a8872235d38197e08df72145410ff --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:709bb179478496da44b89b5352da376360506250b510f31db083d8b627e2ec93 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.7.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.7.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..74f6d3a55e46f271c21edb07ac6f48608a74b047 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dd5d0469e5715024e0b09af0efcad5297681dd1c73f8496c1126da02c16d6c1 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.7.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c2e40d74cecb753916a9d5ebab0afcac57850cba --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.o_proj_alpha/exp_avg_sq.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:665dd3b35a8a4190dd064f1ad441f8600ed848ae76351a955fbba449ed6c4a1a +size 1195 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.7.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2419de4a7ea16e4c9b240f247f65bace0ca2e8fb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9529a78d2f987f773b07883e73fc6813b91a873cdbd019bd67ab114e7303abd1 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.7.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..e120c3d8c176a122f2b4ec577aa13794ff7b1b3d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3000832f228dfe8f8c6132238e60cf37e6cc0a1c09b3ea1a78721406751d515 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 
0000000000000000000000000000000000000000..f5f57ec599a5fbad3e0e3421679d01d42e3a8f70 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a8107009dd56d803b2374159ab75146c8267a3db25b0cb8d1ac855e8721ce69 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6e1d20531132ec463036821a21e7010bc8c7956d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e97ca9618470eab84a85d278b9c888f6ab70d494f0b17c7815d91b56f972f5d8 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..239865e6e5799ff78385580aa2a73083fc022493 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdc59a4c193207f659af95cd8c41230e4669bc4b33096e6dcfc910e748dbf37e +size 14746844 diff --git 
a/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..92222515cee8a9589cd1710cafe216c1c98dc15b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3cfb523da1a51bd682b6e3ebc85b3b0d352b0355763c164ed2e6ec0bdbe1175 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e5c6b335c69fbcd54e9938328168f871fb955aee --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ad42cdecd6db0a22ecab71bd1451025a1c1877c6b20454022e052c28231dd35 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d5f1a5a4a84d25970544e0c60c00593ff1c0d7d6 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:771d02fb2e93803b7536f26dd33b56d2501a49a67ab6adaf0e44a427826d17c6 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..61d1223e02603d7a9a2d1c5a02b6ef2919665ac4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d72bb177658a7dec8c4d769af45e5f44632e2a5e84959af9bac635997f0490dc +size 1195 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d376e78aa084abcd14509f8d9b03ddf7456adb2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9499689d5826c54957e1e169732a6ebc9b485dd7e6d0487e5cd2fe072a5b359f +size 1165 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.bias/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae88cca9aea04b8acd8018a350ddfad31b226003 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b37979baec647a44a0eced93b99d6ad07ea8835171f322d34f141154aec1192 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5bb3cd47a11317922ec6452197ebcc792c7a87e9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34747b53be6f5421ba4632e2e510a3a67f9e4df461900cdb20173f98d991c449 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6cb42f335f0739b2c63fcda0ac7097e9ee5fc808 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9af7d611e5da6fd4574da9dd5bd4c9fd6d94bee5ea5f4dec611e36a77979dd7 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..aac31b693e72dbcfec013b18d4165d065d2a3af9 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeae88924ff5b267d6bdcbc283dfee045365b3b405516d7f848b38e21ce71b6a +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..b76519873037b520179d392bc468a6d2409d33b8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3833aa8b66e13c6bba534090a8edb6f48f8d6dba1da91081df315fb64374627 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..858edf470336553752e2f0d9926f60bbee56e9ff --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22da41b7917773e78b6f1776a40cf62691acb05da5d3d8b449bfe641b9661c00 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8d0d3910435158d0d417d2a9de59547105c03ff4 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9be1903cbc239b9c38fbfd99dc0d4d1d1573531cc0d5894c1fcc4faf98684c7 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e88b37adee48cd6be6d90f3890e1804e25450f57 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:117570306357e01b1fa3e1e0219f5039212a24c87c0e9009bcb575e01ae570d3 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.7.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae4f024351d1a93f557b0f2f3308c2d07acfbcff --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51e28a38bb979f936b9aa8bc862e71f1b3b2b97c86fd0b9e8cc12566899be4a7 +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.7.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.7.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.8.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.8.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..668dc181df340c64a43b9b88409757a3ccf47796 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0692c379db7fdcd4ef479799e70b5498d4a22c832020337b118a3ee0addc25ee +size 1180 diff --git a/global_step243198_universal/zero/model.layers.8.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.8.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0568a4dbb2bac34d1979abebcc31d562621518cb --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2beff03e686891d386a067b49d44a0fa9b9511b7fa2378e6b8046e58ce4a0466 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.8.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.8.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..02a587b540c2ace51b9b0f553d81acc0e68a3d35 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:452b6160a720b159044cb937381b8894f1bde7ac749aa3f069970467cad63d13 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.8.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.8.down_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.8.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.8.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..076459ecbc6184c1cfbd975d00bcb915990a07a6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7accca4859871897da2145f7c9871aa612392ab77b941e9076d941b0fbdc8660 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.8.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.8.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e6a535d996ee3ad70b51a5a1a0d681b2b1179a04 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e0096e95672262cff00722db8fec3fd367d0bb1e033b76380f47c7c91b8322d +size 1195 diff --git a/global_step243198_universal/zero/model.layers.8.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.8.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8ee6b9e493d61af1663c16bc41a6dee3c5c421e8 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.8.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e75ab62545c9d3911ad3bff1a3c1d633171116005c8eca03d3b5a66a45a95175 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.8.gate_up_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.8.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.8.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.8.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3a61dfa654dc450fa54dacbbe5a2c97639ac7bd8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc65e0ae9829c5cd8585c3308a58b042fe83a6f4c18eb86703c1421a4c00a3be +size 8860 diff --git a/global_step243198_universal/zero/model.layers.8.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.8.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e5395687c5c971b772050f724d87c21f7477f45 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fe662b6c60a24c5a00d2068912259617767642729a83cac3e5a63ac0611a0e2 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.8.input_layernorm.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.8.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f74b13949c2f2bf1e5a2904594d3cf44009e99fe --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6add7d0782ead09d745382dfb1fc75a8dbab4e812d5bbe2712876477923984c +size 8781 diff --git a/global_step243198_universal/zero/model.layers.8.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.8.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.8.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.8.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c09a491a35892c138b313e92b49285316eefa44d --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a472c6ba8ca1ca9ca5e3cb4ea8e2caadf8fa7a8b1cd6220cda9f87277f6789a +size 1180 diff --git a/global_step243198_universal/zero/model.layers.8.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.8.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f4024272931d9e3cac3e70ec4ebebf90144c6953 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:ce3fd083cb7e45e536a73b13518f808a06f2741f9b752dee991b919ca67c714e +size 1195 diff --git a/global_step243198_universal/zero/model.layers.8.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.8.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c84332df334e3de9846f27ec0d19e69a26e942f2 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13eb22573149659ce1fc0a01e663eae3e45f457d516bdc35e25d77ecf23b8e5c +size 1165 diff --git a/global_step243198_universal/zero/model.layers.8.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.8.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.8.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.8.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..d19f2fe5c8fc448a8b646c19dd1b6a9c25158345 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:429afe93f138808545a541484630b7d402c753ed476237f6271f3d4e43da934e +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.8.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.8.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..6438f1ffcf43323528f3cc11d0a383e1a9dbdd76 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.8.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:029257ec58e4cb521a1a13884e6bc74a8c9124389278434dca8c142aec7716d9 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.8.mlp.down_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.8.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4a6f783f9914f83dc7deb64aad0d5d6c971aa4b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14653e853b15c6860efa5f1252f49da3fcc5bf4a2d3137386a715d77b11d029a +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.8.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.8.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.8.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.8.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8bb2f8dc87bd5ee61b1efcb7912faa0fd55d04af --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ad44e0c0b0364b328c425175dfba54d062f8b08709cae3901b56d5592a699d6 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.8.mlp.gate_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.8.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..668f6dc3a856b03b50570092fdb7a7ff54917795 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2187be6a668c1236e70cdbc8007a3c7c82a112f38751e9194081c1aa36747373 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.8.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.8.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2179790e7906ac68829751db1d580e4e880625be --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c38af8d304581d78c0223077e05ce1da81fe1de441ce7cee6deada9c4c90722e +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.8.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.8.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.8.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.8.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c69d2ff5c46bd23a1fa0f32c6b8fd70843d86afc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:39f8be23a4065abc3230c1934f99ac73ba4f6ace73fe151f9da1c1a83d806145 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.8.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.8.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..47267ac0eb7b18f674e7c26ee1fed722b4f0b385 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24f2b059aeb15149ac7f0f496a0f126be19494365c7577fdfd258b448a74e5d1 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.8.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.8.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f43bf8eaa09fb6056b8187d0322bdecf1e16f820 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37a7bb96726beefcfe70b941f7cdab70a71ef57ccc2401c325200667e95aa32c +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.8.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.8.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.8.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.8.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..80143cc701f339c9847f05abe52c1a141afbdb02 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.8.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47782ab7770537e266f84daf3cf347d2c193bbb9ece6069704fc391a0dbfc82f +size 8860 diff --git a/global_step243198_universal/zero/model.layers.8.post_attention_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.8.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..27d83bfd1b18c7fafbca05a88083ccdb2a556f46 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75857b4d9caf37acbbfcd94ac490ed0599d3806675ea86e497f2f56b5fd08a1d +size 8875 diff --git a/global_step243198_universal/zero/model.layers.8.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.8.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..d67cf217c6cc0920e88fdf19e79c1dc3aed1033a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8d578b3805c69607f38b57a920dc274864c29d542d9385592a75745dd02792a +size 8781 diff --git a/global_step243198_universal/zero/model.layers.8.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.8.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.layers.8.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.8.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae301b41dd80841ae1a55b2cefa159ba2d9d8810 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.post_attention_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3f5ccf2b8934650c9fbfd3dd0a1c6df167eab5af16b1596a4b09fbdaecf1b1d +size 1180 diff --git a/global_step243198_universal/zero/model.layers.8.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.8.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..702f90673e16f3a0d24ce630f30684742692b8f6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e8639b22584f24944899423ef549a37fc21a196e61b2727d768de65c15cb1b5 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.8.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.8.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8f3b0ca5ea661f69256f08fdcb06dcba47e51bf7 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7930a2620be93e19c3793c48147cd8ad4aadbff0211f7b4f32a40812564d8994 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.8.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.8.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.8.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..43bde78f90001ada4ff1c0ccb1bb10e7a9fe9b0a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a809feafa25be51874e9b0efbbb24f7df248c45f425374bac6d48e19b04c90b2 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..5e01cf1351286fca641bedf7257ff4f793a68c60 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e42e9962fd5f688227564d144f4072c172a80dc953fe9272809db10ddf7fa68e +size 2731 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c45c384cb81b9e60d1672a8e3315faff99b440f3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b482ac8369487dc7b0e2e8861cf9bd69311311ec06b341c5568027f4bc19d4bb +size 2637 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.bias/step.pt 
b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea6a70c6a963d327889dd1f6a9c4291ffd8be4ca --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4db8471ab8b64c2fc9c2658b7b36769f75be677f9af52569d3430b65d7c7f2f +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..f005923f482afb3df06fb318a62186efac728724 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe9d795bdac8d38010eb8f6f6eed43a9a9c41bf9924f77a8c7080582cc0baa4e +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..60304b790d28aa1b27ce47a76aefe905ab341c5e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:28d2f944b3b8adc70e1688c8459af91ee15e315d8a104cedc5a4bf100c750e47 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f7fc078855e55d5cb829618f4f1786b895ecc0e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a314a09983be0cba86caa2f6b9cea977a0ac52e1d57364aa42374ecc4e81bc3a +size 1180 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..fe2388ed19ca75878e4477282ecfb80fee67292a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e98e8a7ec8e0e4546732a381d375711bf841dd1410a53ac5ae7027c8bdb7d409 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a74c26ceb042f78a37811a351425b506f2873c84 --- 
/dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:401d49cbbe2070c675441fd95173a5bf60fe6b9965b383d790bfad84da71178b +size 1165 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.8.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5306637e48e17fbd1a34097a7ac7075749b1ef98 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e73fb84f62801faea1b1d88a77a5a240fbee83cdb210c009efe85eddf5871f9c +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.8.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..77d4ba2b6806fa23b41ce9fb51e0444f4acedfd0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b0f7f473f252ce329dc5fb2784f6c3fba06bcdfd5dd00bd0d175672c43a2db1 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.o_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.8.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..7cf7d140477b95a54b8a15b6aaa5e2b17aa0e328 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3528fc754af1778aa04129e11bb9f8c39d047f1f46c1ccd4e9714bf0f59347c +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.8.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.8.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..24bfd0eb442b669cd7f8b436934f4e4133fb6302 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc1bef1b4d107c15e758423b9be94551ea8fd177a403440d7614802ad56317e1 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.8.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..846182d6e092b4a61cdf7a7cafb39227c61303af --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:305e404485945848f3b39adc64127d69d520246a13bc968400944c1b6f0d6999 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.o_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.8.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..e118020ff0a8a6dbb42c06906953ffacf7807423 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:226ddc71f5af08887a70746a0ed00d7146b440f5687b3fabb8156ba06caf9145 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.8.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a28cdc3f14ea48f8e75cd887924c90e31cb92a75 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1c1f11f1b3ace3ca4f132246d1ea91f36b746e68a4b14ccb08c18f14a3f3093 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d874417b7ce5e762752de5bb7a229db6caec17c9 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:858df50c0aa85f92625a21d28922f36e48731f3514815cc660b21f4ae6a285a4 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d388acd1a378e8904bed219c547ff53344f118a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93e85533ce49c257833ceaa392090fce1d84c7bec2a10d248da59e22c49594b1 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..31100d5eda499ccd1a23261e1db8fcc6356ed812 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9788a0c903acc399906a90aa60f967d2b94ebf465beecc26688d784ef8d75593 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..c7e481b9fac22e2cb084b4e623ef7f6fb4b7c333 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec8fac3d98c408463610025a46cfab4d305236f838a49bc46b11518a7465cc7e +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..76524d34fab49334e5064e50de1476fec4935982 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eaa53df7e5bf4730cad11e3729ee0de542b05888af1bc851644729c8cd141e7e +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ec1bb2adb2071f4211b828a10b0306a4bb3f5fc --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f3ae6b86e65d05c6b9217217bdc754f7e7fd7a9d6ad59ffc2a6a19bb9deea7c9 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.q_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..cfdb6ade6a813b416e85dd3e74e4488a563d0a01 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b17550ac6deef046f7364d700b716dfc25d77f6bca10d6c0a0356853ab3b35a5 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5441dccf5354f4472903d4e67a0772ff43b07e4b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd0cb6e60640a1edb24cae6e00bcdec47387e8d97a9f78149049472636721faf +size 1165 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..26ca628e13ba273e2db5d857efb56eaa3af6e772 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea90450cbf217430e95388f8f89c4784e5fcb4476dd0379ced8d8f703b9d53c8 +size 2716 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..7da9ab8d1cca1cafefc6f5776a258e1c2cad1d4b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4e9bfc091eb58bea64e6984166d23e2d81bbea99f6d3df7c80b4eda9135d73b +size 2731 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..64c513cb259c0c3c072725c247e9bfa3190a015f --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f132de1fa9e035a463a0500928a2fda657bf701e2f4c255060cb6bd3209565f +size 2637 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.weight/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a7cd769c9e4ee1d106d58898e464a4d6595d5738 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fd1beb36d32ff8a0db28cc488c318377774ce7c488a127d791011ace9f0f1d9 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..38eab71a4ef85ad338dc3e9c34cf1ff571a0f93e --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b875df0f2189fe5bb810eae52d54eec7c6f3b22c41de7199e91cae6c76aee575 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..16d60a4f0d387def07c47af2307d0182b0a2bb45 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ceaefa2c8c55c988883378cff52a15ad24f2ec88fe00e5794335887d065ddb6 +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.v_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..5dccbdf417a937654c6480f943c53113afbd1d91 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0accc004f57dae3fda499b58e5b16e0d69b515aec6323d43e15c3f7715de04e2 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf332b7bcf0ccf84e75abf8212b52f2dde2a1bda --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e10f6405d9d524e8a9375f9d63d80926c3969f20120cde7b6ecc89df7536430d +size 1195 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc020912dc79d4e5c5ca3e4bd8e33b6188253a8a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e9c00a3d760d303098ee3cfdad1d0565f6a7dba4173c50ea511a653d9bc90ac +size 1165 diff --git a/global_step243198_universal/zero/model.layers.8.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.8.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.9.down_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.9.down_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..99c0f65c176a81e04c66d15691e4e9141a2303cf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.down_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54a66a98afccfb9aec4227f4f64fda7d89344793ae582a9250fdda060c0a30b9 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.9.down_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.9.down_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4a5300cb9fcd0213969608196629f95d76a9a9ae --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.down_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23a74181eb1b5eba1fb6c1511b565392ea81a15416c3649e13da961393b0ff56 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.9.down_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.9.down_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..269689d1816bb4bda786fe01db1e82cd76cff357 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.down_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e399a2758f22cb2e3a6a47d8de39568c3f36b65958ec62534bef3b05d6afb7c +size 1165 diff --git a/global_step243198_universal/zero/model.layers.9.down_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.9.down_proj_alpha/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.down_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.9.gate_up_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.9.gate_up_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..c9950310453273d4ece3116d2da67c348cb0eb32 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.gate_up_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db934b2b49200cd3b99a5da2c7f31ccbb61efc3b0384a7b2b19fd115c72e785b +size 1180 diff --git a/global_step243198_universal/zero/model.layers.9.gate_up_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.9.gate_up_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..80818b54049cafc16a498141060c565d49af1267 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.gate_up_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e55d281a9c69c01c55a3727146b419e64faaf49f9ace0d5a893f603da8ac1139 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.9.gate_up_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.9.gate_up_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..9419777b5316bdefe9ccbab0ac646c2d0a9e17d5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.gate_up_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2001026963ca3823e31b4659fefd3ab3ea45a9050af6bb04609c3dedd41bebd9 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.9.gate_up_proj_alpha/step.pt 
b/global_step243198_universal/zero/model.layers.9.gate_up_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.gate_up_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.9.input_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.9.input_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b9de949229a2e666df790d755565b872bb18a2ab --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.input_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a46814af0a58892229a43e2612d27dd825c2803751f85f714e0d2f4b2a152024 +size 8860 diff --git a/global_step243198_universal/zero/model.layers.9.input_layernorm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.9.input_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..84a075e84575dacc10bb064b9aa7bd8da5e2dd16 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.input_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7774ae346e2ec00d9d1a9a922f0f2cdb16a0e6c76cdca6e0d2e68bcd33acc16b +size 8875 diff --git a/global_step243198_universal/zero/model.layers.9.input_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.9.input_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f53da9e46435e835e551b84edfa5f56c8b6f58bf --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.input_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b41d71cc2c034f4d08b8304e74e2faf2aa503ed579f261de957d654d38d9d3a5 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.9.input_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.9.input_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.input_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.9.input_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.9.input_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..61fde2645edb4aec36072fbe0b1fd68d16995d62 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.input_layernorm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5d4651bba335e0cffc039f0099232c78a8903a2a6ee8f7920c08d84cb34e88f +size 1180 diff --git a/global_step243198_universal/zero/model.layers.9.input_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.9.input_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ffd8975c3367b1d73e1fe89290d4cc08a7faaba --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.input_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc33878c33dbba3b1fa1a4b4b571da65b0c074671a4be2df416b1bf3f05c790d +size 1195 diff --git a/global_step243198_universal/zero/model.layers.9.input_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.9.input_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..18a5cb933d7ed527d2c8f48ae89499a8466b1ff6 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.9.input_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c29b70deb1f740b455e757708fa47428468c30f1c393e36f2967b3c056863abd +size 1165 diff --git a/global_step243198_universal/zero/model.layers.9.input_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.9.input_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.input_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.9.mlp.down_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.9.mlp.down_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..a796b11968c8c68b2f4da2345e0af356d89a7a4b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.mlp.down_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1efbb8a2844caf9e3cca4b0514a3bf6f4e5ee4294227557e50b6bae826da7d9 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.9.mlp.down_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.9.mlp.down_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..d753cb11cb1a907d4afa049894f24c870e02ecd0 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.mlp.down_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06597d1506d80e2a5fb71d09b8546c4086867c7fa95b1dd83cab9ccaec0ca8e2 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.9.mlp.down_proj.weight/fp32.pt 
b/global_step243198_universal/zero/model.layers.9.mlp.down_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..150ffae5431d34594444550aa61e3f2ded95f606 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.mlp.down_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4acf0117061f10b91697588d5decf3b25242d29b548fdad19f1ca57cca458c2a +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.9.mlp.down_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.9.mlp.down_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.mlp.down_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.9.mlp.gate_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.9.mlp.gate_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2f320d8678806bd4a201f380b6319c0d474206c6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.mlp.gate_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58798370caa4136a12666c897423dc89deceae6c06e837d0c3e092bf04f9411f +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.9.mlp.gate_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.9.mlp.gate_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..45de85220c3cd1cb7d121ed58d622d298bdcab63 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.mlp.gate_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:1f57abee3a730c4076d5de8974afcc8182ec64f9913c09199851880a75b3fc51 +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.9.mlp.gate_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.9.mlp.gate_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..1a69289bc5b12d819ff3da3b9af704cf09c5850b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.mlp.gate_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5027af07a78cabda09230c9b85b5326112bc78d987bd646e10467da9ec3b598a +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.9.mlp.gate_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.9.mlp.gate_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.mlp.gate_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.9.mlp.up_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.9.mlp.up_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1d3465d2791e8cca003b3b85390384203987f048 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.mlp.up_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1bbc0e643592e231bc7396b3b8129b2364ad9c4e1cf244d0c4b90855438a6d42 +size 36865244 diff --git a/global_step243198_universal/zero/model.layers.9.mlp.up_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.9.mlp.up_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e8112357364ff4fb03051c699296401a70bf2631 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.9.mlp.up_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d4587e168510945395d7b1bcc74dbd2685dbbf69f40040b1b49fe039f4ce12e +size 36865259 diff --git a/global_step243198_universal/zero/model.layers.9.mlp.up_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.9.mlp.up_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..dcb17b64045d69202b837e9c92351080d29f8882 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.mlp.up_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aac49d51cf8770b1691d42cae523a7830f22168f976c747811e5cfc33d61c7a8 +size 36865165 diff --git a/global_step243198_universal/zero/model.layers.9.mlp.up_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.9.mlp.up_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.mlp.up_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.9.post_attention_layernorm.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.9.post_attention_layernorm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..b78bc12d2e2e7b23bd698702aa3a4d8591fbb818 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.post_attention_layernorm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07209ccfa6bdab8d23caf013efbe6b53c2a9504a6c55a3475d6d05637f12374a +size 8860 diff --git a/global_step243198_universal/zero/model.layers.9.post_attention_layernorm.weight/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.9.post_attention_layernorm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..993e57181e7b0f548df276d485ae06957180a471 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.post_attention_layernorm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:befdf5bdbdd41c860047aa25bf496ed02d42c6f86c2147153ee91896dc03e2f6 +size 8875 diff --git a/global_step243198_universal/zero/model.layers.9.post_attention_layernorm.weight/fp32.pt b/global_step243198_universal/zero/model.layers.9.post_attention_layernorm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c93db6be5492195aeba2f37bd7ee53b25bfe1fca --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.post_attention_layernorm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d65c23fac33e34f86c2fbf3f74caf911d6cdd524a4c0760bc2c8030eeca4e048 +size 8781 diff --git a/global_step243198_universal/zero/model.layers.9.post_attention_layernorm.weight/step.pt b/global_step243198_universal/zero/model.layers.9.post_attention_layernorm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.post_attention_layernorm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.9.post_attention_layernorm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.9.post_attention_layernorm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..77d7f5d2eef8a95951ca143696f297cf663fe5a6 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.post_attention_layernorm_alpha/exp_avg.pt @@ 
-0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:490ca625fd27e4280775a5085d2e5395ee938a7f52deef9f65cef3e8d933c2bb +size 1180 diff --git a/global_step243198_universal/zero/model.layers.9.post_attention_layernorm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.9.post_attention_layernorm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e75952403d2b410712470e93e80f2915119f261 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.post_attention_layernorm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c33bf64bbe4c71673a699638cf553dbd85c7b53eff1b09f221b56131a7ff3d4 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.9.post_attention_layernorm_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.9.post_attention_layernorm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0dd8523c27677c643faa4b9ed789d22ecbbbe916 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.post_attention_layernorm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e91f4441089e1630c730c10ebb3a4905e710e9b7b6680af083b947a31f34d9d9 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.9.post_attention_layernorm_alpha/step.pt b/global_step243198_universal/zero/model.layers.9.post_attention_layernorm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.post_attention_layernorm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.bias/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..2d016b1d289b8a0bc4eaa46869e9213b96063cf3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39e5dd63553165e84de503d641862d304fb2fd0aeeeb2480f41c90dc1d29d72d +size 2716 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..498d1538bbd0dab41ee4a625d7a85be654c7f340 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:806168f4c85305ecb55d36279ff3b7283763340a97548e0c41e2568f05bb91a6 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..f69cbe1ceffeb58db675c40d9aa80a92fc15d9ff --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:decc77b656f420e8a1d513cea2fdb57329bc86a7bd9b571dbfd9c65fe0dd8c13 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..32fefa0bf90ef8be7c4eb54e5dc8cf9d29d2adde --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80c261fb3825a416cf2d5d149d20474ea8b18bab45efdb00d17bd3e835a6951c +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..0c05cbd0a12705ec23a785e3add8830238bf91ea --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c866f8a75a2077e046bceb3ee8de2a6b9feb1d6bbdfb1aa5a5c3ff62eadb8297 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..bfb5760520f3d4c8f8ab9d2735e5d0e821fbbeff --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8b3006ea70db64f42a165a6c0cf835f4d98ceebaaef82420136b980de77edee +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.weight/step.pt new file mode 100644 index 
0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.k_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..00325bf7f3847c18c5a281f8530b21991dd248f1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf13944fe15ebed8e855f7fb32d8c0b09bef58e8c8613f66527bfe34d154e939 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.k_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..899f13fa639a310a8fe0b7b020cd251a42251433 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc28e8694ff0aeb67a43de021da11638dd8377d7144a4effb467bd3c8f00b348 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.k_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..c768c893d14739e55e29f4e7a9400b0e13d1e75c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c2da2ae889269630e21db97c01f1650ca5bb7789bcf143826ba48f475358aea +size 1165 diff --git 
a/global_step243198_universal/zero/model.layers.9.self_attn.k_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.k_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.o_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.9.self_attn.o_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..9989dfc2053bb8f04918a45ca392fbc782c20d36 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.o_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ac71475110ea70ba73038d44d0a98f57c77a6e65bcb4ee42d124e965c89007c +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.o_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.9.self_attn.o_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9812363996139b3e320ddd4d5d580ede7a877516 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.o_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91cb6704e368a26f07849ce4338f4c30bbbd27f19e77f920679dc4a373b03e09 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.o_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.9.self_attn.o_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..5f31f2eecd0bb1b5c882c7d4ca43562f362474d8 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.9.self_attn.o_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:830a938460a71cc8afece301435ba36de869eec3dbbb69b86e0c47ec4678d4f6 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.o_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.9.self_attn.o_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.o_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.o_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.9.self_attn.o_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..581a9585cb2e4663bf60c801e13f4a5060be119a --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.o_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:463ff6781d0d9406f166b1f56650d805d2c64c37e2db93edbbb91156ca1fef06 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.o_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.9.self_attn.o_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ebb7c973952dbadb6c0ef66446435145b97d3a3 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.o_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76e4cb31b7dab3ff3ba9318b06120302c0f38c4ca49500103aabdd3fca926270 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.o_proj_alpha/fp32.pt 
b/global_step243198_universal/zero/model.layers.9.self_attn.o_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..0d780a14b92061cbb845232be5fc8ff63ac8d9c5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.o_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf613d846b7afc757b979428939964afac7592c393dcd61c7876022c76f8ef35 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.o_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.9.self_attn.o_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.o_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc162866822b4ad2d00ccdb5524e45810839e5f1 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f01c7800a9830bde33caf17abd81baa6252ce0b0a0d00ca2ceeaf42c1b6c091e +size 8860 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4866fbcc911295537068af638c4a175ac0379921 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:25031496728a68437090ab3cab4202cb65df764d00fae3689025294ef6ac658a +size 8875 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..4650ae60cc373ff673739fa9f626701e5eab8fda --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a7ebb018be9c0f7ae85fffedab451ecfcd1b1cf8907c589de247a430617991b +size 8781 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..1cb0fea90745959ba72bab65884f2ba1eb55e18c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1f9630114f7516949f0ed690412ee0be988a247982a8a45652030e5f940ccb0 +size 14746844 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..261bb3f09bc1585b2187ea49e0aa77fb672cd843 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab8a4318d214f0c7ed7435221cba24bacf6fa26323d01127263871a6f2bbd959 +size 14746859 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..39b072d2240e7d0779f193597970bc4cd3a3ae80 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c1e02e2a4e8498649e453bfe8becbf89e821a259d55af2e9a78f3710d1c03f4 +size 14746765 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.q_proj_alpha/exp_avg.pt b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f13455bb9a69971f3a1be3e878fe0fbe621f97ed --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9f431df8b1b69f357f3e5b6bfc91a2dce8ebd596b0dc96c36e76e9dca4543f1 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.q_proj_alpha/exp_avg_sq.pt 
b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..439c96408be5ce54fa8cb85b6f3c86fc6eb40945 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fc62bc34e025d15b3e730b619b9dd345d5c40057b07c57a05f3cbf92bcd1e88 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.q_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab9e72d92fff4b21e2dd3e55373264e386ffb507 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9c898bc9eca100925a59c94a3e39730760b991e829475654034a2af06a8bb50 +size 1165 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.q_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.q_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.bias/exp_avg.pt b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.bias/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..f19da4e0bfc0de9424d22d1d6c1bfea229094b73 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.bias/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:22675875b3d241cb9844b0445fa899f51de5908dacc539ded06e00def6159a4a +size 2716 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.bias/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.bias/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..57c3ec9aacc1b11706466d43a57fbb4c915f3e50 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.bias/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69a7c4e0aee923daadf0f2d240991f690ad7fc91b84a83f685fcdc868c0315e6 +size 2731 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.bias/fp32.pt b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.bias/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..fbf93fb78a6d9c2fc39fa993fea46808ee7bee5b --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.bias/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90b161e559781f1d4e7f08487ff9e14aaaa102459ec143e143184803849bde76 +size 2637 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.bias/step.pt b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.bias/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.bias/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.weight/exp_avg.pt b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..60edac25fd337b3c5248985131cbbd6123442175 --- /dev/null +++ 
b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:073c35e99cc4edc311806b665e5c842594588b9532c6088f3a55a8918e7a1a07 +size 2950364 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..9aa64b4cae8a22803c5220a94d928cd397324184 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d19226e49e2e088c5757d4b6a07711c01f9676363d90794cdb26320b3b74fae8 +size 2950379 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.weight/fp32.pt b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..a0f02ca24b9b5fd4bdefa702e553de68c216c419 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f63a991e4ec68206554c58e73ed12dad2376834bb1d483f3f28b3ca2d4e1332a +size 2950285 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.weight/step.pt b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.v_proj_alpha/exp_avg.pt 
b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e0f53583cb16f2035e1cf3abe7c26282fe320a5 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dd9975c96fe41f50f59234712716228574482f16040139987d5aa1de9267890 +size 1180 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.v_proj_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..4da5214dc1df6b8d14bfb8650887eafe69e59fd8 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e23e1059a69252ee55ad6516d322a10be3e6f0f43e6acf27401c460d957d7672 +size 1195 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.v_proj_alpha/fp32.pt b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..74f8e3c442145011fedcc2f8b7c774b8b416603c --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0571c261a8fab9d3b1efc208487c77d5391949c15fbaf617725726fb20ccd93e +size 1165 diff --git a/global_step243198_universal/zero/model.layers.9.self_attn.v_proj_alpha/step.pt b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.layers.9.self_attn.v_proj_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/model.norm.weight/exp_avg.pt b/global_step243198_universal/zero/model.norm.weight/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab798031726e82c98f15ed9f96cb137c9a995c5f --- /dev/null +++ b/global_step243198_universal/zero/model.norm.weight/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eefa13c5683c52740a51f502f14f6f962922d44ffb90cbef57fcd6a60d049e9d +size 8860 diff --git a/global_step243198_universal/zero/model.norm.weight/exp_avg_sq.pt b/global_step243198_universal/zero/model.norm.weight/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..e0eca07a75029505aa9a301b10c2010d260e6078 --- /dev/null +++ b/global_step243198_universal/zero/model.norm.weight/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96ede35736ffc7ffb5400b67fccbeb3cbf85c73462936c50fa12546021edaf04 +size 8875 diff --git a/global_step243198_universal/zero/model.norm.weight/fp32.pt b/global_step243198_universal/zero/model.norm.weight/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..8921c86dfb597cc8e4060ddbe5d918c959693e2b --- /dev/null +++ b/global_step243198_universal/zero/model.norm.weight/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26d93588211100fa1a82607581ffaf04c67f8dc0087cdafa5e8a0b47c038f8f7 +size 8781 diff --git a/global_step243198_universal/zero/model.norm.weight/step.pt b/global_step243198_universal/zero/model.norm.weight/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.norm.weight/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git 
a/global_step243198_universal/zero/model.norm_alpha/exp_avg.pt b/global_step243198_universal/zero/model.norm_alpha/exp_avg.pt new file mode 100644 index 0000000000000000000000000000000000000000..8c08f205c3d65dd91df4f1b8c9d802f974769dcc --- /dev/null +++ b/global_step243198_universal/zero/model.norm_alpha/exp_avg.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:baa7e1604cb141329bcbd560038819a9c376b132a4ea13f1b68ae013425503a8 +size 1180 diff --git a/global_step243198_universal/zero/model.norm_alpha/exp_avg_sq.pt b/global_step243198_universal/zero/model.norm_alpha/exp_avg_sq.pt new file mode 100644 index 0000000000000000000000000000000000000000..46ca1514b31c970ce7d24bda8a9984614607818a --- /dev/null +++ b/global_step243198_universal/zero/model.norm_alpha/exp_avg_sq.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce3c1493695369938986c2d1fc8db23488b38523b006c413c85138fba1141456 +size 1195 diff --git a/global_step243198_universal/zero/model.norm_alpha/fp32.pt b/global_step243198_universal/zero/model.norm_alpha/fp32.pt new file mode 100644 index 0000000000000000000000000000000000000000..6ac3de0c8c3edde58ab3d1184e517d4a284723f2 --- /dev/null +++ b/global_step243198_universal/zero/model.norm_alpha/fp32.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e57f04b3ded69e6fd1ee4f2aa9e5d270e03baa347ce3438ec058436224bdbc03 +size 1165 diff --git a/global_step243198_universal/zero/model.norm_alpha/step.pt b/global_step243198_universal/zero/model.norm_alpha/step.pt new file mode 100644 index 0000000000000000000000000000000000000000..d011f6b53a8ce719cfe37ff08f5ee678c7f37288 --- /dev/null +++ b/global_step243198_universal/zero/model.norm_alpha/step.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40343668ce95ee7677821672b43b1c1c43ed7c6b638a4da642b389ad816ced6 +size 852 diff --git a/global_step243198_universal/zero/optimizer_state.pt 
b/global_step243198_universal/zero/optimizer_state.pt new file mode 100644 index 0000000000000000000000000000000000000000..f743b4fabcce3fbfade2c1cae8ed646ac969b710 --- /dev/null +++ b/global_step243198_universal/zero/optimizer_state.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96338ce820582ed28a33569a634202f79f59fbce6b829a32a04849508bf6d562 +size 1664 diff --git a/latest_universal b/latest_universal new file mode 100644 index 0000000000000000000000000000000000000000..4be7e542a295273aa55248d96dac107d0ab5eaeb --- /dev/null +++ b/latest_universal @@ -0,0 +1 @@ +global_step243198_universal \ No newline at end of file diff --git a/modeling_yulanmini.py b/modeling_yulanmini.py new file mode 100644 index 0000000000000000000000000000000000000000..415b8e1c31cccc76c11f07b90f5f25e3d4eef7a3 --- /dev/null +++ b/modeling_yulanmini.py @@ -0,0 +1,1593 @@ +# coding=utf-8 +# Copyright 2022 EleutherAI and the HuggingFace Inc. team. All rights reserved. +# +# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX +# and OPT implementations in this library. It has been modified from its +# original forms to accommodate minor architectural differences compared +# to GPT-NeoX and OPT used by the Meta AI team that trained the model. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+""" PyTorch YuLanMini model.""" +import json +import math +import re +import warnings +from collections import defaultdict +from datetime import datetime +from typing import Dict, List, Optional, Tuple, Union + +import torch +import torch.nn.functional as F +import torch.utils.checkpoint +from torch import nn +from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, KLDivLoss, MSELoss +from transformers.activations import ACT2FN +from transformers.cache_utils import Cache, DynamicCache, StaticCache +from transformers.modeling_attn_mask_utils import (AttentionMaskConverter, + _prepare_4d_attention_mask) +from transformers.modeling_outputs import (BaseModelOutputWithPast, + CausalLMOutputWithPast, + SequenceClassifierOutputWithPast) +from transformers.modeling_utils import PreTrainedModel +from transformers.pytorch_utils import (ALL_LAYERNORM_LAYERS, + is_torch_greater_or_equal_than_1_13) +from transformers.utils import (add_start_docstrings, + add_start_docstrings_to_model_forward, + is_flash_attn_2_available, + is_flash_attn_greater_or_equal_2_10, logging, + replace_return_docstrings) + +try: + from torch.nn.attention.flex_attention import (create_block_mask, + flex_attention) + + def causal(b, h, q_idx, kv_idx): + return q_idx >= kv_idx + + block_mask = create_block_mask(causal, B=None, H=None, Q_LEN=4096, KV_LEN=4096) +except ImportError: + pass +import os +import sys + +sys.path.append('/home/u20140041/pretrain-mini/model') +from configuration_yulanmini import YuLanMiniConfig + +# from unsloth.models.llama import CausalLM_fast_forward, LlamaModel_fast_forward_inference, LlamaAttention_fast_forward, LlamaModel_fast_forward, LlamaDecoderLayer_fast_forward + +if is_flash_attn_2_available(): + from modeling_flash_attention_utils import _flash_attention_forward + +# from liger_kernel.transformers.experimental.embedding import LigerEmbedding +import wandb +from liger_kernel.transformers.cross_entropy import LigerCrossEntropyLoss +from 
liger_kernel.transformers.fused_linear_cross_entropy import \ + LigerFusedLinearCrossEntropyLoss +from liger_kernel.transformers.layer_norm import LigerLayerNorm +from liger_kernel.transformers.rms_norm import LigerRMSNorm +from liger_kernel.transformers.rope import liger_rotary_pos_emb +from liger_kernel.transformers.swiglu import LigerSwiGLUMLP + +LOCAL_RANK = int(os.getenv("LOCAL_RANK", "0")) +RANK = int(os.getenv("RANK", "0")) +WORLD_SIZE = int(os.getenv("WORLD_SIZE", "1")) + + +def print_rank0(*arg): + if LOCAL_RANK == 0: + print(*arg) + +logger = logging.get_logger(__name__) + +_CONFIG_FOR_DOC = "YuLanMiniConfig" + + +# https://github.com/unslothai/unsloth/blob/4e570be9ae4ced8cdc64e498125708e34942befc/unsloth/models/llama.py#L276 +def rms_layernorm(hidden: torch.Tensor, weight: torch.Tensor, eps: float): + old_dtype = hidden.dtype + hidden_fp32 = hidden.to(torch.float32) + variance = hidden_fp32.square().mean(dim=-1, keepdim=True) + hidden = (hidden_fp32 * (variance + eps).rsqrt()).to(old_dtype) + hidden *= weight + return hidden + + +def _prepare_4d_causal_attention_mask_with_cache_position( + attention_mask: torch.Tensor, + sequence_length: int, + target_length: int, + dtype: torch.dtype, + device: torch.device, + min_dtype: float, + cache_position: torch.Tensor, + batch_size: int, +): + """ + Creates a causal 4D mask of shape `(batch_size, 1, query_length, key_value_length)` from a 2D mask of shape + `(batch_size, key_value_length)`, or if the input `attention_mask` is already 4D, do nothing. + + Args: + attention_mask (`torch.Tensor`): + A 2D attention mask of shape `(batch_size, key_value_length)` or a 4D attention mask of shape `(batch_size, 1, query_length, key_value_length)`. + sequence_length (`int`): + The sequence length being processed. + target_length (`int`): + The target length: when generating with static cache, the mask should be as long as the static cache, to account for the 0 padding, the part of the cache that is not filled yet. 
+ dtype (`torch.dtype`): + The dtype to use for the 4D attention mask. + device (`torch.device`): + The device to plcae the 4D attention mask on. + min_dtype (`float`): + The minimum value representable with the dtype `dtype`. + cache_position (`torch.Tensor`): + Indices depicting the position of the input sequence tokens in the sequence. + batch_size (`torch.Tensor`): + Batch size. + """ + if attention_mask is not None and attention_mask.dim() == 4: + # In this case we assume that the mask comes already in inverted form and requires no inversion or slicing. + causal_mask = attention_mask + else: + causal_mask = torch.full((sequence_length, target_length), + fill_value=min_dtype, + dtype=dtype, + device=device) + if sequence_length != 1: + causal_mask = torch.triu(causal_mask, diagonal=1) + causal_mask *= torch.arange(target_length, + device=device) > cache_position.reshape( + -1, 1) + causal_mask = causal_mask[None, + None, :, :].expand(batch_size, 1, -1, -1) + if attention_mask is not None: + causal_mask = causal_mask.clone( + ) # copy to contiguous memory for in-place edit + mask_length = attention_mask.shape[-1] + padding_mask = causal_mask[:, :, :, : + mask_length] + attention_mask[:, None, + None, :] + padding_mask = padding_mask == 0 + causal_mask[:, :, :, : + mask_length] = causal_mask[:, :, :, : + mask_length].masked_fill( + padding_mask, min_dtype) + + return causal_mask + + +class YuLanMiniRMSNorm(nn.Module): + + def __init__(self, hidden_size, eps=1e-6, casting_mode="llama", offset=0, init_fn="ones"): + """ + YuLanMiniRMSNorm is equivalent to T5LayerNorm + """ + super().__init__() + if init_fn == "ones": + self.weight = nn.Parameter(torch.ones(hidden_size)) + elif init_fn == "zeros": + self.weight = nn.Parameter(torch.zeros(hidden_size)) + else: + raise ValueError(f"Invalid init_fn: {init_fn}") + self.variance_epsilon = eps + self.offset = offset + self.casting_mode = casting_mode + + def forward(self, hidden_states): + old_dtype = hidden_states.dtype + 
class YuLanMiniRotaryEmbedding(nn.Module):
    """Rotary position embedding tables with a cos/sin cache that grows on demand.

    The cache is built once for `max_position_embeddings` positions and rebuilt whenever `forward`
    is asked for a longer sequence. `cos_cached`, `sin_cached` and `inv_freq` are non-persistent
    buffers, so they are not written to the state dict.

    Args:
        dim: Size of the rotary dimension (the per-head dimension).
        max_position_embeddings: Number of positions to pre-compute.
        base: Base of the geometric progression of rotary frequencies.
        device: Device for the buffers. When `None`, CUDA is used if it is available and the CPU
            otherwise.
    """

    def __init__(self,
                 dim,
                 max_position_embeddings=4096,
                 base=10000,
                 device=None):
        super().__init__()

        self.dim = dim
        self.max_position_embeddings = max_position_embeddings
        self.base = base

        # Fall back to the CPU instead of failing on hosts without CUDA.
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"

        # Registered before the cache is built: subclasses override `_set_cos_sin_cache`
        # and read `self.inv_freq` there.
        self.register_buffer("inv_freq",
                             self._compute_inv_freq().to(device),
                             persistent=False)

        # Build here to make `torch.jit.trace` work.
        self._set_cos_sin_cache(seq_len=max_position_embeddings,
                                device=device,
                                dtype=torch.get_default_dtype())

    def _compute_inv_freq(self):
        """Return the inverse rotary frequencies as a float32 CPU tensor of length `dim // 2`."""
        exponents = torch.arange(
            0, self.dim, 2, dtype=torch.int64, device="cpu").float() / self.dim
        return 1.0 / (self.base**exponents)

    def _set_cos_sin_cache(self, seq_len, device, dtype):
        """(Re)build the cos/sin tables for `seq_len` positions and store them on `device` as `dtype`."""
        self.max_seq_len_cached = seq_len
        # Recomputed on the CPU in float32 rather than read from the buffer, so the tables do
        # not depend on whatever dtype/device the `inv_freq` buffer has been cast to.
        inv_freq = self._compute_inv_freq()
        t = torch.arange(self.max_seq_len_cached,
                         device="cpu",
                         dtype=torch.int64).float()

        freqs = torch.outer(t, inv_freq)
        # Different from paper, but it uses a different permutation in order to obtain the same calculation
        emb = torch.cat((freqs, freqs), dim=-1)
        self.register_buffer("cos_cached",
                             emb.cos().to(dtype=dtype,
                                          device=device,
                                          non_blocking=True),
                             persistent=False)
        self.register_buffer("sin_cached",
                             emb.sin().to(dtype=dtype,
                                          device=device,
                                          non_blocking=True),
                             persistent=False)

    def forward(self, x, seq_len=None):
        """Return `(cos, sin)` tables for the first `seq_len` positions, cast to `x.dtype`.

        Args:
            x: Reference tensor laid out as [bs, num_attention_heads, seq_len, head_size]; only its
                dtype, device and (when `seq_len` is omitted) its second-to-last dimension are used.
            seq_len: Number of positions required. Defaults to `x.shape[-2]`.
        """
        if seq_len is None:
            seq_len = x.shape[-2]
        if seq_len > self.max_seq_len_cached:
            self._set_cos_sin_cache(seq_len=seq_len,
                                    device=x.device,
                                    dtype=x.dtype)

        return (
            self.cos_cached[:seq_len].to(dtype=x.dtype),
            self.sin_cached[:seq_len].to(dtype=x.dtype),
        )

    def get_cached(self, seq_len=None):
        """Return the full cached `(cos, sin)` tables; `seq_len` is accepted but ignored."""
        return self.cos_cached, self.sin_cached
class YuLanMiniDynamicNTKScalingRotaryEmbedding(YuLanMiniRotaryEmbedding):
    """YuLanMiniRotaryEmbedding extended with Dynamic NTK scaling. Credits to the Reddit users /u/bloc97 and /u/emozilla

    When the requested length exceeds `max_position_embeddings`, the rotary base is enlarged as a
    function of `scaling_factor` and the requested length before the cos/sin tables are rebuilt.
    """

    def __init__(self,
                 dim,
                 max_position_embeddings=2048,
                 base=10000,
                 device=None,
                 scaling_factor=1.0):
        # Must be set before the parent constructor runs, because it calls `_set_cos_sin_cache`.
        self.scaling_factor = scaling_factor
        super().__init__(dim, max_position_embeddings, base, device)

    def _inv_freq_for_base(self, base, device):
        """Return the inverse rotary frequencies for `base` on `device`."""
        return 1.0 / (base**(
            torch.arange(0, self.dim, 2).float().to(device) / self.dim))

    def _set_cos_sin_cache(self, seq_len, device, dtype):
        """(Re)build the cos/sin tables for `seq_len` positions, rescaling the base for long sequences."""
        self.max_seq_len_cached = seq_len

        if seq_len > self.max_position_embeddings:
            base = self.base * ((self.scaling_factor * seq_len /
                                 self.max_position_embeddings) -
                                (self.scaling_factor - 1))**(self.dim /
                                                             (self.dim - 2))
            self.register_buffer("inv_freq",
                                 self._inv_freq_for_base(base, device),
                                 persistent=False)
        elif getattr(self, "inv_freq", None) is None:
            # First build at or below the trained length: no frequencies exist yet, so create
            # the unscaled ones instead of failing on the missing attribute below.
            self.register_buffer("inv_freq",
                                 self._inv_freq_for_base(self.base, device),
                                 persistent=False)

        t = torch.arange(self.max_seq_len_cached,
                         device=device,
                         dtype=torch.int64).type_as(self.inv_freq)

        freqs = torch.outer(t, self.inv_freq)
        # Different from paper, but it uses a different permutation in order to obtain the same calculation
        emb = torch.cat((freqs, freqs), dim=-1)

        self.register_buffer("cos_cached",
                             emb.cos().to(dtype),
                             persistent=False)
        self.register_buffer("sin_cached",
                             emb.sin().to(dtype),
                             persistent=False)
class YuLanMiniMLP(nn.Module):
    """Gated feed-forward block computing `down_proj(act_fn(gate_proj(x)) * up_proj(x))`.

    All three projections are bias-free linear layers. Each one is tagged with marker attributes
    (`__do_scale_tager*__`) that this class only sets and never reads.
    """

    def __init__(self, config):
        super().__init__()
        model_dim = config.hidden_size
        ffn_dim = config.intermediate_size
        self.hidden_size = model_dim
        self.intermediate_size = ffn_dim

        # Creation order is kept (gate, up, down) so random initialisation is unchanged.
        self.gate_proj = nn.Linear(model_dim, ffn_dim, bias=False)
        self.up_proj = nn.Linear(model_dim, ffn_dim, bias=False)
        self.down_proj = nn.Linear(ffn_dim, model_dim, bias=False)

        # Marker attributes on the projection modules.
        self.down_proj.__do_scale_tager__ = True
        for input_proj in (self.gate_proj, self.up_proj):
            input_proj.__do_scale_tager_mu_dim_model__ = True
        self.down_proj.__do_scale_tager_mu_ffn__ = True

        self.act_fn = ACT2FN[config.hidden_act]

    def forward(self, hidden_state):
        """Apply the gated MLP to `hidden_state` and return a tensor with the same trailing size."""
        gate = self.act_fn(self.gate_proj(hidden_state))
        up = self.up_proj(hidden_state)
        return self.down_proj(gate * up)
def get_hidden_states_logger(layer_idx, num_hidden_layers=None):
    """Return a callback for logging hidden-state statistics.

    Logging is currently disabled: both callbacks return immediately without touching their
    arguments. The statements that used to follow the early `return` could never run and have
    been removed, along with the now-unused logging interval.

    Args:
        layer_idx: Index of the layer the callback is created for (unused while logging is disabled).
        num_hidden_layers: When `None`, the returned callback has the model-level signature
            `(layer_idx, name, hidden_states)`; otherwise it has the decoder-layer signature
            `(name, hidden_states)`.

    Returns:
        A callable that accepts the arguments described above and returns `None`.
    """

    @torch.no_grad()
    def log_hidden_states_decoder_layers(name, hidden_states):
        # Disabled: intentionally a no-op.
        return

    @torch.no_grad()
    def log_hidden_states_transformers(layer_idx, name, hidden_states):
        # Disabled: intentionally a no-op.
        return

    if num_hidden_layers is None:
        return log_hidden_states_transformers
    return log_hidden_states_decoder_layers
class YuLanMiniAttention(nn.Module):
    """
    Eager (matmul + softmax) multi-head attention with grouped key/value heads and rotary position
    embeddings. Optionally applies a per-head LayerNorm to queries and keys (`config.qk_layernorm`) and
    learnable scalar multipliers on the q/k/v/o projection outputs (`config.wesar_weights`).
    """

    def __init__(self,
                 config: YuLanMiniConfig,
                 layer_idx: Optional[int] = None):
        super().__init__()
        self.config = config
        self.layer_idx = layer_idx
        if layer_idx is None:
            logger.warning_once(
                f"Instantiating {self.__class__.__name__} without passing `layer_idx` is not recommended and will "
                "to errors during the forward call, if caching is used. Please make sure to provide a `layer_idx` "
                "when creating this class.")

        self.hidden_size = config.hidden_size
        self.num_heads = config.num_attention_heads
        self.head_dim = self.hidden_size // self.num_heads
        self.num_key_value_heads = config.num_key_value_heads
        self.num_key_value_groups = self.num_heads // self.num_key_value_heads
        self.max_position_embeddings = config.max_position_embeddings
        self.rope_theta = config.rope_theta
        self.is_causal = True
        self.attention_dropout = config.attention_dropout

        if (self.head_dim * self.num_heads) != self.hidden_size:
            raise ValueError(
                f"hidden_size must be divisible by num_heads (got `hidden_size`: {self.hidden_size}"
                f" and `num_heads`: {self.num_heads}).")

        self.q_proj = nn.Linear(self.hidden_size, self.num_heads * self.head_dim, bias=config.attention_bias)
        self.k_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=config.attention_bias)
        self.v_proj = nn.Linear(self.hidden_size, self.num_key_value_heads * self.head_dim, bias=config.attention_bias)
        self.o_proj = nn.Linear(self.num_heads * self.head_dim,
                                self.hidden_size,
                                bias=False)
        # Marker attributes on the projection modules; they are only set here, never read.
        self.o_proj.__do_scale_tager__ = True
        self.q_proj.__do_scale_tager_mu_dim_model__ = True
        self.k_proj.__do_scale_tager_mu_dim_model__ = True
        self.v_proj.__do_scale_tager_mu_dim_model__ = True
        self.o_proj.__do_scale_tager_mu_o__ = True

        # Per-projection output multipliers: learnable when `wesar_weights` is on, otherwise the constant 1.
        if self.config.wesar_weights:
            self.q_proj_alpha = nn.Parameter(torch.ones(1) * self.config.q_proj_alpha)
            self.k_proj_alpha = nn.Parameter(torch.ones(1) * self.config.k_proj_alpha)
            self.v_proj_alpha = nn.Parameter(torch.ones(1) * self.config.v_proj_alpha)
            self.o_proj_alpha = nn.Parameter(torch.ones(1) * self.config.o_proj_alpha)
        else:
            self.q_proj_alpha = 1
            self.k_proj_alpha = 1
            self.v_proj_alpha = 1
            self.o_proj_alpha = 1

        self.qk_layernorm = config.qk_layernorm
        if self.qk_layernorm:
            self.q_layernorm = StableLmLayerNormPerHead(
                self.head_dim, self.num_heads, eps=config.layer_norm_eps, use_liger=config.use_liger,
            )
            self.k_layernorm = StableLmLayerNormPerHead(
                self.head_dim, self.num_key_value_heads, eps=config.layer_norm_eps, use_liger=config.use_liger,
            )

        self.log_hidden_states = get_hidden_states_logger(self.layer_idx, self.config.num_hidden_layers)

    def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
        """Reshape `(bsz, seq_len, hidden)` to a contiguous `(bsz, num_heads, seq_len, head_dim)` tensor."""
        return tensor.view(bsz, seq_len, self.num_heads,
                           self.head_dim).transpose(1, 2).contiguous()

    def _get_rotary_emb(self, device):
        """Return this layer's rotary embedding module, creating it on `device` on first use.

        `forward` needs cos/sin tables, but `__init__` never creates a rotary module. It is built
        lazily so that only the eager path pays for the tables, and so they are created on the
        device the inputs actually live on. Its buffers are non-persistent, so the state dict is
        unaffected.
        """
        rotary_emb = getattr(self, "rotary_emb", None)
        if rotary_emb is None:
            rotary_emb = YuLanMiniRotaryEmbedding(
                self.head_dim,
                max_position_embeddings=self.max_position_embeddings,
                base=self.rope_theta,
                device=device,
            )
            self.rotary_emb = rotary_emb
        return rotary_emb

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_value: Optional[Cache] = None,
        output_attentions: bool = False,
        use_cache: bool = False,
        cache_position: Optional[torch.LongTensor] = None,
        **kwargs,
    ) -> Tuple[torch.Tensor, Optional[torch.Tensor],
               Optional[Tuple[torch.Tensor]]]:
        """Run eager attention over `hidden_states`.

        Args:
            hidden_states: Input of shape `(batch, seq_len, hidden_size)`.
            attention_mask: Optional additive 4D mask; it is sliced to the key length and added to the scores.
            position_ids: Position indices used to gather the rotary cos/sin rows.
            past_key_value: Optional cache object; when given, new keys/values are stored through `update`.
            output_attentions: Whether to return the attention probabilities.
            use_cache: Accepted for interface compatibility; not read here.
            cache_position: Forwarded to the cache's `update` call.
            **kwargs: Ignored extra arguments (e.g. `position_embeddings` passed by the decoder layer).

        Returns:
            `(attn_output, attn_weights or None, past_key_value)`.

        Raises:
            ValueError: If a cache is used without `layer_idx`, or if intermediate shapes are unexpected.
        """
        logger.warning_once("You are not running the flash-attention implementation, expect numerical differences.")

        bsz, q_len, _ = hidden_states.size()

        query_states = self.q_proj(hidden_states)
        query_states = query_states * self.q_proj_alpha
        key_states = self.k_proj(hidden_states)
        key_states = key_states * self.k_proj_alpha
        value_states = self.v_proj(hidden_states)
        value_states = value_states * self.v_proj_alpha

        query_states = query_states.view(bsz, q_len, self.num_heads,
                                         self.head_dim).transpose(1, 2)
        key_states = key_states.view(bsz, q_len, self.num_key_value_heads,
                                     self.head_dim).transpose(1, 2)
        value_states = value_states.view(bsz, q_len, self.num_key_value_heads,
                                         self.head_dim).transpose(1, 2)

        if self.qk_layernorm:
            query_states = self.q_layernorm(query_states)
            key_states = self.k_layernorm(key_states)

        kv_seq_len = key_states.shape[-2]
        if past_key_value is not None:
            if self.layer_idx is None:
                raise ValueError(
                    f"The cache structure has changed since version v4.36. If you are using {self.__class__.__name__} "
                    "for auto-regressive decoding with k/v caching, please make sure to initialize the attention class "
                    "with a layer index.")
            kv_seq_len += past_key_value.get_usable_length(
                kv_seq_len, self.layer_idx)

        rotary_emb = self._get_rotary_emb(hidden_states.device)
        cos, sin = rotary_emb(value_states, seq_len=kv_seq_len)
        query_states, key_states = apply_rotary_pos_emb(
            query_states, key_states, cos, sin, position_ids)

        if past_key_value is not None:
            cache_kwargs = {
                "sin": sin,
                "cos": cos,
                "cache_position": cache_position
            }  # Specific to RoPE models
            key_states, value_states = past_key_value.update(
                key_states, value_states, self.layer_idx, cache_kwargs)

        # repeat k/v heads if n_kv_heads < n_heads
        key_states = repeat_kv(key_states, self.num_key_value_groups)
        value_states = repeat_kv(value_states, self.num_key_value_groups)

        # Scores are scaled by sqrt(dim_model_base_attn) / head_dim (same factor as the
        # flash-attention path's `softmax_scale`), not by the usual 1 / sqrt(head_dim).
        attn_weights = torch.matmul(query_states, key_states.transpose(
            2, 3)) * math.sqrt(self.config.dim_model_base_attn) / self.head_dim

        if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len):
            raise ValueError(
                f"Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is"
                f" {attn_weights.size()}")

        if attention_mask is not None:  # no matter the length, we just slice it
            causal_mask = attention_mask[:, :, :, :key_states.shape[-2]]
            attn_weights = attn_weights + causal_mask

        # upcast attention to fp32
        attn_weights = nn.functional.softmax(attn_weights,
                                             dim=-1,
                                             dtype=torch.float32).to(
                                                 query_states.dtype)
        self.log_hidden_states("1_attn_weights", attn_weights)
        attn_weights = nn.functional.dropout(attn_weights,
                                             p=self.attention_dropout,
                                             training=self.training)
        attn_output = torch.matmul(attn_weights, value_states)

        if attn_output.size() != (bsz, self.num_heads, q_len, self.head_dim):
            raise ValueError(
                f"`attn_output` should be of size {(bsz, self.num_heads, q_len, self.head_dim)}, but is"
                f" {attn_output.size()}")

        attn_output = attn_output.transpose(1, 2).contiguous()
        attn_output = attn_output.reshape(bsz, q_len, self.hidden_size)

        attn_output = self.o_proj(attn_output)
        attn_output = self.o_proj_alpha * attn_output

        if not output_attentions:
            attn_weights = None

        # The parameter is named `past_key_value`; the previous `past_key_values` was undefined here.
        return attn_output, attn_weights, past_key_value
+ self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal_2_10( + ) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.LongTensor] = None, + position_ids: Optional[torch.LongTensor] = None, + past_key_value: Optional[Cache] = None, + output_attentions: bool = False, + use_cache: bool = False, + cache_position: Optional[torch.LongTensor] = None, + position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, + **kwargs, + ) -> Tuple[torch.Tensor, Optional[torch.Tensor], + Optional[Tuple[torch.Tensor]]]: + output_attentions = False + + bsz, q_len, _ = hidden_states.size() + + query_states = self.q_proj(hidden_states) + query_states = query_states * self.q_proj_alpha + key_states = self.k_proj(hidden_states) + key_states = key_states * self.k_proj_alpha + value_states = self.v_proj(hidden_states) + value_states = value_states * self.v_proj_alpha + + # Flash attention requires the input to have the shape + # batch_size x seq_length x head_dim x hidden_dim + # therefore we just need to keep the original shape + query_states = query_states.view(bsz, q_len, self.num_heads, + self.head_dim).transpose(1, 2) + key_states = key_states.view(bsz, q_len, self.num_key_value_heads, + self.head_dim).transpose(1, 2) + value_states = value_states.view(bsz, q_len, self.num_key_value_heads, + self.head_dim).transpose(1, 2) + + if self.qk_layernorm: + query_states = self.q_layernorm(query_states) + key_states = self.k_layernorm(key_states) + + kv_seq_len = key_states.shape[-2] + if past_key_value is not None: + if self.layer_idx is None: + raise ValueError( + f"The cache structure has changed since version v4.36. 
If you are using {self.__class__.__name__} " + "for auto-regressive decoding with k/v caching, please make sure to initialize the attention class " + "with a layer index.") + kv_seq_len += past_key_value.get_usable_length( + kv_seq_len, self.layer_idx) + + cos, sin = position_embeddings + query_states, key_states = apply_rotary_pos_emb(query_states, + key_states, + cos, + sin, + position_ids=position_ids, + fast=True) + + if past_key_value is not None: + # Activate slicing cache only if the config has a value `sliding_windows` attribute + cache_has_contents = past_key_value.get_seq_length( + self.layer_idx) > 0 + if (getattr(self.config, "sliding_window", None) is not None + and kv_seq_len > self.config.sliding_window + and cache_has_contents): + slicing_tokens = 1 - self.config.sliding_window + + past_key = past_key_value[self.layer_idx][0] + past_value = past_key_value[self.layer_idx][1] + + past_key = past_key[:, :, slicing_tokens:, :].contiguous() + past_value = past_value[:, :, slicing_tokens:, :].contiguous() + + if past_key.shape[-2] != self.config.sliding_window - 1: + raise ValueError( + f"past key must have a shape of (`batch_size, num_heads, self.config.sliding_window-1, head_dim`), got" + f" {past_key.shape}") + + if attention_mask is not None: + attention_mask = attention_mask[:, slicing_tokens:] + attention_mask = torch.cat([ + attention_mask, + torch.ones_like(attention_mask[:, -1:]) + ], + dim=-1) + + cache_kwargs = { + "sin": sin, + "cos": cos, + "cache_position": cache_position + } # Specific to RoPE models + key_states, value_states = past_key_value.update( + key_states, value_states, self.layer_idx, cache_kwargs) + + # todo: check if we need to repeat_kv + # repeat k/v heads if n_kv_heads < n_heads + key_states = repeat_kv(key_states, self.num_key_value_groups) + value_states = repeat_kv(value_states, self.num_key_value_groups) + dropout_rate = 0.0 if not self.training else self.attention_dropout + + # In PEFT, usually we cast the layer norms in 
float32 for training stability reasons + # therefore the input hidden states gets silently casted in float32. Hence, we need + # cast them back in the correct dtype just to be sure everything works as expected. + # This might slowdown training & inference so it is recommended to not cast the LayerNorms + # in fp32. (YuLanMiniRMSNorm handles it correctly) + + input_dtype = query_states.dtype + if input_dtype == torch.float32: + if torch.is_autocast_enabled(): + target_dtype = torch.get_autocast_gpu_dtype() + # Handle the case where the model is quantized + elif hasattr(self.config, "_pre_quantization_dtype"): + target_dtype = self.config._pre_quantization_dtype + else: + target_dtype = self.q_proj.weight.dtype + + logger.warning_once( + f"The input hidden states seems to be silently casted in float32, this might be related to" + f" the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in" + f" {target_dtype}.") + + query_states = query_states.to(target_dtype) + key_states = key_states.to(target_dtype) + value_states = value_states.to(target_dtype) + + # TODO: These transpose are quite inefficient but Flash Attention requires the layout [batch_size, sequence_length, num_heads, head_dim]. We would need to refactor the KV cache + # to be able to avoid many of these transpose/reshape/view. 
class YuLanMiniDecoderLayer(nn.Module):
    """Pre-norm decoder layer: RMSNorm -> self-attention -> residual, then RMSNorm -> MLP -> residual.

    When `config.wesar_weights` is set, learnable scalar multipliers are applied around the MLP, and
    (if `config.use_norm_alpha` is also set) after each norm. Both sub-layer outputs can additionally
    be multiplied by `config.hidden_states_shrink` before they are added to the residual stream.
    """

    def __init__(self, config: YuLanMiniConfig, layer_idx: int):
        super().__init__()
        self.hidden_size = config.hidden_size
        self.config = config

        if config.sliding_window and config._attn_implementation != "flash_attention_2":
            logger.warning_once(
                f"Sliding Window Attention is enabled but not implemented for `{config._attn_implementation}`; "
                "unexpected results may be encountered.")
        self.self_attn = YULANMINI_ATTENTION_CLASSES[
            config._attn_implementation](config=config, layer_idx=layer_idx)
        self.layer_idx = layer_idx

        mlp_class = LigerSwiGLUMLP if config.use_liger else YuLanMiniMLP
        self.mlp = mlp_class(config)
        # `torch.tensor(1.0)` (not `1`) keeps the product floating point even when the configured
        # alpha is an int; an integer tensor cannot be wrapped in a trainable `nn.Parameter`.
        if self.config.wesar_weights:
            self.gate_up_proj_alpha = nn.Parameter(torch.tensor(1.0) * self.config.gate_up_proj_alpha)
            self.down_proj_alpha = nn.Parameter(torch.tensor(1.0) * self.config.down_proj_alpha)
        else:
            self.gate_up_proj_alpha = 1
            self.down_proj_alpha = 1

        rms_class = LigerRMSNorm if config.use_liger else YuLanMiniRMSNorm
        if config.rms_type == "llama":
            rms_kwargs = {"offset": 0, "init_fn": "ones", "casting_mode": "llama"}
        elif config.rms_type == "gemma":
            rms_kwargs = {"offset": 1, "init_fn": "zeros", "casting_mode": "gemma"}
        else:
            # Previously this fell through and failed later with an unbound `rms_kwargs`.
            raise ValueError(f"Invalid rms_type: {config.rms_type}")

        use_norm_alpha = self.config.wesar_weights and self.config.use_norm_alpha

        self.input_layernorm = rms_class(config.hidden_size, eps=config.rms_norm_eps, **rms_kwargs)
        if use_norm_alpha:
            self.input_layernorm_alpha = nn.Parameter(torch.tensor(1.0) * self.config.input_layernorm_alpha)
        else:
            # No learnable scale on the input norm: use the constant 1.
            self.input_layernorm_alpha = 1

        self.post_attention_layernorm = rms_class(config.hidden_size, eps=config.rms_norm_eps, **rms_kwargs)
        if use_norm_alpha:
            self.post_attention_layernorm_alpha = nn.Parameter(
                torch.tensor(1.0) * self.config.post_attention_layernorm_alpha)
        else:
            # No learnable scale on the post-attention norm: use the constant 1.
            self.post_attention_layernorm_alpha = 1

    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_value: Optional[Tuple[torch.Tensor]] = None,
        output_attentions: Optional[bool] = False,
        use_cache: Optional[bool] = False,
        cache_position: Optional[torch.LongTensor] = None,
        position_embeddings: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
        **kwargs,
    ) -> Tuple[torch.FloatTensor, Optional[Tuple[torch.FloatTensor,
                                                 torch.FloatTensor]]]:
        """
        Args:
            hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
            attention_mask (`torch.FloatTensor`, *optional*):
                attention mask of size `(batch_size, sequence_length)` if flash attention is used or `(batch_size, 1,
                query_sequence_length, key_sequence_length)` if default attention is used.
            position_ids (`torch.LongTensor`, *optional*): forwarded unchanged to the attention module.
            past_key_value (`Tuple(torch.FloatTensor)`, *optional*): cached past key and value projection states
            output_attentions (`bool`, *optional*):
                Forwarded to the attention module. The attention weights are not included in this layer's output.
            use_cache (`bool`, *optional*): forwarded unchanged to the attention module.
            cache_position (`torch.LongTensor` of shape `(sequence_length)`, *optional*):
                Indices depicting the position of the input sequence tokens in the sequence.
            position_embeddings (`Tuple[torch.Tensor, torch.Tensor]`, *optional*):
                `(cos, sin)` pair forwarded unchanged to the attention module.
            kwargs (`dict`, *optional*):
                Arbitrary kwargs forwarded to the attention module, used for FSDP and other methods that
                injects code into the model

        Returns:
            A 1-tuple `(hidden_states,)` holding the layer output.
        """
        log_hidden_states = get_hidden_states_logger(self.layer_idx, self.config.num_hidden_layers)
        shrink = self.config.hidden_states_shrink
        # Sub-layer outputs are only rescaled when the factor lies in [0, 1).
        apply_shrink = 0 <= shrink < 1

        # Self-attention sub-layer
        residual = hidden_states
        hidden_states = self.input_layernorm(hidden_states) * self.config.ln_scale * self.input_layernorm_alpha
        log_hidden_states("0_input_ln", hidden_states)
        hidden_states, self_attn_weights, present_key_value = self.self_attn(
            hidden_states=hidden_states,
            attention_mask=attention_mask,
            position_ids=position_ids,
            past_key_value=past_key_value,
            output_attentions=output_attentions,
            use_cache=use_cache,
            cache_position=cache_position,
            position_embeddings=position_embeddings,
            **kwargs,
        )
        if apply_shrink:
            hidden_states = hidden_states * shrink
        hidden_states = residual + hidden_states

        # Fully Connected
        residual = hidden_states
        hidden_states = (self.post_attention_layernorm(hidden_states) * self.config.ln_scale *
                         self.post_attention_layernorm_alpha)
        log_hidden_states("4_post_ln", hidden_states)
        hidden_states = hidden_states * self.gate_up_proj_alpha
        hidden_states = self.mlp(hidden_states)
        hidden_states = hidden_states * self.down_proj_alpha

        if apply_shrink:
            hidden_states = hidden_states * shrink
        hidden_states = residual + hidden_states

        # Only the hidden states are returned; the attention weights and the cache returned by
        # the attention module are not appended to the output tuple.
        outputs = (hidden_states, )

        return outputs
@add_start_docstrings(
    "The bare YuLanMini Model outputting raw hidden-states without any specific head on top.",
    YULANMINI_START_DOCSTRING,
)
class YuLanMiniPreTrainedModel(PreTrainedModel):
    # Base class wiring YuLanMini into `PreTrainedModel`: declares the config
    # class and capability flags, and implements the weight initialisation.
    config_class = YuLanMiniConfig
    base_model_prefix = "model"
    supports_gradient_checkpointing = True
    _no_split_modules = ["YuLanMiniDecoderLayer"]
    _skip_keys_device_placement = "past_key_values"
    _supports_flash_attn_2 = True
    _supports_sdpa = False
    _supports_cache_class = True

    def _mu_std_divisor(self, module):
        """Return the width/depth divisor for a linear layer's init std, or ``None``.

        The divisor is selected by marker attributes set on ``module``; the
        first marker that is truthy wins, in this order:

        * ``__do_scale_tager_mu_original__``: no scaling.
        * ``__do_scale_tager_mu_o__`` / ``__do_scale_tager_mu_ffn__``:
          ``sqrt(2 * (hidden_size / dim_model_base_init) * num_hidden_layers)``
          when ``config.model_reproduce == "cerebras"``, otherwise
          ``sqrt(hidden_size / dim_model_base_init)``.
        * ``__do_scale_tager_mu_dim_model__``:
          ``sqrt(hidden_size / dim_model_base_init)``.
        * ``__do_scale_tager_mu_dim_base_model__``:
          ``sqrt(dim_model_base_lmh)``.

        Branches that depend on ``dim_model_base_init`` apply no scaling when
        it is ``None``. ``None`` is returned whenever no scaling applies.
        """
        config = self.config

        def marked(name):
            return getattr(module, name, False)

        if marked("__do_scale_tager_mu_original__"):
            return None
        if marked("__do_scale_tager_mu_o__") or marked("__do_scale_tager_mu_ffn__"):
            # Only the "cerebras" recipe adds the depth-dependent factor.
            depth_scaled = config.model_reproduce == "cerebras"
        elif marked("__do_scale_tager_mu_dim_model__"):
            depth_scaled = False
        elif marked("__do_scale_tager_mu_dim_base_model__"):
            return math.sqrt(config.dim_model_base_lmh)
        else:
            return None

        if config.dim_model_base_init is None:
            return None
        width_ratio = config.hidden_size / config.dim_model_base_init
        if depth_scaled:
            return math.sqrt(2 * width_ratio * config.num_hidden_layers)
        return math.sqrt(width_ratio)

    def _init_weights(self, module):
        """Initialise ``module`` in place with a zero-mean normal distribution.

        * ``nn.Linear``: the std starts at ``config.initializer_range``. It is
          divided by ``config.init_scale_o`` when the module carries
          ``__do_scale_tager__`` and ``config.model_reproduce`` is not
          ``"transformer"``, then by the divisor from ``_mu_std_divisor``.
          Biases are zeroed.
        * ``nn.Embedding``: the std is the module's ``__std__`` attribute if
          present, else ``config.initializer_range``. The padding row, if any,
          is zeroed.

        Other module types are left untouched.
        """
        std = self.config.initializer_range
        if isinstance(module, nn.Linear):
            # Reproduction variant: optional output-projection down-scaling.
            module_std = std
            if not self.config.model_reproduce == "transformer":
                if getattr(module, "__do_scale_tager__", False):
                    module_std = module_std / self.config.init_scale_o

            # muP variant: width/depth dependent scaling.
            divisor = self._mu_std_divisor(module)
            if divisor is not None:
                module_std = module_std / divisor

            # Routed through the module logger rather than `print`, so the
            # per-module trace obeys the configured logging verbosity.
            logger.info("init %s with std %s (%s)", module, module_std, module.__class__.__name__)
            module.weight.data.normal_(mean=0.0, std=module_std)
            if module.bias is not None:
                module.bias.data.zero_()

        elif isinstance(module, nn.Embedding):
            module_std = getattr(module, "__std__", std)
            logger.info("init %s with std %s (%s)", module, module_std, module.__class__.__name__)
            module.weight.data.normal_(mean=0.0, std=module_std)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
+ + [What are input IDs?](../glossary#input-ids) + attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): + Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: + + - 1 for tokens that are **not masked**, + - 0 for tokens that are **masked**. + + [What are attention masks?](../glossary#attention-mask) + + Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and + [`PreTrainedTokenizer.__call__`] for details. + + If `past_key_values` is used, optionally only the last `input_ids` have to be input (see + `past_key_values`). + + If you want to change padding behavior, you should read [`modeling_opt._prepare_decoder_attention_mask`] + and modify to your needs. See diagram 1 in [the paper](https://arxiv.org/abs/1910.13461) for more + information on the default strategy. + + - 1 indicates the head is **not masked**, + - 0 indicates the head is **masked**. + position_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): + Indices of positions of each input sequence tokens in the position embeddings. Selected in the range `[0, + config.n_positions - 1]`. + + [What are position IDs?](../glossary#position-ids) + past_key_values (`Cache` or `tuple(tuple(torch.FloatTensor))`, *optional*): + Pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention + blocks) that can be used to speed up sequential decoding. This typically consists in the `past_key_values` + returned by the model at a previous stage of decoding, when `use_cache=True` or `config.use_cache=True`. + + Two formats are allowed: + - a [`~cache_utils.Cache`] instance; + - Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of + shape `(batch_size, num_heads, sequence_length, embed_size_per_head)`). This is also known as the legacy + cache format. 
+ + The model will output the same cache format that is fed as input. If no `past_key_values` are passed, the + legacy cache format will be returned. + + If `past_key_values` are used, the user can optionally input only the last `input_ids` (those that don't + have their past key value states given to this model) of shape `(batch_size, 1)` instead of all `input_ids` + of shape `(batch_size, sequence_length)`. + inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): + Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. This + is useful if you want more control over how to convert `input_ids` indices into associated vectors than the + model's internal embedding lookup matrix. + use_cache (`bool`, *optional*): + If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see + `past_key_values`). + output_attentions (`bool`, *optional*): + Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned + tensors for more detail. + output_hidden_states (`bool`, *optional*): + Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for + more detail. + return_dict (`bool`, *optional*): + Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. + cache_position (`torch.LongTensor` of shape `(sequence_length)`, *optional*): + Indices depicting the position of the input sequence tokens in the sequence. Contrarily to `position_ids`, + this tensor is not affected by padding. It is used to update the cache in the correct position and to infer + the complete sequence length. 
@add_start_docstrings(
    "The bare YuLanMini Model outputting raw hidden-states without any specific head on top.",
    YULANMINI_START_DOCSTRING,
)
class YuLanMiniModel(YuLanMiniPreTrainedModel):
    """
    Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`YuLanMiniDecoderLayer`]

    Args:
        config: YuLanMiniConfig
    """

    def __init__(self, config: YuLanMiniConfig):
        """Build embeddings, optional embedding norm, decoder layers, final norm and RoPE.

        Raises:
            ValueError: If ``config.rms_type`` is neither ``"llama"`` nor
                ``"gemma"``, or (from ``_init_rope``) if the RoPE scaling type
                is unknown.
        """
        super().__init__(config)
        self.config = config
        self.padding_idx = config.pad_token_id
        self.vocab_size = config.vocab_size

        self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size,
                                         self.padding_idx)
        # The embedding multiplier is fixed at 1 (no learnable variant here).
        self.embed_tokens_alpha = 1
        if not self.config.tie_word_embeddings:
            # Read back by `_init_weights` as the embedding init std.
            self.embed_tokens.__std__ = 1.0

        rms_class = LigerRMSNorm if config.use_liger else YuLanMiniRMSNorm
        if config.rms_type == "llama":
            rms_kwargs = {"offset": 0, "init_fn": "ones", "casting_mode": "llama"}
        elif config.rms_type == "gemma":
            rms_kwargs = {"offset": 1, "init_fn": "zeros", "casting_mode": "gemma"}
        else:
            # Previously this fell through and crashed later on an unbound
            # `rms_kwargs`; fail early with an actionable message instead.
            raise ValueError(
                f"Unsupported rms_type {config.rms_type!r}; expected 'llama' or 'gemma'")

        if self.config.embedding_ln:
            ln_class = LigerLayerNorm if config.use_liger else nn.LayerNorm
            self.embedding_layernorm = ln_class(config.hidden_size, eps=config.layer_norm_eps, bias=False)
        elif self.config.embedding_rmsln:
            self.embedding_layernorm = rms_class(config.hidden_size, eps=config.rms_norm_eps, **rms_kwargs)

        self.layers = nn.ModuleList([
            YuLanMiniDecoderLayer(config, layer_idx)
            for layer_idx in range(config.num_hidden_layers)
        ])
        self._attn_implementation = config._attn_implementation

        self.norm = rms_class(config.hidden_size,
                              eps=config.rms_norm_eps, **rms_kwargs)
        if self.config.wesar_weights and self.config.use_norm_alpha:
            # Float base tensor: with an integer config value,
            # `torch.tensor(1) * value` is int64, which `nn.Parameter` rejects
            # because integer tensors cannot require gradients.
            self.norm_alpha = nn.Parameter(torch.tensor(1.0) * self.config.norm_alpha)
        else:
            self.norm_alpha = 1
        self._init_rope()

        # NOTE(review): checkpointing is switched on by default here, while
        # `forward` relies on `self._gradient_checkpointing_func`, which is not
        # set in this class — confirm the training entry point installs it.
        self.gradient_checkpointing = True
        if self.config.wesar_weights:
            self.shrink_alpha = config.shrink_alpha
        else:
            self.shrink_alpha = 1
        self.scale_emb = config.scale_emb
        self.log_hidden_states = get_hidden_states_logger(0, None)
        # Initialize weights and apply final processing
        self.post_init()

    def _init_rope(self):
        """Create ``self.rotary_emb`` according to ``config.rope_scaling``.

        Uses the plain rotary embedding when ``rope_scaling`` is ``None``, and
        the linear or dynamic-NTK variant for ``"linear"`` / ``"dynamic"``.

        Raises:
            ValueError: If ``config.rope_scaling["type"]`` is any other value.
        """
        self.rope_theta = self.config.rope_theta
        self.max_position_embeddings = self.config.max_position_embeddings
        self.hidden_size = self.config.hidden_size
        self.num_heads = self.config.num_attention_heads
        self.head_dim = self.hidden_size // self.num_heads
        if self.config.rope_scaling is None:
            self.rotary_emb = YuLanMiniRotaryEmbedding(
                self.head_dim,
                max_position_embeddings=self.max_position_embeddings,
                base=self.rope_theta,
            )
        else:
            # https://huggingface.co/docs/text-generation-inference/basic_tutorials/preparing_model#rope-scaling
            scaling_type = self.config.rope_scaling["type"]
            scaling_factor = self.config.rope_scaling["factor"]
            if scaling_type == "linear":
                self.rotary_emb = YuLanMiniLinearScalingRotaryEmbedding(
                    self.head_dim,
                    max_position_embeddings=self.max_position_embeddings,
                    scaling_factor=scaling_factor,
                    base=self.rope_theta,
                )
            elif scaling_type == "dynamic":
                self.rotary_emb = YuLanMiniDynamicNTKScalingRotaryEmbedding(
                    self.head_dim,
                    max_position_embeddings=self.max_position_embeddings,
                    scaling_factor=scaling_factor,
                    base=self.rope_theta,
                )
            else:
                raise ValueError(f"Unknown RoPE scaling type {scaling_type}")

    def get_input_embeddings(self):
        """Return the token embedding module."""
        return self.embed_tokens

    def set_input_embeddings(self, value):
        """Replace the token embedding module with ``value``."""
        self.embed_tokens = value

    @add_start_docstrings_to_model_forward(YULANMINI_INPUTS_DOCSTRING)
    def forward(
        self,
        input_ids: torch.LongTensor = None,
        attention_mask: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        past_key_values: Optional[List[torch.FloatTensor]] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        cache_position: Optional[torch.LongTensor] = None,
    ) -> Union[Tuple, BaseModelOutputWithPast]:
        # Flags left as `None` fall back to the configuration defaults.
        output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
        output_hidden_states = (output_hidden_states
                                if output_hidden_states is not None else
                                self.config.output_hidden_states)
        use_cache = use_cache if use_cache is not None else self.config.use_cache

        # The `return_dict` argument is ignored: a `BaseModelOutputWithPast`
        # is always returned.
        return_dict = True

        # Exactly one of `input_ids` / `inputs_embeds` must be provided.
        if (input_ids is None) ^ (inputs_embeds is not None):
            raise ValueError(
                "You cannot specify both input_ids and inputs_embeds at the same time, and must specify either one"
            )

        if self.gradient_checkpointing and self.training:
            if use_cache:
                logger.warning_once(
                    "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                )
                use_cache = False

        # Outside training, anything that is not a `Cache` instance is
        # converted to a `DynamicCache` and converted back on the way out.
        use_legacy_cache = False
        if use_cache and not isinstance(past_key_values,
                                        Cache) and not self.training:
            use_legacy_cache = True
            past_key_values = DynamicCache.from_legacy_cache(past_key_values)
            logger.warning_once(
                "We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. "
                "Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)"
            )

        # NOTE(review): nesting of the next two statements under this `if` is
        # inferred; the embedding alpha is the constant 1, so the scaling is a
        # no-op either way.
        if inputs_embeds is None:
            inputs_embeds = self.embed_tokens(input_ids) * self.scale_emb
            inputs_embeds = inputs_embeds * self.embed_tokens_alpha
            self.log_hidden_states(0, "0_embed", inputs_embeds)

        if 0 <= self.shrink_alpha < 1:
            # The forward value is unchanged; only the gradient flowing into
            # the embeddings is scaled by `shrink_alpha`.
            shrink_alpha = self.shrink_alpha
            inputs_embeds = inputs_embeds * shrink_alpha + inputs_embeds.detach() * (1 - shrink_alpha)
            self.log_hidden_states(0, "1_shrink", inputs_embeds)

        if self.config.embedding_ln:
            inputs_embeds = self.embedding_layernorm(inputs_embeds)
            self.log_hidden_states(0, "2_embln", inputs_embeds)
        elif self.config.embedding_rmsln:
            inputs_embeds = self.embedding_layernorm(inputs_embeds) * self.config.ln_scale
            self.log_hidden_states(0, "2_embln", inputs_embeds)

        if cache_position is None:
            past_seen_tokens = past_key_values.get_seq_length(
            ) if past_key_values is not None else 0
            cache_position = torch.arange(past_seen_tokens,
                                          past_seen_tokens +
                                          inputs_embeds.shape[1],
                                          device=inputs_embeds.device)
        if position_ids is None:
            position_ids = cache_position.unsqueeze(0)

        causal_mask = self._update_causal_mask(attention_mask, inputs_embeds,
                                               cache_position, past_key_values,
                                               output_attentions)

        hidden_states = inputs_embeds

        position_embeddings = self.rotary_emb(hidden_states, hidden_states.shape[1])  # Warning: ignore the position_ids

        # decoder layers
        all_hidden_states = () if output_hidden_states else None
        all_self_attns = () if output_attentions else None
        next_decoder_cache = None

        for idx, decoder_layer in enumerate(self.layers):
            if output_hidden_states:
                all_hidden_states += (hidden_states, )

            # Layers whose index is a multiple of `gradient_checkpointing_step`
            # are run without checkpointing.
            if self.gradient_checkpointing and self.training and idx % self.config.gradient_checkpointing_step != 0:
                layer_outputs = self._gradient_checkpointing_func(
                    decoder_layer.__call__,
                    hidden_states,
                    causal_mask,
                    position_ids,
                    past_key_values,
                    output_attentions,
                    use_cache,
                    cache_position,
                    position_embeddings,
                )
            else:
                layer_outputs = decoder_layer(
                    hidden_states,
                    attention_mask=causal_mask,
                    position_ids=position_ids,
                    past_key_value=past_key_values,
                    output_attentions=output_attentions,
                    use_cache=use_cache,
                    cache_position=cache_position,
                    position_embeddings=position_embeddings,
                )

            hidden_states = layer_outputs[0]

            # A decoder layer may return only `(hidden_states,)`; tolerate the
            # short tuple instead of raising IndexError. The fallback reuses
            # the cache object handed to the layers — presumably it is updated
            # in place by the attention modules; confirm against them.
            if use_cache:
                cache_slot = 2 if output_attentions else 1
                if len(layer_outputs) > cache_slot:
                    next_decoder_cache = layer_outputs[cache_slot]
                else:
                    next_decoder_cache = past_key_values

            if output_attentions:
                all_self_attns += (layer_outputs[1] if len(layer_outputs) > 1 else None, )

        # The final norm is computed in float32, then cast back.
        old_dtype = hidden_states.dtype
        hidden_states = hidden_states.to(torch.float32)
        hidden_states = self.norm(hidden_states) * self.config.ln_scale * self.norm_alpha
        hidden_states = hidden_states.to(old_dtype)
        self.log_hidden_states(7, "0_norm", hidden_states)

        # add hidden states from the last decoder layer
        if output_hidden_states:
            all_hidden_states += (hidden_states, )

        next_cache = None
        if use_cache:
            next_cache = next_decoder_cache.to_legacy_cache(
            ) if use_legacy_cache else next_decoder_cache

        return BaseModelOutputWithPast(
            last_hidden_state=hidden_states,
            past_key_values=next_cache,
            hidden_states=all_hidden_states,
            attentions=all_self_attns,
        )

    def _update_causal_mask(
        self,
        attention_mask: torch.Tensor,
        input_tensor: torch.Tensor,
        cache_position: torch.Tensor,
        past_key_values: Cache,
        output_attentions: bool,
    ):
        """Return the attention mask to hand to the decoder layers.

        For ``flash_attention_2`` the 2D mask is returned unchanged when it
        contains a ``0.0`` entry and ``None`` otherwise. For ``sdpa`` (without
        a static cache and without ``output_attentions``) ``None`` is returned
        when ``AttentionMaskConverter._ignore_causal_mask_sdpa`` says the mask
        can be dropped. In every other case a 4D causal mask is built.
        """
        # TODO: As of torch==2.2.0, the `attention_mask` passed to the model in `generate` is 2D and of dynamic length even when the static
        # KV cache is used. This is an issue for torch.compile which then recaptures cudagraphs at each decode steps due to the dynamic shapes.
        # (`recording cudagraph tree for symint key 13`, etc.), which is VERY slow. A workaround is `@torch.compiler.disable`, but this prevents using
        # `fullgraph=True`. See more context in https://github.com/huggingface/transformers/pull/29114

        if self.config._attn_implementation == "flash_attention_2":
            if attention_mask is not None and 0.0 in attention_mask:
                return attention_mask
            return None

        # For SDPA, when possible, we will rely on its `is_causal` argument instead of its `attn_mask` argument, in
        # order to dispatch on Flash Attention 2. This feature is not compatible with static cache, as SDPA will fail
        # to infer the attention mask.
        past_seen_tokens = past_key_values.get_seq_length(
        ) if past_key_values is not None else 0
        using_static_cache = isinstance(past_key_values, StaticCache)

        # When output attentions is True, sdpa implementation's forward method calls the eager implementation's forward
        if self.config._attn_implementation == "sdpa" and not using_static_cache and not output_attentions:
            if AttentionMaskConverter._ignore_causal_mask_sdpa(
                    attention_mask,
                    inputs_embeds=input_tensor,
                    past_key_values_length=past_seen_tokens,
                    is_training=self.training,
            ):
                return None

        dtype, device = input_tensor.dtype, input_tensor.device
        min_dtype = torch.finfo(dtype).min
        sequence_length = input_tensor.shape[1]
        if using_static_cache:
            target_length = past_key_values.get_max_length()
        else:
            target_length = (attention_mask.shape[-1] if isinstance(
                attention_mask, torch.Tensor) else past_seen_tokens +
                             sequence_length + 1)

        # In case the provided `attention` mask is 2D, we generate a causal mask here (4D).
        causal_mask = _prepare_4d_causal_attention_mask_with_cache_position(
            attention_mask,
            sequence_length=sequence_length,
            target_length=target_length,
            dtype=dtype,
            device=device,
            min_dtype=min_dtype,
            cache_position=cache_position,
            batch_size=input_tensor.shape[0],
        )

        if (self.config._attn_implementation == "sdpa"
                and attention_mask is not None
                and attention_mask.device.type == "cuda"
                and not output_attentions):
            # Attend to all tokens in fully masked rows in the causal_mask, for example the relevant first rows when
            # using left padding. This is required by F.scaled_dot_product_attention memory-efficient attention path.
            # Details: https://github.com/pytorch/pytorch/issues/110213
            causal_mask = AttentionMaskConverter._unmask_unattended(
                causal_mask, min_dtype)

        return causal_mask
@replace_return_docstrings(output_type=CausalLMOutputWithPast,
                           config_class=_CONFIG_FOR_DOC)
def forward(
    self,
    input_ids: torch.LongTensor = None,
    teacher_logits: list = None,
    attention_mask: Optional[torch.Tensor] = None,
    position_ids: Optional[torch.LongTensor] = None,
    past_key_values: Optional[List[torch.FloatTensor]] = None,
    inputs_embeds: Optional[torch.FloatTensor] = None,
    labels: Optional[torch.LongTensor] = None,
    use_cache: Optional[bool] = None,
    output_attentions: Optional[bool] = None,
    output_hidden_states: Optional[bool] = None,
    return_dict: Optional[bool] = None,
    cache_position: Optional[torch.LongTensor] = None,
    subset: Optional[List[str]] = None,
    idx: Optional[List[int]] = None,
) -> Union[Tuple, CausalLMOutputWithPast]:
    r"""
    Args:
        labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*):
            Labels for computing the causal language modeling loss. Indices should either be in `[0, ...,
            config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored
            (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`.
            When `labels` is given, the loss is computed with a fused linear + cross-entropy kernel and `logits`
            is returned as `None`; when `labels` is omitted, `logits` is returned and `loss` is `None`.
        teacher_logits, subset, idx:
            Accepted for interface compatibility; not used by this method.

    Returns:

    Example:

    ```python
    >>> from transformers import AutoTokenizer

    >>> model = YuLanMiniModelForCausalLM.from_pretrained(PATH_TO_CONVERTED_WEIGHTS)
    >>> tokenizer = AutoTokenizer.from_pretrained(PATH_TO_CONVERTED_TOKENIZER)

    >>> prompt = "Hey, are you conscious? Can you talk to me?"
    >>> inputs = tokenizer(prompt, return_tensors="pt")

    >>> # Generate
    >>> generate_ids = model.generate(inputs.input_ids, max_length=30)
    >>> tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]
    "Hey, are you conscious? Can you talk to me?\nI'm not conscious, but I can talk to you."
    ```"""

    # Honour explicit caller flags; fall back to the config only for `None`.
    # (They used to be overwritten unconditionally by the config values.)
    output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
    output_hidden_states = (output_hidden_states
                            if output_hidden_states is not None else
                            self.config.output_hidden_states)
    # The `return_dict` argument is ignored: a `CausalLMOutputWithPast` is
    # always returned.
    return_dict = True

    # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
    outputs = self.model(
        input_ids=input_ids,
        attention_mask=attention_mask,
        position_ids=position_ids,
        past_key_values=past_key_values,
        inputs_embeds=inputs_embeds,
        use_cache=use_cache,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states,
        return_dict=return_dict,
        cache_position=cache_position,
    )

    hidden_states = outputs[0]

    # Logit scaling is applied on the hidden states so that the training
    # path (fused loss) and the inference path (explicit logits) see the
    # same projection input.
    if self.config.dim_model_base_logits is not None and self.config.hidden_size != self.config.dim_model_base_logits:
        hidden_states = hidden_states / (self.config.hidden_size / self.config.dim_model_base_logits)
    hidden_states = hidden_states * self.lm_head_alpha

    logits = None
    loss = None

    if labels is not None:
        # Shift so that tokens < n predict n
        shift_hidden_states = hidden_states[..., :-1, :].contiguous()
        shift_labels = labels[..., 1:].contiguous()

        shift_hidden_states = shift_hidden_states.view(-1, self.config.hidden_size)
        shift_labels = shift_labels.view(-1)

        # The fused kernel takes the head weight directly, so the logits
        # tensor is never materialised on this path.
        lce = LigerFusedLinearCrossEntropyLoss(lse_square_scale=self.config.z_loss)
        loss = lce(self.lm_head.weight, shift_hidden_states, shift_labels)
    else:
        # Without labels the caller needs actual logits; previously `None`
        # was returned here.
        logits = self.lm_head(hidden_states)

    return CausalLMOutputWithPast(
        loss=loss,
        logits=logits,
        past_key_values=outputs.past_key_values,
        hidden_states=outputs.hidden_states,
        attentions=outputs.attentions,
    )