|
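# Per-module numerics/approximation configuration for a 6-layer GPT-2-style
# model (module names match Hugging Face GPT2LMHeadModel, e.g. distilgpt2).
# Keys per entry, as read from the entries themselves:
#   instance                 module class the entry applies to
#   input/output/weight/bias/accum_format
#                            numerical format at that port
#   approximation_function   functional approximation; NONE = exact
#   weight_sparseness        DENSE = no sparsity pattern imposed
# Format notation (a best-effort reading, not confirmed by the source):
#   SAME                     keep the incoming/default format unchanged
#   BFP[8|8]{64,d}(SN)       likely block floating point: 8-bit mantissas
#                            sharing an 8-bit exponent over blocks of 64
#                            elements along dimension d (-1 = last dim for
#                            activations, 0 for weights); (SN) presumably
#                            flags the sign/rounding convention
# Entries are sorted by module name; lm_head below is left entirely in
# default formats.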
lm_head:
  accum_format: SAME
  approximation_function: NONE
  input_format: SAME
  instance: Linear
  output_format: SAME
  weight_format: SAME
  weight_sparseness: DENSE
|
transformer.drop:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME
|
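# Layer 0 attention. The attention-probability dropout emits BFP output, so
# the probabilities feeding the attention-value matmul appear to be
# block-quantized; c_attn (QKV) and c_proj then run on BFP activations and
# BFP weights, blocked along dim -1 (activations) and dim 0 (weights).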
transformer.h.0.attn.attn_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: BFP[8|8]{64,-1}(SN)
|
transformer.h.0.attn.c_attn:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: BFP[8|8]{64,-1}(SN)
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
|
transformer.h.0.attn.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
|
transformer.h.0.attn.resid_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME
|
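# Softmax is approximated rather than re-quantized: SOFTMAX(base2,float16)
# presumably swaps e^x for a base-2 exponential computed in float16.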
transformer.h.0.attn.softmax:
  approximation_function: SOFTMAX(base2,float16)
  input_format: SAME
  instance: Softmax
  output_format: SAME
|
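# Both layer norms keep SAME formats and use LAYERNORM(fallback,4,float16);
# "fallback" and the 4 presumably select the approximation variant and one
# of its knobs, with float16 intermediates (reading not confirmed by the
# source).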
transformer.h.0.ln_1:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME
|
transformer.h.0.ln_2:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME
|
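# GELU(poly2,float16) appears to be a degree-2 polynomial fit to GELU,
# evaluated in float16.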
transformer.h.0.mlp.act:
  approximation_function: GELU(poly2,float16)
  input_format: SAME
  instance: GELU
  output_format: SAME
|
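# The MLP projections follow the same recipe as attn.c_proj: BFP inputs and
# weights, outputs left in the default format.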
transformer.h.0.mlp.c_fc:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
|
transformer.h.0.mlp.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE
|
transformer.h.0.mlp.dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME
|
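# Layers 1 through 5 repeat the layer-0 configuration verbatim.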
transformer.h.1.attn.attn_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: BFP[8|8]{64,-1}(SN)

transformer.h.1.attn.c_attn:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: BFP[8|8]{64,-1}(SN)
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE

transformer.h.1.attn.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE

transformer.h.1.attn.resid_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME

transformer.h.1.attn.softmax:
  approximation_function: SOFTMAX(base2,float16)
  input_format: SAME
  instance: Softmax
  output_format: SAME

transformer.h.1.ln_1:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME

transformer.h.1.ln_2:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME

transformer.h.1.mlp.act:
  approximation_function: GELU(poly2,float16)
  input_format: SAME
  instance: GELU
  output_format: SAME

transformer.h.1.mlp.c_fc:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE

transformer.h.1.mlp.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE

transformer.h.1.mlp.dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME
|
transformer.h.2.attn.attn_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: BFP[8|8]{64,-1}(SN)

transformer.h.2.attn.c_attn:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: BFP[8|8]{64,-1}(SN)
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE

transformer.h.2.attn.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE

transformer.h.2.attn.resid_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME

transformer.h.2.attn.softmax:
  approximation_function: SOFTMAX(base2,float16)
  input_format: SAME
  instance: Softmax
  output_format: SAME

transformer.h.2.ln_1:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME

transformer.h.2.ln_2:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME

transformer.h.2.mlp.act:
  approximation_function: GELU(poly2,float16)
  input_format: SAME
  instance: GELU
  output_format: SAME

transformer.h.2.mlp.c_fc:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE

transformer.h.2.mlp.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE

transformer.h.2.mlp.dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME
|
transformer.h.3.attn.attn_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: BFP[8|8]{64,-1}(SN)

transformer.h.3.attn.c_attn:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: BFP[8|8]{64,-1}(SN)
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE

transformer.h.3.attn.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE

transformer.h.3.attn.resid_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME

transformer.h.3.attn.softmax:
  approximation_function: SOFTMAX(base2,float16)
  input_format: SAME
  instance: Softmax
  output_format: SAME

transformer.h.3.ln_1:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME

transformer.h.3.ln_2:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME

transformer.h.3.mlp.act:
  approximation_function: GELU(poly2,float16)
  input_format: SAME
  instance: GELU
  output_format: SAME

transformer.h.3.mlp.c_fc:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE

transformer.h.3.mlp.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE

transformer.h.3.mlp.dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME
|
transformer.h.4.attn.attn_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: BFP[8|8]{64,-1}(SN)

transformer.h.4.attn.c_attn:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: BFP[8|8]{64,-1}(SN)
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE

transformer.h.4.attn.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE

transformer.h.4.attn.resid_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME

transformer.h.4.attn.softmax:
  approximation_function: SOFTMAX(base2,float16)
  input_format: SAME
  instance: Softmax
  output_format: SAME

transformer.h.4.ln_1:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME

transformer.h.4.ln_2:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME

transformer.h.4.mlp.act:
  approximation_function: GELU(poly2,float16)
  input_format: SAME
  instance: GELU
  output_format: SAME

transformer.h.4.mlp.c_fc:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE

transformer.h.4.mlp.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE

transformer.h.4.mlp.dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME
|
transformer.h.5.attn.attn_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: BFP[8|8]{64,-1}(SN)

transformer.h.5.attn.c_attn:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: BFP[8|8]{64,-1}(SN)
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE

transformer.h.5.attn.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE

transformer.h.5.attn.resid_dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME

transformer.h.5.attn.softmax:
  approximation_function: SOFTMAX(base2,float16)
  input_format: SAME
  instance: Softmax
  output_format: SAME

transformer.h.5.ln_1:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME

transformer.h.5.ln_2:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME

transformer.h.5.mlp.act:
  approximation_function: GELU(poly2,float16)
  input_format: SAME
  instance: GELU
  output_format: SAME

transformer.h.5.mlp.c_fc:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE

transformer.h.5.mlp.c_proj:
  approximation_function: NONE
  bias_format: SAME
  input_format: BFP[8|8]{64,-1}(SN)
  instance: HFTransformersConv1D
  output_format: SAME
  weight_format: BFP[8|8]{64,0}(SN)
  weight_sparseness: DENSE

transformer.h.5.mlp.dropout:
  approximation_function: NONE
  input_format: SAME
  instance: Dropout
  output_format: SAME
|
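# Final layer norm: same fallback approximation as the per-layer norms; from
# here through lm_head everything stays in default formats.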
transformer.ln_f:
  approximation_function: LAYERNORM(fallback,4,float16)
  bias_format: SAME
  input_format: SAME
  instance: LayerNorm
  output_format: SAME
  weight_format: SAME
|
|