Vui Seng Chua committed
Commit afbe97d · 1 Parent(s): 7140a05

Add content
README.md ADDED
@@ -0,0 +1,71 @@
+ ---
+ license: other
+ tags:
+ - generated_from_trainer
+ datasets:
+ - wikitext
+ metrics:
+ - accuracy
+ model-index:
+ - name: ov-opt-350m-8bit-kv-cache
+   results:
+   - task:
+       name: Causal Language Modeling
+       type: text-generation
+     dataset:
+       name: wikitext wikitext-2-raw-v1
+       type: wikitext
+       config: wikitext-2-raw-v1
+       split: validation
+       args: wikitext-2-raw-v1
+     metrics:
+     - name: Accuracy
+       type: accuracy
+       value: 0.39524275008145976
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # ov-opt-350m-8bit-kv-cache
+
+ This model is a fine-tuned version of [facebook/opt-350m](https://huggingface.co/facebook/opt-350m) on the wikitext wikitext-2-raw-v1 dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 3.2990
+ - Accuracy: 0.3952
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-05
+ - train_batch_size: 8
+ - eval_batch_size: 1
+ - seed: 42
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - training_steps: 1
+
+ ### Training results
+
+
+
+ ### Framework versions
+
+ - Transformers 4.30.2
+ - Pytorch 2.0.1+cu117
+ - Datasets 2.13.1
+ - Tokenizers 0.13.3
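
Not part of this commit: a minimal usage sketch for the files added here, assuming `optimum[openvino]` (optimum-intel, the export tool recorded in openvino_config.json below) is installed. The model id is a placeholder for wherever this repo is hosted.

```python
# Hypothetical usage sketch -- not a committed file. Assumes optimum-intel
# is installed; replace the placeholder id with the actual Hub repo id.
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer

model_id = "ov-opt-350m-8bit-kv-cache"  # placeholder
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = OVModelForCausalLM.from_pretrained(model_id)  # loads openvino_model.xml/.bin

inputs = tokenizer("OpenVINO is", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=16)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```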
all_results.json ADDED
@@ -0,0 +1,15 @@
+ {
+     "epoch": 0.0,
+     "eval_accuracy": 0.39524275008145976,
+     "eval_loss": 3.2989656925201416,
+     "eval_runtime": 5.7076,
+     "eval_samples": 3,
+     "eval_samples_per_second": 0.526,
+     "eval_steps_per_second": 0.526,
+     "perplexity": 27.084610612866452,
+     "train_loss": 3.889699697494507,
+     "train_runtime": 39.4108,
+     "train_samples": 2355,
+     "train_samples_per_second": 0.203,
+     "train_steps_per_second": 0.025
+ }
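
The perplexity above is just the exponential of the eval loss; a one-line check (illustration, not a committed file):

```python
import math

print(math.exp(3.2989656925201416))  # 27.084610612866452, the "perplexity" value above
```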
compressed_graph.dot ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_name_or_path": "facebook/opt-350m",
+   "_remove_final_layer_norm": false,
+   "activation_dropout": 0.0,
+   "activation_function": "relu",
+   "architectures": [
+     "OPTForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 2,
+   "do_layer_norm_before": false,
+   "dropout": 0.1,
+   "enable_bias": true,
+   "eos_token_id": 2,
+   "ffn_dim": 4096,
+   "hidden_size": 1024,
+   "init_std": 0.02,
+   "layer_norm_elementwise_affine": true,
+   "layerdrop": 0.0,
+   "max_position_embeddings": 2048,
+   "model_type": "opt",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 1,
+   "prefix": "</s>",
+   "torch_dtype": "float32",
+   "transformers_version": "4.30.2",
+   "use_cache": true,
+   "vocab_size": 50272,
+   "word_embed_proj_dim": 512
+ }
eval_results.json ADDED
@@ -0,0 +1,10 @@
+ {
+     "epoch": 0.0,
+     "eval_accuracy": 0.39524275008145976,
+     "eval_loss": 3.2989656925201416,
+     "eval_runtime": 5.7076,
+     "eval_samples": 3,
+     "eval_samples_per_second": 0.526,
+     "eval_steps_per_second": 0.526,
+     "perplexity": 27.084610612866452
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 2,
+   "eos_token_id": 2,
+   "pad_token_id": 1,
+   "transformers_version": "4.30.2"
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
nncf_output.log ADDED
@@ -0,0 +1,196 @@
+ INFO:nncf:Not adding activation input quantizer for operation: 3 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/NNCFEmbedding[embed_tokens]/embedding_0
+ INFO:nncf:Not adding activation input quantizer for operation: 6 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/OPTLearnedPositionalEmbedding[embed_positions]/long_0
+ INFO:nncf:Not adding activation input quantizer for operation: 7 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/OPTLearnedPositionalEmbedding[embed_positions]/cumsum_0
+ INFO:nncf:Not adding activation input quantizer for operation: 8 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/OPTLearnedPositionalEmbedding[embed_positions]/type_as_0
+ INFO:nncf:Not adding activation input quantizer for operation: 9 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/OPTLearnedPositionalEmbedding[embed_positions]/__mul___0
+ INFO:nncf:Not adding activation input quantizer for operation: 10 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/OPTLearnedPositionalEmbedding[embed_positions]/long_1
+ INFO:nncf:Not adding activation input quantizer for operation: 11 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/OPTLearnedPositionalEmbedding[embed_positions]/__sub___0
+ INFO:nncf:Not adding activation input quantizer for operation: 12 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/OPTLearnedPositionalEmbedding[embed_positions]/__getitem___0
+ INFO:nncf:Not adding activation input quantizer for operation: 13 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/OPTLearnedPositionalEmbedding[embed_positions]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 14 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/OPTLearnedPositionalEmbedding[embed_positions]/embedding_0
+ INFO:nncf:Not adding activation input quantizer for operation: 16 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 36 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[0]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 47 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[0]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 48 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[0]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 54 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[0]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 56 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[0]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 76 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[1]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 87 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[1]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 88 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[1]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 94 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[1]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 96 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[1]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 116 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[2]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 127 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[2]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 128 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[2]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 134 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[2]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 136 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[2]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 156 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[3]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 167 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[3]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 168 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[3]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 174 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[3]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 176 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[3]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 196 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[4]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 207 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[4]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 208 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[4]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 214 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[4]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 216 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[4]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 236 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[5]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 247 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[5]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 248 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[5]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 254 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[5]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 256 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[5]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 276 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[6]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 287 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[6]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 288 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[6]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 294 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[6]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 296 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[6]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 316 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[7]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 327 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[7]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 328 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[7]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 334 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[7]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 336 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[7]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 356 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[8]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 367 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[8]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 368 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[8]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 374 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[8]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 376 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[8]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 396 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[9]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 407 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[9]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 408 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[9]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 414 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[9]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 416 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[9]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 436 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[10]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 447 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[10]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 448 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[10]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 454 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[10]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 456 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[10]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 476 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[11]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 487 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[11]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 488 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[11]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 494 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[11]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 496 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[11]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 516 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[12]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 527 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[12]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 528 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[12]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 534 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[12]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 536 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[12]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 556 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[13]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 567 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[13]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 568 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[13]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 574 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[13]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 576 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[13]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 596 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[14]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 607 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[14]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 608 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[14]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 614 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[14]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 616 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[14]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 636 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[15]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 647 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[15]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 648 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[15]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 654 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[15]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 656 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[15]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 676 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[16]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 687 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[16]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 688 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[16]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 694 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[16]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 696 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[16]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 716 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[17]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 727 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[17]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 728 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[17]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 734 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[17]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 736 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[17]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 756 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[18]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 767 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[18]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 768 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[18]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 774 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[18]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 776 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[18]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 796 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[19]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 807 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[19]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 808 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[19]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 814 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[19]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 816 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[19]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 836 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[20]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 847 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[20]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 848 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[20]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 854 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[20]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 856 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[20]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 876 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[21]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 887 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[21]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 888 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[21]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 894 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[21]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 896 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[21]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 916 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[22]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 927 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[22]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 928 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[22]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 934 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[22]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 936 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[22]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 956 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[23]/OPTAttention[self_attn]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 967 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[23]/__add___0
+ INFO:nncf:Not adding activation input quantizer for operation: 968 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[23]/NNCFLayerNorm[self_attn_layer_norm]/layer_norm_0
+ INFO:nncf:Not adding activation input quantizer for operation: 974 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[23]/__add___1
+ INFO:nncf:Not adding activation input quantizer for operation: 976 OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[23]/NNCFLayerNorm[final_layer_norm]/layer_norm_0
+ INFO:nncf:Collecting tensor statistics |████████████████| 1 / 1
+ INFO:nncf:Compiling and loading torch extension: quantized_functions_cpu...
+ INFO:nncf:Finished loading torch extension: quantized_functions_cpu
+ INFO:nncf:Statistics of the quantization algorithm:
+ Epoch 0 |+--------------------------------+-------+
+ Epoch 0 || Statistic's name | Value |
+ Epoch 0 |+================================+=======+
+ Epoch 0 || Ratio of enabled quantizations | 100 |
+ Epoch 0 |+--------------------------------+-------+
+ Epoch 0 |
+ Epoch 0 |Statistics of the quantization share:
+ Epoch 0 |+----------------------------------+----------------------+
+ Epoch 0 || Statistic's name | Value |
+ Epoch 0 |+==================================+======================+
+ Epoch 0 || Symmetric WQs / All placed WQs | 100.00 % (147 / 147) |
+ Epoch 0 |+----------------------------------+----------------------+
+ Epoch 0 || Asymmetric WQs / All placed WQs | 0.00 % (0 / 147) |
+ Epoch 0 |+----------------------------------+----------------------+
+ Epoch 0 || Signed WQs / All placed WQs | 100.00 % (147 / 147) |
+ Epoch 0 |+----------------------------------+----------------------+
+ Epoch 0 || Unsigned WQs / All placed WQs | 0.00 % (0 / 147) |
+ Epoch 0 |+----------------------------------+----------------------+
+ Epoch 0 || Per-tensor WQs / All placed WQs | 0.00 % (0 / 147) |
+ Epoch 0 |+----------------------------------+----------------------+
+ Epoch 0 || Per-channel WQs / All placed WQs | 100.00 % (147 / 147) |
+ Epoch 0 |+----------------------------------+----------------------+
+ Epoch 0 || Placed WQs / Potential WQs | 75.00 % (147 / 196) |
+ Epoch 0 |+----------------------------------+----------------------+
+ Epoch 0 || Symmetric AQs / All placed AQs | 100.00 % (243 / 243) |
+ Epoch 0 |+----------------------------------+----------------------+
+ Epoch 0 || Asymmetric AQs / All placed AQs | 0.00 % (0 / 243) |
+ Epoch 0 |+----------------------------------+----------------------+
+ Epoch 0 || Signed AQs / All placed AQs | 80.25 % (195 / 243) |
+ Epoch 0 |+----------------------------------+----------------------+
+ Epoch 0 || Unsigned AQs / All placed AQs | 19.75 % (48 / 243) |
+ Epoch 0 |+----------------------------------+----------------------+
+ Epoch 0 || Per-tensor AQs / All placed AQs | 100.00 % (243 / 243) |
+ Epoch 0 |+----------------------------------+----------------------+
+ Epoch 0 || Per-channel AQs / All placed AQs | 0.00 % (0 / 243) |
+ Epoch 0 |+----------------------------------+----------------------+
+ Epoch 0 |
+ Epoch 0 |Statistics of the bitwidth distribution:
+ Epoch 0 |+--------------+---------------------+--------------------+--------------------+
+ Epoch 0 || Num bits (N) | N-bits WQs / Placed | N-bits AQs / | N-bits Qs / Placed |
+ Epoch 0 || | WQs | Placed AQs | Qs |
+ Epoch 0 |+==============+=====================+====================+====================+
+ Epoch 0 || 8 | 100.00 % (147 / | 100.00 % (243 / | 100.00 % (390 / |
+ Epoch 0 || | 147) | 243) | 390) |
+ Epoch 0 |+--------------+---------------------+--------------------+--------------------+
+ WARNING:nncf:You are setting `forward` on an NNCF-processed model object.
+ NNCF relies on custom-wrapping the `forward` call in order to function properly.
+ Arbitrary adjustments to the forward function on an NNCFNetwork object have undefined behaviour.
+ If you need to replace the underlying forward function of the original model so that NNCF should be using that instead of the original forward function that NNCF saved during the compressed model creation, you can do this by calling:
+ model.nncf.set_original_unbound_forward(fn)
+ if `fn` has an unbound 0-th `self` argument, or
+ with model.nncf.temporary_bound_original_forward(fn): ...
+ if `fn` already had 0-th `self` argument bound or never had it in the first place.
+ WARNING:nncf:You are setting `forward` on an NNCF-processed model object.
+ NNCF relies on custom-wrapping the `forward` call in order to function properly.
+ Arbitrary adjustments to the forward function on an NNCFNetwork object have undefined behaviour.
+ If you need to replace the underlying forward function of the original model so that NNCF should be using that instead of the original forward function that NNCF saved during the compressed model creation, you can do this by calling:
+ model.nncf.set_original_unbound_forward(fn)
+ if `fn` has an unbound 0-th `self` argument, or
+ with model.nncf.temporary_bound_original_forward(fn): ...
+ if `fn` already had 0-th `self` argument bound or never had it in the first place.
openvino_config.json ADDED
@@ -0,0 +1,53 @@
+ {
+   "compression": {
+     "algorithm": "quantization",
+     "export_to_onnx_standard_ops": false,
+     "ignored_scopes": [
+       "{re}.*Embedding*",
+       "{re}.*__add___*",
+       "{re}.*layer_norm_*"
+     ],
+     "initializer": {
+       "batchnorm_adaptation": {
+         "num_bn_adaptation_samples": 0
+       },
+       "range": {
+         "num_init_samples": 8,
+         "type": "mean_min_max"
+       }
+     },
+     "overflow_fix": "disable",
+     "scope_overrides": {
+       "activations": {
+         "activations": {
+           "mode": "symmetric"
+         },
+         "weights": {
+           "mode": "symmetric"
+         }
+       }
+     }
+   },
+   "input_info": [
+     {
+       "keyword": "input_ids",
+       "sample_size": [
+         8,
+         1024
+       ],
+       "type": "long"
+     },
+     {
+       "keyword": "attention_mask",
+       "sample_size": [
+         8,
+         1024
+       ],
+       "type": "long"
+     }
+   ],
+   "log_dir": "/data1/vchua/temp/ov-opt-350m-8bit-kv-cache",
+   "optimum_version": "1.8.8",
+   "save_onnx_model": false,
+   "transformers_version": "4.30.2"
+ }
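
In NNCF configs the `{re}` prefix marks an `ignored_scopes` entry as a regular expression; these three patterns are what made nncf_output.log above skip activation quantizers on the embedding, residual-add, and layer-norm ops. A small check (illustration, not a committed file):

```python
import re

# "ignored_scopes" patterns from above, with the NNCF "{re}" prefix stripped.
patterns = [".*Embedding*", ".*__add___*", ".*layer_norm_*"]

# Scope names copied from nncf_output.log above.
skipped = [
    "OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/NNCFEmbedding[embed_tokens]/embedding_0",
    "OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[0]/__add___0",
    "OPTForCausalLM/OPTModel[model]/OPTDecoder[decoder]/ModuleList[layers]/OPTDecoderLayer[0]/NNCFLayerNorm[final_layer_norm]/layer_norm_0",
]
for scope in skipped:
    # Every skipped op should match at least one ignored-scope pattern.
    assert any(re.search(p, scope) for p in patterns), scope
print("all skipped scopes match an ignored_scopes pattern")
```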
openvino_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e2e8dfdc5f755d20192b0a9c2df045f30e1f8383835d98f5b32ef0e5d3b71792
+ size 442503296
openvino_model.xml ADDED
The diff for this file is too large to render. See raw diff
 
original_graph.dot ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dee581e3b3d17a203c595207964ddb7e97c2930765d1a3dc8dca1316b1e3912e
+ size 1326641137
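
A rough consistency check (illustration, not a committed file): OPT-350m has roughly 331M parameters, and stored in float32 (per `"torch_dtype": "float32"` in config.json above) that lines up with the checkpoint size in the LFS pointer:

```python
n_params = 331_000_000     # approximate parameter count of facebook/opt-350m
print(n_params * 4 / 1e9)  # ≈ 1.32 GB; the pointer above says 1326641137 bytes
```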
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "add_bos_token": true,
+   "add_prefix_space": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "clean_up_tokenization_spaces": true,
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "errors": "replace",
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": {
+     "__type": "AddedToken",
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 0.0,
+     "train_loss": 3.889699697494507,
+     "train_runtime": 39.4108,
+     "train_samples": 2355,
+     "train_samples_per_second": 0.203,
+     "train_steps_per_second": 0.025
+ }
trainer_state.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 0.003389830508474576,
+   "global_step": 1,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.0,
+       "step": 1,
+       "total_flos": 14924118441984.0,
+       "train_loss": 3.889699697494507,
+       "train_runtime": 39.4108,
+       "train_samples_per_second": 0.203,
+       "train_steps_per_second": 0.025
+     }
+   ],
+   "max_steps": 1,
+   "num_train_epochs": 1,
+   "total_flos": 14924118441984.0,
+   "trial_name": null,
+   "trial_params": null
+ }
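
The fractional epoch above is consistent with the other files: 2355 train samples (all_results.json) at train_batch_size 8 (README) give 295 steps per epoch, so a single optimizer step is epoch 1/295. A quick check (illustration, not a committed file; assumes the Trainer's default of keeping the last partial batch):

```python
import math

steps_per_epoch = math.ceil(2355 / 8)  # 2355 train samples, batch size 8
print(steps_per_epoch)                 # 295
print(1 / steps_per_epoch)             # 0.003389830508474576 == "epoch" after global_step 1
```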
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7903bf9c017842a1fa70e3b28f38662ef9e65e5af9d096ace92e488c5b03b1fe
+ size 3963
vocab.json ADDED
The diff for this file is too large to render. See raw diff