Leecm committed on
Commit 1cee627 · verified · 1 Parent(s): 00b2bd1

Upload model
README.md CHANGED
@@ -14,6 +14,7 @@ model-index:
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
  should probably proofread and complete it, then remove this comment. -->
 
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="200" height="32"/>](https://wandb.ai/codi_dankook/huggingface/runs/5f3sbz8h)
  # finetune_starcoder2
 
  This model is a fine-tuned version of [bigcode/starcoder2-3b](https://huggingface.co/bigcode/starcoder2-3b) on an unknown dataset.
@@ -48,7 +49,6 @@ The following hyperparameters were used during training:
  - lr_scheduler_type: cosine
  - lr_scheduler_warmup_steps: 20
  - training_steps: 10000
- - mixed_precision_training: Native AMP
 
  ### Training results
 
@@ -57,7 +57,7 @@ The following hyperparameters were used during training:
  ### Framework versions
 
  - PEFT 0.8.2
- - Transformers 4.39.3
- - Pytorch 2.2.2
+ - Transformers 4.41.0.dev0
+ - Pytorch 2.3.0
  - Datasets 2.18.0
- - Tokenizers 0.15.2
+ - Tokenizers 0.19.1
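
For context on what this card describes: the repo holds a PEFT (LoRA) adapter on top of bigcode/starcoder2-3b. A minimal sketch of loading it for inference, assuming the adapter lives at the hypothetical repo ID `Leecm/finetune_starcoder2` (the commit page does not show the full repo path):

```python
# Minimal sketch: attach this PEFT adapter to the StarCoder2 base model.
# "Leecm/finetune_starcoder2" is a hypothetical repo ID; substitute the
# actual adapter path.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("bigcode/starcoder2-3b")
model = PeftModel.from_pretrained(base, "Leecm/finetune_starcoder2")
tokenizer = AutoTokenizer.from_pretrained("bigcode/starcoder2-3b")

inputs = tokenizer("def fibonacci(n):", return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```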
adapter_config.json CHANGED
@@ -21,9 +21,9 @@
  "target_modules": [
  "k_proj",
  "v_proj",
+ "gate_proj",
  "o_proj",
  "q_proj",
- "gate_proj",
  "down_proj",
  "up_proj"
  ],
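
The change only reorders `gate_proj` within `target_modules`; the same seven attention and MLP projections are adapted either way, plausibly because PEFT stores `target_modules` as a set internally, so the serialization order is not stable across versions. A sketch of a `LoraConfig` that would emit this list — `r`, `lora_alpha`, and `lora_dropout` are illustrative guesses, since the diff shows only the `target_modules` field:

```python
# Sketch of a LoraConfig producing the target_modules shown above.
# r, lora_alpha, and lora_dropout are assumptions, not values from the diff.
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # attention projections
        "gate_proj", "up_proj", "down_proj",     # MLP projections
    ],
    task_type="CAUSAL_LM",
)
```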
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cdd306a373798ab1a5666e822208cd0cfa345a64ab2eea10f72ac40a338b6112
- size 18218008
+ oid sha256:f9dccf8efc2fbefc3fa3c535218d7f9d94d2266cf26981d8d9cd4a43d5eae62c
+ size 9124880
final_checkpoint/config.json CHANGED
@@ -19,11 +19,26 @@
  "num_attention_heads": 24,
  "num_hidden_layers": 30,
  "num_key_value_heads": 2,
+ "quantization_config": {
+   "_load_in_4bit": true,
+   "_load_in_8bit": false,
+   "bnb_4bit_compute_dtype": "bfloat16",
+   "bnb_4bit_quant_storage": "uint8",
+   "bnb_4bit_quant_type": "nf4",
+   "bnb_4bit_use_double_quant": false,
+   "llm_int8_enable_fp32_cpu_offload": false,
+   "llm_int8_has_fp16_weight": false,
+   "llm_int8_skip_modules": null,
+   "llm_int8_threshold": 6.0,
+   "load_in_4bit": true,
+   "load_in_8bit": false,
+   "quant_method": "bitsandbytes"
+ },
  "residual_dropout": 0.1,
  "rope_theta": 999999.4420358813,
  "sliding_window": 4096,
- "torch_dtype": "float32",
- "transformers_version": "4.39.3",
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.41.0.dev0",
  "use_bias": true,
  "use_cache": true,
  "vocab_size": 49152
final_checkpoint/generation_config.json CHANGED
@@ -2,5 +2,5 @@
  "_from_model_config": true,
  "bos_token_id": 0,
  "eos_token_id": 0,
- "transformers_version": "4.39.3"
+ "transformers_version": "4.41.0.dev0"
  }
final_checkpoint/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:11a53025c306c48adf224af26dc64b4d48f7a26f6b6363c086e4c7a4424012f7
+ size 1934288228
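
At roughly 1.9 GB, the added `model.safetensors` is about what a 3B-parameter model occupies at 4 bits per weight (≈1.5 GB for the quantized linear layers, plus embeddings and metadata kept in higher precision), so the file plausibly holds the bnb-quantized base rather than a merged full-precision model. A sketch, under that assumption:

```python
# Sketch: serialize the 4-bit model; transformers/bitsandbytes have
# supported saving 4-bit checkpoints since roughly the 4.37/0.42 releases.
# Assumes `model` is the 4-bit model from the previous sketch.
model.save_pretrained("final_checkpoint")
```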
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
  {
  "version": "1.0",
- "truncation": {
-   "direction": "Right",
-   "max_length": 1024,
-   "strategy": "LongestFirst",
-   "stride": 0
- },
+ "truncation": null,
  "padding": null,
  "added_tokens": [
  {
@@ -382,6 +377,7 @@
  "end_of_word_suffix": null,
  "fuse_unk": false,
  "byte_fallback": false,
+ "ignore_merges": false,
  "vocab": {
  "<|endoftext|>": 0,
  "<fim_prefix>": 1,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e797fda69961c9263b7afd1004d35aa09ead71ef1aaa45c4bc0d8ca1401cc031
- size 4920
+ oid sha256:57b8fe122be92cf5df03a1b23adb6c226b6b076a5b1d6d2626c633c2d92fccf6
+ size 5112