sean-xl-y committed on
Commit
974a42b
1 Parent(s): fbe57e5

qwen2-chat-14b-chinese

Browse files
README.md CHANGED
@@ -1,6 +1,5 @@
1
  ---
2
- license: other
3
- base_model: baichuan-inc/Baichuan2-13B-Chat
4
  tags:
5
  - generated_from_trainer
6
  model-index:
@@ -13,7 +12,7 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # results
15
 
16
- This model is a fine-tuned version of [baichuan-inc/Baichuan2-13B-Chat](https://huggingface.co/baichuan-inc/Baichuan2-13B-Chat) on an unknown dataset.
17
 
18
  ## Model description
19
 
@@ -33,11 +32,11 @@ More information needed
33
 
34
  The following hyperparameters were used during training:
35
  - learning_rate: 0.0002
36
- - train_batch_size: 4
37
  - eval_batch_size: 8
38
  - seed: 42
39
- - gradient_accumulation_steps: 4
40
- - total_train_batch_size: 16
41
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
  - lr_scheduler_type: constant
43
  - lr_scheduler_warmup_ratio: 0.03
@@ -49,7 +48,7 @@ The following hyperparameters were used during training:
49
 
50
  ### Framework versions
51
 
52
- - Transformers 4.33.2
53
- - Pytorch 2.0.1+cu118
54
  - Datasets 2.14.5
55
- - Tokenizers 0.13.3
 
1
  ---
2
+ base_model: Qwen/Qwen-14B-Chat
 
3
  tags:
4
  - generated_from_trainer
5
  model-index:
 
12
 
13
  # results
14
 
15
+ This model is a fine-tuned version of [Qwen/Qwen-14B-Chat](https://huggingface.co/Qwen/Qwen-14B-Chat) on an unknown dataset.
16
 
17
  ## Model description
18
 
 
32
 
33
  The following hyperparameters were used during training:
34
  - learning_rate: 0.0002
35
+ - train_batch_size: 2
36
  - eval_batch_size: 8
37
  - seed: 42
38
+ - gradient_accumulation_steps: 2
39
+ - total_train_batch_size: 4
40
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
41
  - lr_scheduler_type: constant
42
  - lr_scheduler_warmup_ratio: 0.03
 
48
 
49
  ### Framework versions
50
 
51
+ - Transformers 4.34.0
52
+ - Pytorch 2.1.0+cu121
53
  - Datasets 2.14.5
54
+ - Tokenizers 0.14.1
adapter_config.json CHANGED
@@ -1,6 +1,7 @@
1
  {
 
2
  "auto_mapping": null,
3
- "base_model_name_or_path": "baichuan-inc/Baichuan2-13B-Chat",
4
  "bias": "none",
5
  "fan_in_fan_out": false,
6
  "inference_mode": true,
@@ -12,10 +13,11 @@
12
  "modules_to_save": null,
13
  "peft_type": "LORA",
14
  "r": 1,
 
15
  "revision": null,
16
  "target_modules": [
17
- "W_pack",
18
- "o_proj"
19
  ],
20
  "task_type": "CAUSAL_LM"
21
  }
 
1
  {
2
+ "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "Qwen/Qwen-14B-Chat",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
 
13
  "modules_to_save": null,
14
  "peft_type": "LORA",
15
  "r": 1,
16
+ "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
+ "c_attn",
20
+ "c_proj"
21
  ],
22
  "task_type": "CAUSAL_LM"
23
  }
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3456e260d60c1c9a2186b8bd97a365a9935a87a1953bb67b92e5cddffafe1fb0
3
- size 4972557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:535173193eff181e5d75a310c4ecc0c41a86d716bf8107cc2b5bf5c2aae1e619
3
+ size 8011402
qwen.tiktoken ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json CHANGED
@@ -1,24 +1,3 @@
1
  {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": true,
6
- "rstrip": false,
7
- "single_word": true
8
- },
9
- "eos_token": {
10
- "content": "</s>",
11
- "lstrip": false,
12
- "normalized": true,
13
- "rstrip": false,
14
- "single_word": true
15
- },
16
- "pad_token": "</s>",
17
- "unk_token": {
18
- "content": "<unk>",
19
- "lstrip": false,
20
- "normalized": true,
21
- "rstrip": false,
22
- "single_word": true
23
- }
24
  }
 
1
  {
2
+ "pad_token": "<|endoftext|>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  }
tokenizer_config.json CHANGED
@@ -1,46 +1,15 @@
1
  {
2
- "add_bos_token": false,
3
- "add_eos_token": false,
4
  "auto_map": {
5
  "AutoTokenizer": [
6
- "baichuan-inc/Baichuan2-13B-Chat--tokenization_baichuan.BaichuanTokenizer",
7
  null
8
  ]
9
  },
10
- "bos_token": {
11
- "__type": "AddedToken",
12
- "content": "<s>",
13
- "lstrip": false,
14
- "normalized": true,
15
- "rstrip": false,
16
- "single_word": true
17
- },
18
- "clean_up_tokenization_spaces": false,
19
- "eos_token": {
20
- "__type": "AddedToken",
21
- "content": "</s>",
22
- "lstrip": false,
23
- "normalized": true,
24
- "rstrip": false,
25
- "single_word": true
26
- },
27
- "model_max_length": 4096,
28
- "pad_token": {
29
- "__type": "AddedToken",
30
- "content": "<unk>",
31
- "lstrip": false,
32
- "normalized": true,
33
- "rstrip": false,
34
- "single_word": true
35
- },
36
- "sp_model_kwargs": {},
37
- "tokenizer_class": "BaichuanTokenizer",
38
- "unk_token": {
39
- "__type": "AddedToken",
40
- "content": "<unk>",
41
- "lstrip": false,
42
- "normalized": true,
43
- "rstrip": false,
44
- "single_word": true
45
- }
46
  }
 
1
  {
2
+ "added_tokens_decoder": {},
3
+ "additional_special_tokens": [],
4
  "auto_map": {
5
  "AutoTokenizer": [
6
+ "Qwen/Qwen-14B-Chat--tokenization_qwen.QWenTokenizer",
7
  null
8
  ]
9
  },
10
+ "clean_up_tokenization_spaces": true,
11
+ "model_max_length": 8192,
12
+ "pad_token": "<|endoftext|>",
13
+ "tokenizer_class": "QWenTokenizer",
14
+ "tokenizer_file": null
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44c5b42a73a5922e9f424220ed93104b7522c3933322acbae24277a9adc13c86
3
- size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f43f9cb85dcb6a0fd6bb9c7f9686d5a1982c15b07fe384da38edd02a572121a
3
+ size 4536