qwen2-chat-14b-chinese

Files changed (7) hide show

README.md CHANGED Viewed

@@ -1,6 +1,5 @@
 ---
-license: other
-base_model: baichuan-inc/Baichuan2-13B-Chat
 tags:
 - generated_from_trainer
 model-index:
@@ -13,7 +12,7 @@ should probably proofread and complete it, then remove this comment. -->
 # results
-This model is a fine-tuned version of [baichuan-inc/Baichuan2-13B-Chat](https://huggingface.co/baichuan-inc/Baichuan2-13B-Chat) on an unknown dataset.
 ## Model description
@@ -33,11 +32,11 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 0.0002
-- train_batch_size: 4
 - eval_batch_size: 8
 - seed: 42
-- gradient_accumulation_steps: 4
-- total_train_batch_size: 16
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: constant
 - lr_scheduler_warmup_ratio: 0.03
@@ -49,7 +48,7 @@ The following hyperparameters were used during training:
 ### Framework versions
-- Transformers 4.33.2
-- Pytorch 2.0.1+cu118
 - Datasets 2.14.5
-- Tokenizers 0.13.3

 ---
+base_model: Qwen/Qwen-14B-Chat
 tags:
 - generated_from_trainer
 model-index:
 # results
+This model is a fine-tuned version of [Qwen/Qwen-14B-Chat](https://huggingface.co/Qwen/Qwen-14B-Chat) on an unknown dataset.
 ## Model description
 The following hyperparameters were used during training:
 - learning_rate: 0.0002
+- train_batch_size: 2
 - eval_batch_size: 8
 - seed: 42
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 4
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: constant
 - lr_scheduler_warmup_ratio: 0.03
 ### Framework versions
+- Transformers 4.34.0
+- Pytorch 2.1.0+cu121
 - Datasets 2.14.5
+- Tokenizers 0.14.1

adapter_config.json CHANGED Viewed

@@ -1,6 +1,7 @@
 {
   "auto_mapping": null,
-  "base_model_name_or_path": "baichuan-inc/Baichuan2-13B-Chat",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
@@ -12,10 +13,11 @@
   "modules_to_save": null,
   "peft_type": "LORA",
   "r": 1,
   "revision": null,
   "target_modules": [
-    "W_pack",
-    "o_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

 {
+  "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen-14B-Chat",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
   "modules_to_save": null,
   "peft_type": "LORA",
   "r": 1,
+  "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "c_attn",
+    "c_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

adapter_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3456e260d60c1c9a2186b8bd97a365a9935a87a1953bb67b92e5cddffafe1fb0
-size 4972557

 version https://git-lfs.github.com/spec/v1
+oid sha256:535173193eff181e5d75a310c4ecc0c41a86d716bf8107cc2b5bf5c2aae1e619
+size 8011402

qwen.tiktoken ADDED Viewed

The diff for this file is too large to render. See raw diff

special_tokens_map.json CHANGED Viewed

@@ -1,24 +1,3 @@
 {
-  "bos_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": true
-  },
-  "eos_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": true
-  },
-  "pad_token": "</s>",
-  "unk_token": {
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": true
-  }
 }

 {
+  "pad_token": "<|endoftext|>"
 }

tokenizer_config.json CHANGED Viewed

@@ -1,46 +1,15 @@
 {
-  "add_bos_token": false,
-  "add_eos_token": false,
   "auto_map": {
     "AutoTokenizer": [
-      "baichuan-inc/Baichuan2-13B-Chat--tokenization_baichuan.BaichuanTokenizer",
       null
     ]
   },
-  "bos_token": {
-    "__type": "AddedToken",
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": true
-  },
-  "clean_up_tokenization_spaces": false,
-  "eos_token": {
-    "__type": "AddedToken",
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": true
-  },
-  "model_max_length": 4096,
-  "pad_token": {
-    "__type": "AddedToken",
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": true
-  },
-  "sp_model_kwargs": {},
-  "tokenizer_class": "BaichuanTokenizer",
-  "unk_token": {
-    "__type": "AddedToken",
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": true
-  }
 }

 {
+  "added_tokens_decoder": {},
+  "additional_special_tokens": [],
   "auto_map": {
     "AutoTokenizer": [
+      "Qwen/Qwen-14B-Chat--tokenization_qwen.QWenTokenizer",
       null
     ]
   },
+  "clean_up_tokenization_spaces": true,
+  "model_max_length": 8192,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "QWenTokenizer",
+  "tokenizer_file": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:44c5b42a73a5922e9f424220ed93104b7522c3933322acbae24277a9adc13c86
-size 4027

 version https://git-lfs.github.com/spec/v1
+oid sha256:9f43f9cb85dcb6a0fd6bb9c7f9686d5a1982c15b07fe384da38edd02a572121a
+size 4536