thangvip committed on Mar 22

Commit

9be2824

•

1 Parent(s): 1358524

push model

Browse files

Files changed (42) hide show

README.md +52 -0
added_tokens.json +5 -0
all_results.json +9 -0
checkpoint-704/added_tokens.json +5 -0
checkpoint-704/config.json +28 -0
checkpoint-704/generation_config.json +6 -0
checkpoint-704/merges.txt +0 -0
checkpoint-704/model.safetensors +3 -0
checkpoint-704/optimizer.pt +3 -0
checkpoint-704/rng_state.pth +3 -0
checkpoint-704/scheduler.pt +3 -0
checkpoint-704/special_tokens_map.json +20 -0
checkpoint-704/tokenizer.json +0 -0
checkpoint-704/tokenizer_config.json +43 -0
checkpoint-704/trainer_state.json +28 -0
checkpoint-704/training_args.bin +3 -0
checkpoint-704/vocab.json +0 -0
checkpoint-720/added_tokens.json +5 -0
checkpoint-720/config.json +28 -0
checkpoint-720/generation_config.json +6 -0
checkpoint-720/merges.txt +0 -0
checkpoint-720/model.safetensors +3 -0
checkpoint-720/optimizer.pt +3 -0
checkpoint-720/rng_state.pth +3 -0
checkpoint-720/scheduler.pt +3 -0
checkpoint-720/special_tokens_map.json +20 -0
checkpoint-720/tokenizer.json +0 -0
checkpoint-720/tokenizer_config.json +43 -0
checkpoint-720/trainer_state.json +28 -0
checkpoint-720/training_args.bin +3 -0
checkpoint-720/vocab.json +0 -0
config.json +28 -0
generation_config.json +6 -0
merges.txt +0 -0
model.safetensors +3 -0
special_tokens_map.json +20 -0
tokenizer.json +0 -0
tokenizer_config.json +43 -0
train_results.json +9 -0
trainer_state.json +45 -0
training_args.bin +3 -0
vocab.json +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,52 @@

+---
+license: apache-2.0
+base_model: sail/Sailor-0.5B
+tags:
+- trl
+- sft
+- generated_from_trainer
+model-index:
+- name: pirate-0.5
+ results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# pirate-0.5
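+This model is a fine-tuned version of [sail/Sailor-0.5B](https://huggingface.co/sail/Sailor-0.5B) on an unknown dataset. It achieves a loss of 2.2119 on the evaluation set after one epoch of training (722 optimizer steps).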
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0001
+- train_batch_size: 1
+- eval_batch_size: 1
+- seed: 42
+- gradient_accumulation_steps: 16
+- total_train_batch_size: 16
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- num_epochs: 1
+### Framework versions
+- Transformers 4.39.0
+- Pytorch 2.2.1+cu121
+- Datasets 2.18.0
+- Tokenizers 0.15.2

added_tokens.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+ "<|endoftext|>": 151643,
+ "<|im_end|>": 151645,
+ "<|im_start|>": 151644
+}

all_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+ "epoch": 1.0,
+ "total_flos": 6.586350857040691e+16,
+ "train_loss": 2.345679919805553,
+ "train_runtime": 10097.1443,
+ "train_samples": 11564,
+ "train_samples_per_second": 1.145,
+ "train_steps_per_second": 0.072
+}

checkpoint-704/added_tokens.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+ "<|endoftext|>": 151643,
+ "<|im_end|>": 151645,
+ "<|im_start|>": 151644
+}

checkpoint-704/config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+ "_name_or_path": "sail/Sailor-0.5B",
+ "architectures": [
+ "Qwen2ForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 151643,
+ "eos_token_id": 151643,
+ "hidden_act": "silu",
+ "hidden_size": 1024,
+ "initializer_range": 0.02,
+ "intermediate_size": 2816,
+ "max_position_embeddings": 32768,
+ "max_window_layers": 21,
+ "model_type": "qwen2",
+ "num_attention_heads": 16,
+ "num_hidden_layers": 24,
+ "num_key_value_heads": 16,
+ "rms_norm_eps": 1e-06,
+ "rope_theta": 1000000.0,
+ "sliding_window": 32768,
+ "tie_word_embeddings": false,
+ "torch_dtype": "float32",
+ "transformers_version": "4.39.0",
+ "use_cache": false,
+ "use_sliding_window": false,
+ "vocab_size": 151936
+}

checkpoint-704/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+ "bos_token_id": 151643,
+ "eos_token_id": 151643,
+ "max_new_tokens": 2048,
+ "transformers_version": "4.39.0"
+}

checkpoint-704/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-704/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4ac8505106ce5bce1ae65a13be27e7c99cc3113e26b6da42fd7316dc80546fb8
+size 2478313760

checkpoint-704/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:57a973a5b8394aeda9a3416f629a91b730807b4450b13db60cbd8b263418c24f
+size 4956808758

checkpoint-704/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2836f8619966597a96aa93766bae75f701ea0f05f6c5ec4da2400e566eb2157e
+size 14180

checkpoint-704/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:25593381fb2d18a57982e3685b21e92e41bb3756b66d314972de8531fe86d4d9
+size 1064

checkpoint-704/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,20 @@

+{
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>"
+ ],
+ "eos_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}

checkpoint-704/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-704/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "151643": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151644": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151645": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>"
+ ],
+ "bos_token": null,
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|endoftext|>",
+ "errors": "replace",
+ "model_max_length": 2048,
+ "pad_token": "<|endoftext|>",
+ "split_special_tokens": false,
+ "tokenizer_class": "Qwen2Tokenizer",
+ "unk_token": null
+}

checkpoint-704/trainer_state.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.9740574195780007,
+ "eval_steps": 500,
+ "global_step": 704,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.69,
+ "grad_norm": 1.6492760181427002,
+ "learning_rate": 3.074792243767313e-05,
+ "loss": 2.393,
+ "step": 500
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 722,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 1,
+ "save_steps": 16,
+ "total_flos": 6.422148204095078e+16,
+ "train_batch_size": 1,
+ "trial_name": null,
+ "trial_params": null
+}

checkpoint-704/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b7d4ae8d399268e950a2907d698c780c677d3e9ca2ca197ca7574643ca3afddb
+size 4920

checkpoint-704/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-720/added_tokens.json ADDED Viewed

	@@ -0,0 +1,5 @@

+{
+ "<|endoftext|>": 151643,
+ "<|im_end|>": 151645,
+ "<|im_start|>": 151644
+}

checkpoint-720/config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+ "_name_or_path": "sail/Sailor-0.5B",
+ "architectures": [
+ "Qwen2ForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 151643,
+ "eos_token_id": 151643,
+ "hidden_act": "silu",
+ "hidden_size": 1024,
+ "initializer_range": 0.02,
+ "intermediate_size": 2816,
+ "max_position_embeddings": 32768,
+ "max_window_layers": 21,
+ "model_type": "qwen2",
+ "num_attention_heads": 16,
+ "num_hidden_layers": 24,
+ "num_key_value_heads": 16,
+ "rms_norm_eps": 1e-06,
+ "rope_theta": 1000000.0,
+ "sliding_window": 32768,
+ "tie_word_embeddings": false,
+ "torch_dtype": "float32",
+ "transformers_version": "4.39.0",
+ "use_cache": false,
+ "use_sliding_window": false,
+ "vocab_size": 151936
+}

checkpoint-720/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+ "bos_token_id": 151643,
+ "eos_token_id": 151643,
+ "max_new_tokens": 2048,
+ "transformers_version": "4.39.0"
+}

checkpoint-720/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-720/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6212ecbd55c15204cf42b1cd0ac1b84ad584fbe6d4244e51ed4d98c23436cdaa
+size 2478313760

checkpoint-720/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7e0d2a6a7ddf48e96eaab0d3002b43c61a5bf9d954988cff21babbaa9e093334
+size 4956808758

checkpoint-720/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2836f8619966597a96aa93766bae75f701ea0f05f6c5ec4da2400e566eb2157e
+size 14180

checkpoint-720/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e143127b26e54908981dd0613514cd8815de95036afbec20c87b2202ee37a429
+size 1064

checkpoint-720/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,20 @@

+{
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>"
+ ],
+ "eos_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}

checkpoint-720/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-720/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "151643": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151644": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151645": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>"
+ ],
+ "bos_token": null,
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|endoftext|>",
+ "errors": "replace",
+ "model_max_length": 2048,
+ "pad_token": "<|endoftext|>",
+ "split_special_tokens": false,
+ "tokenizer_class": "Qwen2Tokenizer",
+ "unk_token": null
+}

checkpoint-720/trainer_state.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.9961950882047734,
+ "eval_steps": 500,
+ "global_step": 720,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.69,
+ "grad_norm": 1.6492760181427002,
+ "learning_rate": 3.074792243767313e-05,
+ "loss": 2.393,
+ "step": 500
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 722,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 1,
+ "save_steps": 16,
+ "total_flos": 6.568106117824512e+16,
+ "train_batch_size": 1,
+ "trial_name": null,
+ "trial_params": null
+}

checkpoint-720/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b7d4ae8d399268e950a2907d698c780c677d3e9ca2ca197ca7574643ca3afddb
+size 4920

checkpoint-720/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+ "_name_or_path": "sail/Sailor-0.5B",
+ "architectures": [
+ "Qwen2ForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 151643,
+ "eos_token_id": 151643,
+ "hidden_act": "silu",
+ "hidden_size": 1024,
+ "initializer_range": 0.02,
+ "intermediate_size": 2816,
+ "max_position_embeddings": 32768,
+ "max_window_layers": 21,
+ "model_type": "qwen2",
+ "num_attention_heads": 16,
+ "num_hidden_layers": 24,
+ "num_key_value_heads": 16,
+ "rms_norm_eps": 1e-06,
+ "rope_theta": 1000000.0,
+ "sliding_window": 32768,
+ "tie_word_embeddings": false,
+ "torch_dtype": "float32",
+ "transformers_version": "4.39.0",
+ "use_cache": false,
+ "use_sliding_window": false,
+ "vocab_size": 151936
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+ "bos_token_id": 151643,
+ "eos_token_id": 151643,
+ "max_new_tokens": 2048,
+ "transformers_version": "4.39.0"
+}

merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:463fd004ca4166b871e4ecac22a31dada7611b529e6000fd11ba7ccd7cf5c92b
+size 2478313760

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,20 @@

+{
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>"
+ ],
+ "eos_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+ "add_prefix_space": false,
+ "added_tokens_decoder": {
+ "151643": {
+ "content": "<|endoftext|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151644": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "151645": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "<|im_start|>",
+ "<|im_end|>"
+ ],
+ "bos_token": null,
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|endoftext|>",
+ "errors": "replace",
+ "model_max_length": 2048,
+ "pad_token": "<|endoftext|>",
+ "split_special_tokens": false,
+ "tokenizer_class": "Qwen2Tokenizer",
+ "unk_token": null
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+ "epoch": 1.0,
+ "total_flos": 6.586350857040691e+16,
+ "train_loss": 2.345679919805553,
+ "train_runtime": 10097.1443,
+ "train_samples": 11564,
+ "train_samples_per_second": 1.145,
+ "train_steps_per_second": 0.072
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,45 @@

+{
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 0.99896229678312,
+ "eval_steps": 500,
+ "global_step": 722,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.69,
+ "grad_norm": 1.6492760181427002,
+ "learning_rate": 3.074792243767313e-05,
+ "loss": 2.393,
+ "step": 500
+ },
+ {
+ "epoch": 1.0,
+ "eval_loss": 2.211937189102173,
+ "eval_runtime": 268.8732,
+ "eval_samples_per_second": 4.779,
+ "eval_steps_per_second": 4.779,
+ "step": 722
+ },
+ {
+ "epoch": 1.0,
+ "step": 722,
+ "total_flos": 6.586350857040691e+16,
+ "train_loss": 2.345679919805553,
+ "train_runtime": 10097.1443,
+ "train_samples_per_second": 1.145,
+ "train_steps_per_second": 0.072
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 722,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 1,
+ "save_steps": 16,
+ "total_flos": 6.586350857040691e+16,
+ "train_batch_size": 1,
+ "trial_name": null,
+ "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e53c5206754d6bc986a8887120b97ccfab44b73a311041049e80ef1642ed8e8d
+size 4920

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff