doberst committed on
Commit
34f1032
1 Parent(s): 4ec7307

Upload 6 files

Browse files
Files changed (6) hide show
  1. config.json +89 -0
  2. generation_config.json +7 -0
  3. merges.txt +0 -0
  4. tokenizer.json +0 -0
  5. tokenizer_config.json +40 -0
  6. vocab.json +0 -0
config.json ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "aib_version": "model_archive_072424_qwen15_extract_eot_4",
3
+ "training_dataset": [
4
+ "extract2_new_031724_eot_2_9442.jsonl"
5
+ ],
6
+ "training_timestamp": "Wed Jul 24 10:43:25 2024",
7
+ "training_comments": "qwen2-1.5b-extract-eot-072424-4",
8
+ "vocab_size": 151936,
9
+ "max_position_embeddings": 131072,
10
+ "hidden_size": 1536,
11
+ "intermediate_size": 8960,
12
+ "num_hidden_layers": 28,
13
+ "num_attention_heads": 12,
14
+ "use_sliding_window": false,
15
+ "sliding_window": 131072,
16
+ "max_window_layers": 28,
17
+ "num_key_value_heads": 2,
18
+ "hidden_act": "silu",
19
+ "initializer_range": 0.02,
20
+ "rms_norm_eps": 1e-06,
21
+ "use_cache": true,
22
+ "rope_theta": 1000000.0,
23
+ "attention_dropout": 0.0,
24
+ "return_dict": true,
25
+ "output_hidden_states": false,
26
+ "output_attentions": false,
27
+ "torchscript": false,
28
+ "torch_dtype": "bfloat16",
29
+ "use_bfloat16": false,
30
+ "tf_legacy_loss": false,
31
+ "pruned_heads": {},
32
+ "tie_word_embeddings": true,
33
+ "chunk_size_feed_forward": 0,
34
+ "is_encoder_decoder": false,
35
+ "is_decoder": false,
36
+ "cross_attention_hidden_size": null,
37
+ "add_cross_attention": false,
38
+ "tie_encoder_decoder": false,
39
+ "max_length": 20,
40
+ "min_length": 0,
41
+ "do_sample": false,
42
+ "early_stopping": false,
43
+ "num_beams": 1,
44
+ "num_beam_groups": 1,
45
+ "diversity_penalty": 0.0,
46
+ "temperature": 1.0,
47
+ "top_k": 50,
48
+ "top_p": 1.0,
49
+ "typical_p": 1.0,
50
+ "repetition_penalty": 1.0,
51
+ "length_penalty": 1.0,
52
+ "no_repeat_ngram_size": 0,
53
+ "encoder_no_repeat_ngram_size": 0,
54
+ "bad_words_ids": null,
55
+ "num_return_sequences": 1,
56
+ "output_scores": false,
57
+ "return_dict_in_generate": false,
58
+ "forced_bos_token_id": null,
59
+ "forced_eos_token_id": null,
60
+ "remove_invalid_values": false,
61
+ "exponential_decay_length_penalty": null,
62
+ "suppress_tokens": null,
63
+ "begin_suppress_tokens": null,
64
+ "architectures": [
65
+ "Qwen2ForCausalLM"
66
+ ],
67
+ "finetuning_task": null,
68
+ "id2label": {
69
+ "0": "LABEL_0",
70
+ "1": "LABEL_1"
71
+ },
72
+ "label2id": {
73
+ "LABEL_0": 0,
74
+ "LABEL_1": 1
75
+ },
76
+ "tokenizer_class": null,
77
+ "prefix": null,
78
+ "bos_token_id": 151643,
79
+ "pad_token_id": null,
80
+ "eos_token_id": 151643,
81
+ "sep_token_id": null,
82
+ "decoder_start_token_id": null,
83
+ "task_specific_params": null,
84
+ "problem_type": null,
85
+ "_name_or_path": "Qwen/Qwen2-1.5B",
86
+ "transformers_version": "4.38.1",
87
+ "model_type": "qwen2",
88
+ "trained": "custom training"
89
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": false,
4
+ "eos_token_id": 151643,
5
+ "max_new_tokens": 2048,
6
+ "transformers_version": "4.37.0"
7
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ }
28
+ },
29
+ "additional_special_tokens": ["<|im_start|>", "<|im_end|>"],
30
+ "bos_token": null,
31
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "<|endoftext|>",
34
+ "errors": "replace",
35
+ "model_max_length": 32768,
36
+ "pad_token": "<|endoftext|>",
37
+ "split_special_tokens": false,
38
+ "tokenizer_class": "Qwen2Tokenizer",
39
+ "unk_token": null
40
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff