Xingchao Liu committed on
Commit ca941de
1 Parent(s): 735aa29

upload model files

config.json ADDED
@@ -0,0 +1,68 @@
+ {
+   "architectures": [
+     "MultiModalityCausalLM"
+   ],
+   "language_config": {
+     "hidden_size": 2048,
+     "intermediate_size": 5632,
+     "max_position_embeddings": 16384,
+     "model_type": "llama",
+     "num_attention_heads": 16,
+     "num_hidden_layers": 24,
+     "num_key_value_heads": 16,
+     "torch_dtype": "bfloat16",
+     "vocab_size": 102400
+   },
+   "model_type": "multi_modality",
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.38.1",
+   "vision_gen_dec_config": {
+     "cls": "ShallowUViTDecoder",
+     "model_type": "vision_gen_dec",
+     "params": {
+       "block_out_channels": [
+         768
+       ],
+       "elementwise_affine": true,
+       "hidden_size": 2048,
+       "in_channels": 768,
+       "layers_in_middle": 2,
+       "norm_eps": 1e-06,
+       "out_channels": 4,
+       "upsamples": 1,
+       "use_bias": true,
+       "use_mid_block": true
+     }
+   },
+   "vision_gen_enc_config": {
+     "cls": "ShallowUViTEncoder",
+     "model_type": "vision_gen_enc",
+     "params": {
+       "block_out_channels": [
+         768
+       ],
+       "elementwize_affine": true,
+       "hidden_size": 2048,
+       "input_channels": 4,
+       "kernel_size": 2,
+       "layers_in_middle": 2,
+       "norm_eps": 1e-06,
+       "num_extra_tensors": 5,
+       "padding": 0,
+       "stride": 2,
+       "use_bias": true,
+       "use_mid_block": true
+     }
+   },
+   "vision_und_enc_config": {
+     "cls": "CLIPVisionTower",
+     "model_type": "vision_und_enc",
+     "params": {
+       "image_size": 384,
+       "model_name": "siglip_large_patch16_384",
+       "select_feature": "same",
+       "select_layer": -1,
+       "width": 1024
+     }
+   }
+ }
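A minimal sketch (standard library only) of how this config can be read and the three sub-module definitions pulled out; `config.json` is assumed to be the file added in this commit, opened from a local clone of the repository.

```python
# Read the committed config.json and inspect the pieces that define
# the MultiModalityCausalLM model: the language backbone plus the
# understanding/generation vision modules.
import json

with open("config.json") as f:
    cfg = json.load(f)

lang = cfg["language_config"]
print(lang["model_type"], lang["hidden_size"], lang["num_hidden_layers"])
# -> llama 2048 24  (24-layer LLaMA-style backbone, 102400-token vocab)

for key in ("vision_und_enc_config", "vision_gen_enc_config", "vision_gen_dec_config"):
    sub = cfg[key]
    print(key, "->", sub["cls"])
# understanding encoder: CLIPVisionTower (siglip_large_patch16_384, 384px)
# generation encoder/decoder: ShallowUViTEncoder / ShallowUViTDecoder
```

Note that loading the model class itself requires the repository's custom `MultiModalityCausalLM` code; the sketch above only parses the committed JSON.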
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2e43167cf7a74edfaed1c35e7450c0e1f8345ff39f825e5372a0dbda8ef77eeb
+ size 4092812280
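The committed `model.safetensors` is a Git LFS pointer, not the weights themselves; the real file (~4.09 GB) is fetched separately, e.g. via `git lfs pull`. A minimal sketch that checks a downloaded copy against the pointer's recorded sha256 and byte size:

```python
# Verify a locally downloaded model.safetensors against the LFS pointer
# recorded in this commit (oid sha256 and size fields above).
import hashlib
from pathlib import Path

EXPECTED_SHA256 = "2e43167cf7a74edfaed1c35e7450c0e1f8345ff39f825e5372a0dbda8ef77eeb"
EXPECTED_SIZE = 4092812280

path = Path("model.safetensors")  # the actual weights, not the pointer file
assert path.stat().st_size == EXPECTED_SIZE, "size mismatch"

h = hashlib.sha256()
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)
assert h.hexdigest() == EXPECTED_SHA256, "sha256 mismatch"
print("model.safetensors matches the LFS pointer")
```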
preprocessor_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "background_color": [
+     127,
+     127,
+     127
+   ],
+   "do_normalize": true,
+   "image_mean": [
+     0.5,
+     0.5,
+     0.5
+   ],
+   "image_processor_type": "VLMImageProcessor",
+   "image_size": 384,
+   "image_std": [
+     0.5,
+     0.5,
+     0.5
+   ],
+   "interpolate_mode": "bilinear",
+   "max_size": 1536,
+   "min_size": 14,
+   "processor_class": "OmniProcessor",
+   "rescale_factor": 0.00392156862745098,
+   "resize_strategy": "aspect_ratio_pad"
+ }
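A minimal sketch of the pixel math implied by this preprocessor config: values are rescaled by 1/255 and normalized with per-channel mean and std of 0.5, mapping 8-bit pixels from [0, 255] into roughly [-1, 1]. Under this mapping the `background_color` of 127 used for aspect-ratio padding lands near 0.

```python
# Normalization implied by preprocessor_config.json:
# x_norm = (x * rescale_factor - image_mean) / image_std
import numpy as np

rescale_factor = 0.00392156862745098  # = 1 / 255
image_mean = np.array([0.5, 0.5, 0.5])
image_std = np.array([0.5, 0.5, 0.5])

pixels = np.array([0.0, 127.0, 255.0])  # black, padding gray, white
normalized = (pixels * rescale_factor - image_mean) / image_std
print(normalized)  # ~[-1.0, -0.004, 1.0]
```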
processor_config.json ADDED
@@ -0,0 +1,12 @@
+ {
+   "add_special_token": false,
+   "ignore_id": -100,
+   "image_end_tag": "<image_end>",
+   "image_tag": "<image_placeholder>",
+   "mask_prompt": true,
+   "num_image_tokens": 576,
+   "processor_class": "VLChatProcessor",
+   "sft_format": "deepseek",
+   "image_gen_tag": "<\uff5cbegin\u2581of\u2581generation\uff5c>",
+   "image_start_tag": "<image_beg>"
+ }
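A hypothetical sketch (not the actual `VLChatProcessor` implementation, which ships with the model code) of how the tags and `num_image_tokens` above could be used: each image slot in a prompt is expanded into 576 placeholder positions wrapped by the begin/end image tags.

```python
# Hypothetical expansion of an image slot using the values from
# processor_config.json; the real processor may differ in detail.
NUM_IMAGE_TOKENS = 576
IMAGE_TAG = "<image_placeholder>"
IMAGE_START, IMAGE_END = "<image_beg>", "<image_end>"

def expand_image_slots(prompt: str) -> str:
    """Replace each <image_placeholder> with 576 repeated placeholder
    tokens wrapped in the image start/end tags."""
    block = IMAGE_START + IMAGE_TAG * NUM_IMAGE_TOKENS + IMAGE_END
    return prompt.replace(IMAGE_TAG, block)

print(expand_image_slots("Describe this picture: <image_placeholder>")[:80])
```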
special_tokens_map.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "additional_special_tokens": [
+     {
+       "content": "<|begin▁of▁generation|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     }
+   ],
+   "bos_token": {
+     "content": "<|begin▁of▁sentence|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|end▁of▁sentence|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
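Note that these token strings use non-ASCII characters; `processor_config.json` spells the generation tag with `\uff5c` (fullwidth vertical line) and `\u2581` (the sentencepiece-style low block) rather than ASCII `|` and `_`, so exact string matching matters. A minimal stdlib sketch that lists the codepoints actually stored in this file:

```python
# List the non-ASCII codepoints used inside the committed special tokens,
# to avoid accidentally substituting ASCII '|' or '_' when matching them.
import json

with open("special_tokens_map.json") as f:
    tokens = json.load(f)

bos = tokens["bos_token"]["content"]
gen = tokens["additional_special_tokens"][0]["content"]
for name, tok in (("bos_token", bos), ("generation tag", gen)):
    print(name, tok, [f"U+{ord(c):04X}" for c in tok if ord(c) > 0x7F])
```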
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,84 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "added_tokens_decoder": {
+     "100000": {
+       "content": "<|begin▁of▁sentence|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100001": {
+       "content": "<|end▁of▁sentence|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100002": {
+       "content": "<|▁pad▁|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100003": {
+       "content": "<image_placeholder>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100004": {
+       "content": "<patch_placeholder>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100005": {
+       "content": "<image_beg>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100006": {
+       "content": "<image_end>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100007": {
+       "content": "<|begin▁of▁generation|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [
+     "<|begin▁of▁generation|>"
+   ],
+   "bos_token": "<|begin▁of▁sentence|>",
+   "chat_template": "{%- set found_item = false -%}\n{%- for message in messages -%}\n    {%- if message['role'] == 'system' -%}\n        {%- set found_item = true -%}\n    {%- endif -%}\n{%- endfor -%}\n{%- if not found_item -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.\\n'}}\n{%- endif %}\n{%- for message in messages %}\n    {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n    {%- else %}\n        {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n        {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{{'### Response:\\n'}}\n",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|end▁of▁sentence|>",
+   "legacy": true,
+   "model_max_length": 100000000,
+   "processor_class": "OmniProcessor",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": null,
+   "use_default_system_prompt": false
+ }
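The `chat_template` stored here is a DeepSeek-Coder style "### Instruction:" / "### Response:" template. A minimal sketch, assuming transformers' `AutoTokenizer` can load the tokenizer files added in this commit from a local directory (here a stand-in path `local_model_dir`):

```python
# Load the committed tokenizer files and render the stored chat_template,
# then check the special-token IDs declared in added_tokens_decoder.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("local_model_dir")

messages = [{"role": "user", "content": "Describe the image <image_placeholder>."}]
text = tok.apply_chat_template(messages, tokenize=False)
print(text)  # "### Instruction:" / "### Response:" formatted prompt

print(tok.bos_token, tok.eos_token)
print(tok.convert_tokens_to_ids("<image_placeholder>"))  # expected 100003
```

The very large `model_max_length` (100000000) effectively disables length truncation at the tokenizer level; the practical context limit comes from the language model's `max_position_embeddings` of 16384 in `config.json`.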