{
    "_attn_implementation_autoset": false,
    "add_cross_attention": false,
    "architectures": [
        "MolmoForCausalLM"
    ],
    "attention_layer_norm": false,
    "auto_map": {
        "AutoConfig": "config_molmo.MolmoConfig",
        "AutoModelForCausalLM": "modeling_molmo.MolmoForCausalLM"
    },
    "bad_words_ids": null,
    "begin_suppress_tokens": null,
    "bos_token_id": null,
    "chunk_size_feed_forward": 0,
    "clip_qkv": null,
    "cross_attention_hidden_size": null,
    "decoder_start_token_id": null,
    "diversity_penalty": 0.0,
    "do_sample": false,
    "early_stopping": false,
    "embedding_size": 152064,
    "encoder_no_repeat_ngram_size": 0,
    "eos_token_id": null,
    "exponential_decay_length_penalty": null,
    "finetuning_task": null,
    "forced_bos_token_id": null,
    "forced_eos_token_id": null,
    "hidden_size": 3584,
    "id2label": {
        "0": "LABEL_0",
        "1": "LABEL_1"
    },
    "initializer_range": 0.02,
    "intermediate_size": 37888,
    "is_decoder": false,
    "is_encoder_decoder": false,
    "label2id": {
        "LABEL_0": 0,
        "LABEL_1": 1
    },
    "layer_norm_eps": 1e-06,
    "layer_norm_type": "rms",
    "length_penalty": 1.0,
    "max_length": 20,
    "max_position_embeddings": 4096,
    "min_length": 0,
    "model_type": "molmo",
    "no_repeat_ngram_size": 0,
    "norm_after": false,
    "num_attention_heads": 28,
    "num_beam_groups": 1,
    "num_beams": 1,
    "num_hidden_layers": 28,
    "num_key_value_heads": 4,
    "num_return_sequences": 1,
    "output_attentions": false,
    "output_hidden_states": false,
    "output_scores": false,
    "pad_token_id": null,
    "prefix": null,
    "problem_type": null,
    "pruned_heads": {},
    "qkv_bias": true,
    "quantization": {
        "group_size": 64,
        "bits": 6
    },
    "remove_invalid_values": false,
    "repetition_penalty": 1.0,
    "return_dict": true,
    "return_dict_in_generate": false,
    "rope_theta": 1000000.0,
    "sep_token_id": null,
    "suppress_tokens": null,
    "task_specific_params": null,
    "temperature": 1.0,
    "tf_legacy_loss": false,
    "tie_encoder_decoder": false,
    "tie_word_embeddings": false,
    "tokenizer_class": null,
    "top_k": 50,
    "top_p": 1.0,
    "torch_dtype": "float32",
    "torchscript": false,
    "transformers_version": "4.47.1",
    "typical_p": 1.0,
    "use_bfloat16": false,
    "use_cache": true,
    "use_position_ids": true,
    "vision_config": {
        "skip_vision_non_divisible": true
    },
    "vocab_size": 152064,
    "weight_tying": false
}