akshat-52 committed on
Commit
af012e7
1 Parent(s): c9ecc8a

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +32 -33
config.json CHANGED
@@ -1,34 +1,33 @@
1
  {
2
- "model_type": "unknown", // Specify the base model type if known (e.g., "bert", "gpt-2")
3
- "architectures": ["unknown"], // Specify the architecture if known (e.g., "BertForMaskedLM", "GPT2LMHeadModel")
4
- "finetuned_from": "vilsonrodrigues/falcon-7b-instruct-sharded",
5
- "tasks": "conversational", // Specify the task if it's a standard task in Hugging Face
6
-
7
- // Include training hyperparameters
8
- "learning_rate": 0.0002,
9
- "train_batch_size": 8,
10
- "eval_batch_size": 8,
11
- "seed": 42,
12
- "gradient_accumulation_steps": 8,
13
- "total_train_batch_size": 64,
14
- "optimizer": "Adam with betas=(0.9, 0.999) and epsilon=1e-08",
15
- "lr_scheduler_type": "cosine",
16
- "lr_scheduler_warmup_ratio": 0.03,
17
- "training_steps": 5,
18
-
19
- // Include library versions used for training
20
- "framework": "Pytorch",
21
- "pytorch_version": "2.1.0+cu118",
22
- "transformers_version": "4.34.1",
23
- "datasets_version": "2.14.6",
24
- "tokenizers_version": "0.14.1",
25
-
26
- // Other relevant model configuration parameters
27
- "vocab_size": 0, // Specify the vocabulary size
28
- "hidden_size": 0, // Specify the hidden layer size
29
- "num_hidden_layers": 0, // Specify the number of hidden layers
30
- "num_attention_heads": 0, // Specify the number of attention heads
31
- // ... other model-specific parameters
32
-
33
- "unknown_parameters": "This model has been fine-tuned on an unknown dataset with specific parameters and may not conform to the standard tasks or architectures. Please refer to the model documentation for more details."
34
- }
 
1
  {
2
+ "alibi": false,
3
+ "apply_residual_connection_post_layernorm": false,
4
+ "architectures": [
5
+ "FalconForCausalLM"
6
+ ],
7
+ "attention_dropout": 0.0,
8
+ "auto_map": {
9
+ "AutoConfig": "configuration_falcon.FalconConfig",
10
+ "AutoModel": "modeling_falcon.FalconModel",
11
+ "AutoModelForSequenceClassification": "modeling_falcon.FalconForSequenceClassification",
12
+ "AutoModelForTokenClassification": "modeling_falcon.FalconForTokenClassification",
13
+ "AutoModelForQuestionAnswering": "modeling_falcon.FalconForQuestionAnswering",
14
+ "AutoModelForCausalLM": "modeling_falcon.FalconForCausalLM"
15
+ },
16
+ "bias": false,
17
+ "bos_token_id": 11,
18
+ "eos_token_id": 11,
19
+ "hidden_dropout": 0.0,
20
+ "hidden_size": 4544,
21
+ "initializer_range": 0.02,
22
+ "layer_norm_epsilon": 1e-05,
23
+ "model_type": "falcon",
24
+ "multi_query": true,
25
+ "new_decoder_architecture": false,
26
+ "num_attention_heads": 71,
27
+ "num_hidden_layers": 32,
28
+ "parallel_attn": true,
29
+ "torch_dtype": "bfloat16",
30
+ "transformers_version": "4.27.4",
31
+ "use_cache": true,
32
+ "vocab_size": 65024
33
+ }