akshat-52 committed on
Commit
af012e7
1 Parent(s): c9ecc8a

Update config.json

Browse files
Files changed (1) hide show
  1. config.json +32 -33
config.json CHANGED
@@ -1,34 +1,33 @@
1
  {
2
- "model_type": "unknown", // Specify the base model type if known (e.g., "bert", "gpt-2")
3
- "architectures": ["unknown"], // Specify the architecture if known (e.g., "BertForMaskedLM", "GPT2LMHeadModel")
4
- "finetuned_from": "vilsonrodrigues/falcon-7b-instruct-sharded",
5
- "tasks": "conversational", // Specify the task if it's a standard task in Hugging Face
6
-
7
- // Include training hyperparameters
8
- "learning_rate": 0.0002,
9
- "train_batch_size": 8,
10
- "eval_batch_size": 8,
11
- "seed": 42,
12
- "gradient_accumulation_steps": 8,
13
- "total_train_batch_size": 64,
14
- "optimizer": "Adam with betas=(0.9, 0.999) and epsilon=1e-08",
15
- "lr_scheduler_type": "cosine",
16
- "lr_scheduler_warmup_ratio": 0.03,
17
- "training_steps": 5,
18
-
19
- // Include library versions used for training
20
- "framework": "Pytorch",
21
- "pytorch_version": "2.1.0+cu118",
22
- "transformers_version": "4.34.1",
23
- "datasets_version": "2.14.6",
24
- "tokenizers_version": "0.14.1",
25
-
26
- // Other relevant model configuration parameters
27
- "vocab_size": 0, // Specify the vocabulary size
28
- "hidden_size": 0, // Specify the hidden layer size
29
- "num_hidden_layers": 0, // Specify the number of hidden layers
30
- "num_attention_heads": 0, // Specify the number of attention heads
31
- // ... other model-specific parameters
32
-
33
- "unknown_parameters": "This model has been fine-tuned on an unknown dataset with specific parameters and may not conform to the standard tasks or architectures. Please refer to the model documentation for more details."
34
- }
 
1
  {
2
+ "alibi": false,
3
+ "apply_residual_connection_post_layernorm": false,
4
+ "architectures": [
5
+ "FalconForCausalLM"
6
+ ],
7
+ "attention_dropout": 0.0,
8
+ "auto_map": {
9
+ "AutoConfig": "configuration_falcon.FalconConfig",
10
+ "AutoModel": "modeling_falcon.FalconModel",
11
+ "AutoModelForSequenceClassification": "modeling_falcon.FalconForSequenceClassification",
12
+ "AutoModelForTokenClassification": "modeling_falcon.FalconForTokenClassification",
13
+ "AutoModelForQuestionAnswering": "modeling_falcon.FalconForQuestionAnswering",
14
+ "AutoModelForCausalLM": "modeling_falcon.FalconForCausalLM"
15
+ },
16
+ "bias": false,
17
+ "bos_token_id": 11,
18
+ "eos_token_id": 11,
19
+ "hidden_dropout": 0.0,
20
+ "hidden_size": 4544,
21
+ "initializer_range": 0.02,
22
+ "layer_norm_epsilon": 1e-05,
23
+ "model_type": "falcon",
24
+ "multi_query": true,
25
+ "new_decoder_architecture": false,
26
+ "num_attention_heads": 71,
27
+ "num_hidden_layers": 32,
28
+ "parallel_attn": true,
29
+ "torch_dtype": "bfloat16",
30
+ "transformers_version": "4.27.4",
31
+ "use_cache": true,
32
+ "vocab_size": 65024
33
+ }