ClarkWangPas committed on Nov 15, 2023

Commit

ef571c5

1 Parent(s): e660fa1

NER_Second_Try

Browse files

Files changed (24) hide show

README.md +53 -0
adapter_config.json +23 -0
adapter_model.safetensors +3 -0
api_experiment_run/description.json +352 -0
api_experiment_run/model/logs/training/events.out.tfevents.1700048114.jupyter-carlosruizmoreno.1721.0 +3 -0
api_experiment_run/model/model_hyperparameters.json +0 -0
api_experiment_run/model/training_set_metadata.json +0 -0
api_experiment_run_0/description.json +352 -0
api_experiment_run_0/model/training_set_metadata.json +0 -0
final_checkpoint/README.md +257 -0
final_checkpoint/adapter_config.json +23 -0
final_checkpoint/adapter_model.bin +3 -0
runs/Nov14_21-53-35_jupyter-carlosruizmoreno/events.out.tfevents.1699998818.jupyter-carlosruizmoreno.526.0 +3 -0
runs/Nov15_09-42-53_jupyter-carlosruizmoreno/events.out.tfevents.1700041375.jupyter-carlosruizmoreno.623.0 +3 -0
runs/Nov15_11-40-06_jupyter-carlosruizmoreno/events.out.tfevents.1700048408.jupyter-carlosruizmoreno.623.1 +3 -0
runs/Nov15_11-41-48_jupyter-carlosruizmoreno/events.out.tfevents.1700048511.jupyter-carlosruizmoreno.2192.0 +3 -0
runs/Nov15_11-47-35_jupyter-carlosruizmoreno/events.out.tfevents.1700048867.jupyter-carlosruizmoreno.2530.0 +3 -0
runs/Nov15_11-49-46_jupyter-carlosruizmoreno/events.out.tfevents.1700048989.jupyter-carlosruizmoreno.2530.1 +3 -0
runs/Nov15_11-50-51_jupyter-carlosruizmoreno/events.out.tfevents.1700049053.jupyter-carlosruizmoreno.2971.0 +3 -0
special_tokens_map.json +24 -0
tokenizer.json +0 -0
tokenizer.model +3 -0
tokenizer_config.json +39 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,53 @@

+---
+base_model: meta-llama/Llama-2-7b-hf
+tags:
+- generated_from_trainer
+model-index:
+- name: results
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# results
+This model is a fine-tuned version of [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) on an unknown dataset.
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0002
+- train_batch_size: 4
+- eval_batch_size: 8
+- seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 16
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- training_steps: 200
+### Training results
+### Framework versions
+- Transformers 4.35.1
+- Pytorch 2.1.0+cu121
+- Datasets 2.14.7
+- Tokenizers 0.14.1

adapter_config.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e6fdb318d0515c6f87e91e0936391b1fc4cc2bf4882cee7a4b1f9a8183fc9309
+size 67126104

api_experiment_run/description.json ADDED Viewed

	@@ -0,0 +1,352 @@

+{
+    "command": "/opt/conda/lib/python3.10/site-packages/ipykernel_launcher.py -f /home/jovyan/.local/share/jupyter/runtime/kernel-67de967a-aa00-4e36-bdc4-17a4dd6f9d51.json",
+    "compute": {
+        "arch_list": [
+            "sm_50",
+            "sm_60",
+            "sm_70",
+            "sm_75",
+            "sm_80",
+            "sm_86",
+            "sm_90"
+        ],
+        "devices": {
+            "0": {
+                "device_capability": [
+                    8,
+                    0
+                ],
+                "device_properties": "_CudaDeviceProperties(name='NVIDIA A100-PCIE-40GB MIG 7g.40gb', major=8, minor=0, total_memory=40339MB, multi_processor_count=98)",
+                "gpu_type": "NVIDIA A100-PCIE-40GB MIG 7g.40gb"
+            }
+        },
+        "gencode_flags": "-gencode compute=compute_50,code=sm_50 -gencode compute=compute_60,code=sm_60 -gencode compute=compute_70,code=sm_70 -gencode compute=compute_75,code=sm_75 -gencode compute=compute_80,code=sm_80 -gencode compute=compute_86,code=sm_86 -gencode compute=compute_90,code=sm_90",
+        "gpus_per_node": 1,
+        "num_nodes": 1
+    },
+    "config": {
+        "adapter": {
+            "alpha": 16,
+            "bias_type": "none",
+            "dropout": 0.05,
+            "postprocessor": {
+                "merge_adapter_into_base_model": false,
+                "progressbar": false
+            },
+            "pretrained_adapter_weights": null,
+            "r": 8,
+            "type": "lora"
+        },
+        "backend": null,
+        "base_model": "meta-llama/Llama-2-7b-hf",
+        "defaults": {
+            "text": {
+                "decoder": {
+                    "fc_activation": "relu",
+                    "fc_bias_initializer": "zeros",
+                    "fc_dropout": 0.0,
+                    "fc_layers": null,
+                    "fc_norm": null,
+                    "fc_norm_params": null,
+                    "fc_output_size": 256,
+                    "fc_use_bias": true,
+                    "fc_weights_initializer": "xavier_uniform",
+                    "input_size": null,
+                    "max_new_tokens": null,
+                    "num_fc_layers": 0,
+                    "pretrained_model_name_or_path": "",
+                    "tokenizer": "hf_tokenizer",
+                    "type": "text_extractor",
+                    "vocab_file": ""
+                },
+                "encoder": {
+                    "skip": false,
+                    "type": "passthrough"
+                },
+                "loss": {
+                    "class_similarities": null,
+                    "class_similarities_temperature": 0,
+                    "class_weights": null,
+                    "confidence_penalty": 0,
+                    "robust_lambda": 0,
+                    "type": "next_token_softmax_cross_entropy",
+                    "unique": false,
+                    "weight": 1.0
+                },
+                "preprocessing": {
+                    "cache_encoder_embeddings": false,
+                    "compute_idf": false,
+                    "computed_fill_value": "<UNK>",
+                    "fill_value": "<UNK>",
+                    "lowercase": true,
+                    "max_sequence_length": 256,
+                    "missing_value_strategy": "fill_with_const",
+                    "most_common": 20000,
+                    "ngram_size": 2,
+                    "padding": "right",
+                    "padding_symbol": "<PAD>",
+                    "pretrained_model_name_or_path": null,
+                    "prompt": {
+                        "retrieval": {
+                            "index_name": null,
+                            "k": 0,
+                            "model_name": null,
+                            "type": null
+                        },
+                        "task": null,
+                        "template": null
+                    },
+                    "sequence_length": null,
+                    "tokenizer": "space_punct",
+                    "unknown_symbol": "<UNK>",
+                    "vocab_file": null
+                }
+            }
+        },
+        "generation": {
+            "bad_words_ids": null,
+            "begin_suppress_tokens": null,
+            "bos_token_id": null,
+            "diversity_penalty": 0.0,
+            "do_sample": true,
+            "early_stopping": false,
+            "encoder_repetition_penalty": 1.0,
+            "eos_token_id": null,
+            "epsilon_cutoff": 0.0,
+            "eta_cutoff": 0.0,
+            "exponential_decay_length_penalty": null,
+            "force_words_ids": null,
+            "forced_bos_token_id": null,
+            "forced_decoder_ids": null,
+            "forced_eos_token_id": null,
+            "guidance_scale": null,
+            "length_penalty": 1.0,
+            "max_length": 32,
+            "max_new_tokens": 512,
+            "max_time": null,
+            "min_length": 0,
+            "min_new_tokens": null,
+            "no_repeat_ngram_size": 0,
+            "num_beam_groups": 1,
+            "num_beams": 1,
+            "pad_token_id": null,
+            "penalty_alpha": null,
+            "remove_invalid_values": false,
+            "renormalize_logits": false,
+            "repetition_penalty": 1.0,
+            "sequence_bias": null,
+            "suppress_tokens": null,
+            "temperature": 0.0,
+            "top_k": 50,
+            "top_p": 1.0,
+            "typical_p": 1.0,
+            "use_cache": true
+        },
+        "hyperopt": null,
+        "input_features": [
+            {
+                "active": true,
+                "column": "instruction",
+                "encoder": {
+                    "skip": false,
+                    "type": "passthrough"
+                },
+                "name": "instruction",
+                "preprocessing": {
+                    "cache_encoder_embeddings": false,
+                    "compute_idf": false,
+                    "computed_fill_value": "<UNK>",
+                    "fill_value": "<UNK>",
+                    "lowercase": true,
+                    "max_sequence_length": null,
+                    "missing_value_strategy": "fill_with_const",
+                    "most_common": 20000,
+                    "ngram_size": 2,
+                    "padding": "left",
+                    "padding_symbol": "<PAD>",
+                    "pretrained_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+                    "sequence_length": null,
+                    "tokenizer": "hf_tokenizer",
+                    "unknown_symbol": "<UNK>",
+                    "vocab_file": null
+                },
+                "proc_column": "instruction_TityHg",
+                "tied": null,
+                "type": "text"
+            }
+        ],
+        "ludwig_version": "0.8.6",
+        "model_parameters": null,
+        "model_type": "llm",
+        "output_features": [
+            {
+                "active": true,
+                "class_similarities": null,
+                "column": "output",
+                "decoder": {
+                    "fc_activation": "relu",
+                    "fc_bias_initializer": "zeros",
+                    "fc_dropout": 0.0,
+                    "fc_layers": null,
+                    "fc_norm": null,
+                    "fc_norm_params": null,
+                    "fc_output_size": 256,
+                    "fc_use_bias": true,
+                    "fc_weights_initializer": "xavier_uniform",
+                    "input_size": null,
+                    "max_new_tokens": 512,
+                    "num_fc_layers": 0,
+                    "pretrained_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+                    "tokenizer": "hf_tokenizer",
+                    "type": "text_extractor",
+                    "vocab_file": ""
+                },
+                "default_validation_metric": "loss",
+                "dependencies": [],
+                "input_size": null,
+                "loss": {
+                    "class_similarities": null,
+                    "class_similarities_temperature": 0,
+                    "class_weights": null,
+                    "confidence_penalty": 0,
+                    "robust_lambda": 0,
+                    "type": "next_token_softmax_cross_entropy",
+                    "unique": false,
+                    "weight": 1.0
+                },
+                "name": "output",
+                "num_classes": null,
+                "preprocessing": {
+                    "cache_encoder_embeddings": false,
+                    "compute_idf": false,
+                    "computed_fill_value": "<UNK>",
+                    "fill_value": "<UNK>",
+                    "lowercase": true,
+                    "max_sequence_length": null,
+                    "missing_value_strategy": "drop_row",
+                    "most_common": 20000,
+                    "ngram_size": 2,
+                    "padding": "left",
+                    "padding_symbol": "<PAD>",
+                    "pretrained_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+                    "sequence_length": null,
+                    "tokenizer": "hf_tokenizer",
+                    "unknown_symbol": "<UNK>",
+                    "vocab_file": null
+                },
+                "proc_column": "output_9bi87u",
+                "reduce_dependencies": "sum",
+                "reduce_input": "sum",
+                "type": "text"
+            }
+        ],
+        "preprocessing": {
+            "global_max_sequence_length": 512,
+            "oversample_minority": null,
+            "sample_ratio": 1.0,
+            "sample_size": null,
+            "split": {
+                "probabilities": [
+                    1.0,
+                    0.0,
+                    0.0
+                ],
+                "type": "random"
+            },
+            "undersample_majority": null
+        },
+        "prompt": {
+            "retrieval": {
+                "index_name": null,
+                "k": 0,
+                "model_name": null,
+                "type": null
+            },
+            "task": null,
+            "template": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n### Instruction:\n### Input: {input}\n### Response:"
+        },
+        "quantization": {
+            "bits": 4,
+            "bnb_4bit_compute_dtype": "float16",
+            "bnb_4bit_quant_type": "nf4",
+            "bnb_4bit_use_double_quant": true,
+            "llm_int8_has_fp16_weight": false,
+            "llm_int8_threshold": 6.0
+        },
+        "trainer": {
+            "base_learning_rate": 0.0,
+            "batch_size": 1,
+            "bucketing_field": null,
+            "checkpoints_per_epoch": 0,
+            "compile": false,
+            "early_stop": 5,
+            "effective_batch_size": "auto",
+            "enable_gradient_checkpointing": false,
+            "enable_profiling": false,
+            "epochs": 6,
+            "eval_batch_size": 2,
+            "evaluate_training_set": false,
+            "gradient_accumulation_steps": 16,
+            "gradient_clipping": {
+                "clipglobalnorm": 0.5,
+                "clipnorm": null,
+                "clipvalue": null
+            },
+            "increase_batch_size_eval_metric": "loss",
+            "increase_batch_size_eval_split": "training",
+            "increase_batch_size_on_plateau": 0,
+            "increase_batch_size_on_plateau_patience": 5,
+            "increase_batch_size_on_plateau_rate": 2.0,
+            "learning_rate": 0.0005,
+            "learning_rate_scaling": "linear",
+            "learning_rate_scheduler": {
+                "decay": null,
+                "decay_rate": 0.96,
+                "decay_steps": 10000,
+                "eta_min": 0,
+                "reduce_eval_metric": "loss",
+                "reduce_eval_split": "training",
+                "reduce_on_plateau": 0,
+                "reduce_on_plateau_patience": 10,
+                "reduce_on_plateau_rate": 0.1,
+                "staircase": false,
+                "t_0": null,
+                "t_mult": 1,
+                "warmup_evaluations": 0,
+                "warmup_fraction": 0.03
+            },
+            "max_batch_size": 1099511627776,
+            "optimizer": {
+                "amsgrad": false,
+                "betas": [
+                    0.9,
+                    0.999
+                ],
+                "eps": 1e-08,
+                "type": "adam",
+                "weight_decay": 0.0
+            },
+            "profiler": {
+                "active": 3,
+                "repeat": 5,
+                "skip_first": 0,
+                "wait": 1,
+                "warmup": 1
+            },
+            "regularization_lambda": 0.0,
+            "regularization_type": "l2",
+            "should_shuffle": true,
+            "skip_all_evaluation": false,
+            "steps_per_checkpoint": 0,
+            "train_steps": null,
+            "type": "finetune",
+            "use_mixed_precision": false,
+            "validation_field": "output",
+            "validation_metric": "loss"
+        }
+    },
+    "data_format": "<class 'pandas.core.frame.DataFrame'>",
+    "ludwig_version": "0.8.6",
+    "random_seed": 42,
+    "torch_version": "2.1.0+cu121"
+}

api_experiment_run/model/logs/training/events.out.tfevents.1700048114.jupyter-carlosruizmoreno.1721.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f5b19c2dc00151ccb14c7dcadd4d768b6df235f7abc8dd34ecee359c0b29b3a1
+size 1227

api_experiment_run/model/model_hyperparameters.json ADDED Viewed

The diff for this file is too large to render. See raw diff

api_experiment_run/model/training_set_metadata.json ADDED Viewed

The diff for this file is too large to render. See raw diff

api_experiment_run_0/description.json ADDED Viewed

	@@ -0,0 +1,352 @@

+{
+    "command": "/opt/conda/lib/python3.10/site-packages/ipykernel_launcher.py -f /home/jovyan/.local/share/jupyter/runtime/kernel-67de967a-aa00-4e36-bdc4-17a4dd6f9d51.json",
+    "compute": {
+        "arch_list": [
+            "sm_50",
+            "sm_60",
+            "sm_70",
+            "sm_75",
+            "sm_80",
+            "sm_86",
+            "sm_90"
+        ],
+        "devices": {
+            "0": {
+                "device_capability": [
+                    8,
+                    0
+                ],
+                "device_properties": "_CudaDeviceProperties(name='NVIDIA A100-PCIE-40GB MIG 7g.40gb', major=8, minor=0, total_memory=40339MB, multi_processor_count=98)",
+                "gpu_type": "NVIDIA A100-PCIE-40GB MIG 7g.40gb"
+            }
+        },
+        "gencode_flags": "-gencode compute=compute_50,code=sm_50 -gencode compute=compute_60,code=sm_60 -gencode compute=compute_70,code=sm_70 -gencode compute=compute_75,code=sm_75 -gencode compute=compute_80,code=sm_80 -gencode compute=compute_86,code=sm_86 -gencode compute=compute_90,code=sm_90",
+        "gpus_per_node": 1,
+        "num_nodes": 1
+    },
+    "config": {
+        "adapter": {
+            "alpha": 16,
+            "bias_type": "none",
+            "dropout": 0.05,
+            "postprocessor": {
+                "merge_adapter_into_base_model": false,
+                "progressbar": false
+            },
+            "pretrained_adapter_weights": null,
+            "r": 8,
+            "type": "lora"
+        },
+        "backend": null,
+        "base_model": "meta-llama/Llama-2-7b-hf",
+        "defaults": {
+            "text": {
+                "decoder": {
+                    "fc_activation": "relu",
+                    "fc_bias_initializer": "zeros",
+                    "fc_dropout": 0.0,
+                    "fc_layers": null,
+                    "fc_norm": null,
+                    "fc_norm_params": null,
+                    "fc_output_size": 256,
+                    "fc_use_bias": true,
+                    "fc_weights_initializer": "xavier_uniform",
+                    "input_size": null,
+                    "max_new_tokens": null,
+                    "num_fc_layers": 0,
+                    "pretrained_model_name_or_path": "",
+                    "tokenizer": "hf_tokenizer",
+                    "type": "text_extractor",
+                    "vocab_file": ""
+                },
+                "encoder": {
+                    "skip": false,
+                    "type": "passthrough"
+                },
+                "loss": {
+                    "class_similarities": null,
+                    "class_similarities_temperature": 0,
+                    "class_weights": null,
+                    "confidence_penalty": 0,
+                    "robust_lambda": 0,
+                    "type": "next_token_softmax_cross_entropy",
+                    "unique": false,
+                    "weight": 1.0
+                },
+                "preprocessing": {
+                    "cache_encoder_embeddings": false,
+                    "compute_idf": false,
+                    "computed_fill_value": "<UNK>",
+                    "fill_value": "<UNK>",
+                    "lowercase": true,
+                    "max_sequence_length": 256,
+                    "missing_value_strategy": "fill_with_const",
+                    "most_common": 20000,
+                    "ngram_size": 2,
+                    "padding": "right",
+                    "padding_symbol": "<PAD>",
+                    "pretrained_model_name_or_path": null,
+                    "prompt": {
+                        "retrieval": {
+                            "index_name": null,
+                            "k": 0,
+                            "model_name": null,
+                            "type": null
+                        },
+                        "task": null,
+                        "template": null
+                    },
+                    "sequence_length": null,
+                    "tokenizer": "space_punct",
+                    "unknown_symbol": "<UNK>",
+                    "vocab_file": null
+                }
+            }
+        },
+        "generation": {
+            "bad_words_ids": null,
+            "begin_suppress_tokens": null,
+            "bos_token_id": null,
+            "diversity_penalty": 0.0,
+            "do_sample": true,
+            "early_stopping": false,
+            "encoder_repetition_penalty": 1.0,
+            "eos_token_id": null,
+            "epsilon_cutoff": 0.0,
+            "eta_cutoff": 0.0,
+            "exponential_decay_length_penalty": null,
+            "force_words_ids": null,
+            "forced_bos_token_id": null,
+            "forced_decoder_ids": null,
+            "forced_eos_token_id": null,
+            "guidance_scale": null,
+            "length_penalty": 1.0,
+            "max_length": 32,
+            "max_new_tokens": 512,
+            "max_time": null,
+            "min_length": 0,
+            "min_new_tokens": null,
+            "no_repeat_ngram_size": 0,
+            "num_beam_groups": 1,
+            "num_beams": 1,
+            "pad_token_id": null,
+            "penalty_alpha": null,
+            "remove_invalid_values": false,
+            "renormalize_logits": false,
+            "repetition_penalty": 1.0,
+            "sequence_bias": null,
+            "suppress_tokens": null,
+            "temperature": 0.1,
+            "top_k": 50,
+            "top_p": 1.0,
+            "typical_p": 1.0,
+            "use_cache": true
+        },
+        "hyperopt": null,
+        "input_features": [
+            {
+                "active": true,
+                "column": "instruction",
+                "encoder": {
+                    "skip": false,
+                    "type": "passthrough"
+                },
+                "name": "instruction",
+                "preprocessing": {
+                    "cache_encoder_embeddings": false,
+                    "compute_idf": false,
+                    "computed_fill_value": "<UNK>",
+                    "fill_value": "<UNK>",
+                    "lowercase": true,
+                    "max_sequence_length": null,
+                    "missing_value_strategy": "fill_with_const",
+                    "most_common": 20000,
+                    "ngram_size": 2,
+                    "padding": "left",
+                    "padding_symbol": "<PAD>",
+                    "pretrained_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+                    "sequence_length": null,
+                    "tokenizer": "hf_tokenizer",
+                    "unknown_symbol": "<UNK>",
+                    "vocab_file": null
+                },
+                "proc_column": "instruction_TityHg",
+                "tied": null,
+                "type": "text"
+            }
+        ],
+        "ludwig_version": "0.8.6",
+        "model_parameters": null,
+        "model_type": "llm",
+        "output_features": [
+            {
+                "active": true,
+                "class_similarities": null,
+                "column": "output",
+                "decoder": {
+                    "fc_activation": "relu",
+                    "fc_bias_initializer": "zeros",
+                    "fc_dropout": 0.0,
+                    "fc_layers": null,
+                    "fc_norm": null,
+                    "fc_norm_params": null,
+                    "fc_output_size": 256,
+                    "fc_use_bias": true,
+                    "fc_weights_initializer": "xavier_uniform",
+                    "input_size": null,
+                    "max_new_tokens": 512,
+                    "num_fc_layers": 0,
+                    "pretrained_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+                    "tokenizer": "hf_tokenizer",
+                    "type": "text_extractor",
+                    "vocab_file": ""
+                },
+                "default_validation_metric": "loss",
+                "dependencies": [],
+                "input_size": null,
+                "loss": {
+                    "class_similarities": null,
+                    "class_similarities_temperature": 0,
+                    "class_weights": null,
+                    "confidence_penalty": 0,
+                    "robust_lambda": 0,
+                    "type": "next_token_softmax_cross_entropy",
+                    "unique": false,
+                    "weight": 1.0
+                },
+                "name": "output",
+                "num_classes": null,
+                "preprocessing": {
+                    "cache_encoder_embeddings": false,
+                    "compute_idf": false,
+                    "computed_fill_value": "<UNK>",
+                    "fill_value": "<UNK>",
+                    "lowercase": true,
+                    "max_sequence_length": null,
+                    "missing_value_strategy": "drop_row",
+                    "most_common": 20000,
+                    "ngram_size": 2,
+                    "padding": "left",
+                    "padding_symbol": "<PAD>",
+                    "pretrained_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+                    "sequence_length": null,
+                    "tokenizer": "hf_tokenizer",
+                    "unknown_symbol": "<UNK>",
+                    "vocab_file": null
+                },
+                "proc_column": "output_9bi87u",
+                "reduce_dependencies": "sum",
+                "reduce_input": "sum",
+                "type": "text"
+            }
+        ],
+        "preprocessing": {
+            "global_max_sequence_length": 512,
+            "oversample_minority": null,
+            "sample_ratio": 1.0,
+            "sample_size": null,
+            "split": {
+                "probabilities": [
+                    1.0,
+                    0.0,
+                    0.0
+                ],
+                "type": "random"
+            },
+            "undersample_majority": null
+        },
+        "prompt": {
+            "retrieval": {
+                "index_name": null,
+                "k": 0,
+                "model_name": null,
+                "type": null
+            },
+            "task": null,
+            "template": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n### Instruction:\n### Input: {input}\n### Response:"
+        },
+        "quantization": {
+            "bits": 4,
+            "bnb_4bit_compute_dtype": "float16",
+            "bnb_4bit_quant_type": "nf4",
+            "bnb_4bit_use_double_quant": true,
+            "llm_int8_has_fp16_weight": false,
+            "llm_int8_threshold": 6.0
+        },
+        "trainer": {
+            "base_learning_rate": 0.0,
+            "batch_size": 1,
+            "bucketing_field": null,
+            "checkpoints_per_epoch": 0,
+            "compile": false,
+            "early_stop": 5,
+            "effective_batch_size": "auto",
+            "enable_gradient_checkpointing": false,
+            "enable_profiling": false,
+            "epochs": 2,
+            "eval_batch_size": 2,
+            "evaluate_training_set": false,
+            "gradient_accumulation_steps": 16,
+            "gradient_clipping": {
+                "clipglobalnorm": 0.5,
+                "clipnorm": null,
+                "clipvalue": null
+            },
+            "increase_batch_size_eval_metric": "loss",
+            "increase_batch_size_eval_split": "training",
+            "increase_batch_size_on_plateau": 0,
+            "increase_batch_size_on_plateau_patience": 5,
+            "increase_batch_size_on_plateau_rate": 2.0,
+            "learning_rate": 0.0005,
+            "learning_rate_scaling": "linear",
+            "learning_rate_scheduler": {
+                "decay": null,
+                "decay_rate": 0.96,
+                "decay_steps": 10000,
+                "eta_min": 0,
+                "reduce_eval_metric": "loss",
+                "reduce_eval_split": "training",
+                "reduce_on_plateau": 0,
+                "reduce_on_plateau_patience": 10,
+                "reduce_on_plateau_rate": 0.1,
+                "staircase": false,
+                "t_0": null,
+                "t_mult": 1,
+                "warmup_evaluations": 0,
+                "warmup_fraction": 0.03
+            },
+            "max_batch_size": 1099511627776,
+            "optimizer": {
+                "amsgrad": false,
+                "betas": [
+                    0.9,
+                    0.999
+                ],
+                "eps": 1e-08,
+                "type": "adam",
+                "weight_decay": 0.0
+            },
+            "profiler": {
+                "active": 3,
+                "repeat": 5,
+                "skip_first": 0,
+                "wait": 1,
+                "warmup": 1
+            },
+            "regularization_lambda": 0.0,
+            "regularization_type": "l2",
+            "should_shuffle": true,
+            "skip_all_evaluation": false,
+            "steps_per_checkpoint": 0,
+            "train_steps": null,
+            "type": "finetune",
+            "use_mixed_precision": false,
+            "validation_field": "output",
+            "validation_metric": "loss"
+        }
+    },
+    "data_format": "<class 'pandas.core.frame.DataFrame'>",
+    "ludwig_version": "0.8.6",
+    "random_seed": 42,
+    "torch_version": "2.1.0+cu121"
+}

api_experiment_run_0/model/training_set_metadata.json ADDED Viewed

The diff for this file is too large to render. See raw diff

final_checkpoint/README.md ADDED Viewed

	@@ -0,0 +1,257 @@

+---
+library_name: peft
+base_model: meta-llama/Llama-2-7b-hf
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section covers using the model as-is, without fine-tuning or plugging it into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section covers using the model after it has been fine-tuned for a task, or when it is plugged into a larger ecosystem/app. -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed to make further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Data Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!-- fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Data Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float16
+### Framework versions
+- PEFT 0.6.2
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float16
+### Framework versions
+- PEFT 0.6.2
+## Training procedure
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: False
+- load_in_4bit: True
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: nf4
+- bnb_4bit_use_double_quant: True
+- bnb_4bit_compute_dtype: float16
+### Framework versions
+- PEFT 0.6.2

final_checkpoint/adapter_config.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "meta-llama/Llama-2-7b-hf",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}

final_checkpoint/adapter_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ff040de074dd2a7aa9cbd8503ded069b074f1e4f8e3c3f45e0fc3926ac2a8065
+size 67155338

runs/Nov14_21-53-35_jupyter-carlosruizmoreno/events.out.tfevents.1699998818.jupyter-carlosruizmoreno.526.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4cafb8ea2b81e391675d864f788346014c786389890c3dc066cc8cfbc2c8713e
+size 6453

runs/Nov15_09-42-53_jupyter-carlosruizmoreno/events.out.tfevents.1700041375.jupyter-carlosruizmoreno.623.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:845fc7be2377b79155a3a7fae8f8ddcf4300f6320bdae80f4ac9a2db6888a7d7
+size 9593

runs/Nov15_11-40-06_jupyter-carlosruizmoreno/events.out.tfevents.1700048408.jupyter-carlosruizmoreno.623.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9260ab564506a4949dbf7b9ccbddfcaa6b5ae7ed4d79e3843f555ac38c7fea61
+size 40

runs/Nov15_11-41-48_jupyter-carlosruizmoreno/events.out.tfevents.1700048511.jupyter-carlosruizmoreno.2192.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e3c3f3c0fa9e343538403f452c37fc2f32f5cd37f1b24355b9fd393de3ab3233
+size 4565

runs/Nov15_11-47-35_jupyter-carlosruizmoreno/events.out.tfevents.1700048867.jupyter-carlosruizmoreno.2530.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:52ece578ceedd5ba2365a6139e707d72ef5d98138d0d25b7ab3c0d536562fd7e
+size 4565

runs/Nov15_11-49-46_jupyter-carlosruizmoreno/events.out.tfevents.1700048989.jupyter-carlosruizmoreno.2530.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:02c4a424b803d40dd9586e0c5d81cf9d248301721babb321337d11031b22f730
+size 4564

runs/Nov15_11-50-51_jupyter-carlosruizmoreno/events.out.tfevents.1700049053.jupyter-carlosruizmoreno.2971.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:daccf10988cebaad93dcbb4cdf2167048834486520dbbd767f91588a71d940bd
+size 8022

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "</s>",
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "</s>",
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0713cc91f7b7cd310bc527e52fc31872b4f8739d95c22b48ad0ca8ce2a7019a4
+size 4600