Ebrahimaabdelghfar committed
Commit 188a5f6
1 Parent(s): 8cd99a2

Upload 14 files

checkpoint-912/README.md ADDED
@@ -0,0 +1,202 @@
+ ---
+ library_name: peft
+ base_model: google/gemma-2b-it
+ ---
+
+ # Model Card for Model ID
+
+ <!-- Provide a quick summary of what the model is/does. -->
+
+
+
+ ## Model Details
+
+ ### Model Description
+
+ <!-- Provide a longer summary of what this model is. -->
+
+
+
+ - **Developed by:** [More Information Needed]
+ - **Funded by [optional]:** [More Information Needed]
+ - **Shared by [optional]:** [More Information Needed]
+ - **Model type:** [More Information Needed]
+ - **Language(s) (NLP):** [More Information Needed]
+ - **License:** [More Information Needed]
+ - **Finetuned from model [optional]:** [More Information Needed]
+
+ ### Model Sources [optional]
+
+ <!-- Provide the basic links for the model. -->
+
+ - **Repository:** [More Information Needed]
+ - **Paper [optional]:** [More Information Needed]
+ - **Demo [optional]:** [More Information Needed]
+
+ ## Uses
+
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+
+ ### Direct Use
+
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+
+ [More Information Needed]
+
+ ### Downstream Use [optional]
+
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+
+ [More Information Needed]
+
+ ### Out-of-Scope Use
+
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+
+ [More Information Needed]
+
+ ## Bias, Risks, and Limitations
+
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
+
+ [More Information Needed]
+
+ ### Recommendations
+
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+
+ ## How to Get Started with the Model
+
+ Use the code below to get started with the model.
+
+ [More Information Needed]
+
+ ## Training Details
+
+ ### Training Data
+
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+
+ [More Information Needed]
+
+ ### Training Procedure
+
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+
+ #### Preprocessing [optional]
+
+ [More Information Needed]
+
+
+ #### Training Hyperparameters
+
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+
+ #### Speeds, Sizes, Times [optional]
+
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+
+ [More Information Needed]
+
+ ## Evaluation
+
+ <!-- This section describes the evaluation protocols and provides the results. -->
+
+ ### Testing Data, Factors & Metrics
+
+ #### Testing Data
+
+ <!-- This should link to a Dataset Card if possible. -->
+
+ [More Information Needed]
+
+ #### Factors
+
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+
+ [More Information Needed]
+
+ #### Metrics
+
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
+
+ [More Information Needed]
+
+ ### Results
+
+ [More Information Needed]
+
+ #### Summary
+
+
+
+ ## Model Examination [optional]
+
+ <!-- Relevant interpretability work for the model goes here -->
+
+ [More Information Needed]
+
+ ## Environmental Impact
+
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+
+ - **Hardware Type:** [More Information Needed]
+ - **Hours used:** [More Information Needed]
+ - **Cloud Provider:** [More Information Needed]
+ - **Compute Region:** [More Information Needed]
+ - **Carbon Emitted:** [More Information Needed]
+
+ ## Technical Specifications [optional]
+
+ ### Model Architecture and Objective
+
+ [More Information Needed]
+
+ ### Compute Infrastructure
+
+ [More Information Needed]
+
+ #### Hardware
+
+ [More Information Needed]
+
+ #### Software
+
+ [More Information Needed]
+
+ ## Citation [optional]
+
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+
+ **BibTeX:**
+
+ [More Information Needed]
+
+ **APA:**
+
+ [More Information Needed]
+
+ ## Glossary [optional]
+
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+
+ [More Information Needed]
+
+ ## More Information [optional]
+
+ [More Information Needed]
+
+ ## Model Card Authors [optional]
+
+ [More Information Needed]
+
+ ## Model Card Contact
+
+ [More Information Needed]
+ ### Framework versions
+
+ - PEFT 0.10.0
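
The model card's "How to Get Started with the Model" section above is still a placeholder. A minimal sketch for loading this LoRA adapter on top of the `google/gemma-2b-it` base model named in `adapter_config.json` might look like the following; the local `checkpoint-912` path is an assumption (substitute the repository ID or wherever the adapter files live):

```python
# Hedged sketch, not the author's published usage code: load the LoRA adapter
# from this checkpoint on top of google/gemma-2b-it and generate one reply.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model_id = "google/gemma-2b-it"
adapter_path = "checkpoint-912"  # assumption: local path to this checkpoint directory

tokenizer = AutoTokenizer.from_pretrained(adapter_path)
base_model = AutoModelForCausalLM.from_pretrained(base_model_id, torch_dtype=torch.bfloat16)
model = PeftModel.from_pretrained(base_model, adapter_path)

# Render the Gemma chat format; the template already inserts <bos>,
# so skip special tokens when tokenizing the rendered string.
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Hello!"}],
    tokenize=False,
    add_generation_prompt=True,
)
inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```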
checkpoint-912/adapter_config.json ADDED
@@ -0,0 +1,29 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "google/gemma-2b-it",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layer_replication": null,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "loftq_config": {},
+ "lora_alpha": 16,
+ "lora_dropout": 0.045,
+ "megatron_config": null,
+ "megatron_core": "megatron.core",
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 16,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "q_proj",
+ "v_proj"
+ ],
+ "task_type": "CAUSAL_LM",
+ "use_dora": false,
+ "use_rslora": false
+ }
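
For reference, the `adapter_config.json` above describes a standard PEFT LoRA setup (rank 16, alpha 16, dropout 0.045, adapting only the attention query and value projections). A hedged sketch of an equivalent configuration built with the `peft` `LoraConfig` API:

```python
# Sketch only: a LoraConfig mirroring the values stored in adapter_config.json.
from peft import LoraConfig

lora_config = LoraConfig(
    r=16,                                  # LoRA rank ("r" in the JSON)
    lora_alpha=16,                         # scaling factor
    lora_dropout=0.045,                    # dropout applied to the LoRA layers
    target_modules=["q_proj", "v_proj"],   # attention projections being adapted
    bias="none",
    task_type="CAUSAL_LM",
)
```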
checkpoint-912/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:00f9e7b6d8d4ae4883c36f420ea83b63ba29f533a347c6b2948ddab845e5a0e0
+ size 7382336
checkpoint-912/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6fd926e09d70479f99957bb866a4c7fb8b29331f4de26afe0027eb6520937329
+ size 14806394
checkpoint-912/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:049c26b844b79121ddd8379f7f69194e63f6fbf6aa007eeac0c66f17eebb8893
+ size 888
checkpoint-912/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b46ec9f76d63ced83e360da98bd444bf04dab069d5b0317232e7933146edd71f
+ size 14512
checkpoint-912/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5fefdbadb260b9541ef3e7a6e9ba19f0929ab4057e8b83b762e061f0e8d16b63
+ size 14512
checkpoint-912/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8e18a7a1b9962f609d6935f933a76a72dd978765ba3ed938adfc4a2ade4dfa0e
+ size 1064
checkpoint-912/special_tokens_map.json ADDED
@@ -0,0 +1,34 @@
+ {
+ "additional_special_tokens": [
+ "<start_of_turn>",
+ "<end_of_turn>"
+ ],
+ "bos_token": {
+ "content": "<bos>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<eos>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "<pad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
checkpoint-912/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:05e97791a5e007260de1db7e1692e53150e08cea481e2bf25435553380c147ee
+ size 17477929
checkpoint-912/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2
+ size 4241003
checkpoint-912/tokenizer_config.json ADDED
@@ -0,0 +1,70 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<pad>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<eos>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "<bos>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "106": {
+ "content": "<start_of_turn>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "107": {
+ "content": "<end_of_turn>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "<start_of_turn>",
+ "<end_of_turn>"
+ ],
+ "bos_token": "<bos>",
+ "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\n' + message['content'] | trim + '<end_of_turn>\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\n'}}{% endif %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<eos>",
+ "legacy": null,
+ "model_max_length": 1024,
+ "pad_token": "<pad>",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "GemmaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+ }
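
The `chat_template` above is the usual Gemma turn format: each message is wrapped in `<start_of_turn>role ... <end_of_turn>`, the assistant role is renamed `model`, and a system role raises an error. A small sketch of applying it, assuming the tokenizer is loaded from this local `checkpoint-912` directory:

```python
# Sketch: render a conversation with the chat template stored in tokenizer_config.json.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("checkpoint-912")  # assumption: local checkpoint path

messages = [{"role": "user", "content": "What does this adapter do?"}]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(text)
# Rough shape of the rendered prompt:
# <bos><start_of_turn>user
# What does this adapter do?<end_of_turn>
# <start_of_turn>model
```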
checkpoint-912/trainer_state.json ADDED
@@ -0,0 +1,273 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 7.947712418300654,
+ "eval_steps": 500,
+ "global_step": 912,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.22,
+ "grad_norm": 1.7954288721084595,
+ "learning_rate": 0.0005434782608695652,
+ "loss": 3.3233,
+ "step": 25
+ },
+ {
+ "epoch": 0.44,
+ "grad_norm": 0.6491029262542725,
+ "learning_rate": 0.0010869565217391304,
+ "loss": 2.1773,
+ "step": 50
+ },
+ {
+ "epoch": 0.65,
+ "grad_norm": 0.5170515775680542,
+ "learning_rate": 0.0016304347826086956,
+ "loss": 1.8647,
+ "step": 75
+ },
+ {
+ "epoch": 0.87,
+ "grad_norm": 0.2956017255783081,
+ "learning_rate": 0.0019804878048780487,
+ "loss": 1.7435,
+ "step": 100
+ },
+ {
+ "epoch": 1.09,
+ "grad_norm": 0.297506719827652,
+ "learning_rate": 0.001919512195121951,
+ "loss": 1.6766,
+ "step": 125
+ },
+ {
+ "epoch": 1.31,
+ "grad_norm": 0.31445440649986267,
+ "learning_rate": 0.0018585365853658537,
+ "loss": 1.6467,
+ "step": 150
+ },
+ {
+ "epoch": 1.53,
+ "grad_norm": 0.29371771216392517,
+ "learning_rate": 0.001797560975609756,
+ "loss": 1.6527,
+ "step": 175
+ },
+ {
+ "epoch": 1.74,
+ "grad_norm": 0.34706446528434753,
+ "learning_rate": 0.0017365853658536585,
+ "loss": 1.6367,
+ "step": 200
+ },
+ {
+ "epoch": 1.96,
+ "grad_norm": 0.40003177523612976,
+ "learning_rate": 0.0016756097560975609,
+ "loss": 1.6234,
+ "step": 225
+ },
+ {
+ "epoch": 2.18,
+ "grad_norm": 0.38144171237945557,
+ "learning_rate": 0.0016146341463414635,
+ "loss": 1.5696,
+ "step": 250
+ },
+ {
+ "epoch": 2.4,
+ "grad_norm": 0.2919274568557739,
+ "learning_rate": 0.0015536585365853658,
+ "loss": 1.5572,
+ "step": 275
+ },
+ {
+ "epoch": 2.61,
+ "grad_norm": 0.36394545435905457,
+ "learning_rate": 0.0014926829268292682,
+ "loss": 1.544,
+ "step": 300
+ },
+ {
+ "epoch": 2.83,
+ "grad_norm": 0.46701276302337646,
+ "learning_rate": 0.0014317073170731706,
+ "loss": 1.5656,
+ "step": 325
+ },
+ {
+ "epoch": 3.05,
+ "grad_norm": 0.3606548607349396,
+ "learning_rate": 0.0013707317073170732,
+ "loss": 1.5567,
+ "step": 350
+ },
+ {
+ "epoch": 3.27,
+ "grad_norm": 0.478109210729599,
+ "learning_rate": 0.0013097560975609756,
+ "loss": 1.4932,
+ "step": 375
+ },
+ {
+ "epoch": 3.49,
+ "grad_norm": 0.3231596052646637,
+ "learning_rate": 0.001248780487804878,
+ "loss": 1.4906,
+ "step": 400
+ },
+ {
+ "epoch": 3.7,
+ "grad_norm": 0.31399714946746826,
+ "learning_rate": 0.0011878048780487804,
+ "loss": 1.4919,
+ "step": 425
+ },
+ {
+ "epoch": 3.92,
+ "grad_norm": 0.3154957592487335,
+ "learning_rate": 0.0011268292682926828,
+ "loss": 1.4964,
+ "step": 450
+ },
+ {
+ "epoch": 4.14,
+ "grad_norm": 0.40386953949928284,
+ "learning_rate": 0.0010658536585365854,
+ "loss": 1.4469,
+ "step": 475
+ },
+ {
+ "epoch": 4.36,
+ "grad_norm": 0.3771909177303314,
+ "learning_rate": 0.0010048780487804877,
+ "loss": 1.4155,
+ "step": 500
+ },
+ {
+ "epoch": 4.58,
+ "grad_norm": 0.3730674982070923,
+ "learning_rate": 0.0009439024390243902,
+ "loss": 1.4165,
+ "step": 525
+ },
+ {
+ "epoch": 4.79,
+ "grad_norm": 0.4216112196445465,
+ "learning_rate": 0.0008829268292682927,
+ "loss": 1.4325,
+ "step": 550
+ },
+ {
+ "epoch": 5.01,
+ "grad_norm": 0.3717198371887207,
+ "learning_rate": 0.0008219512195121951,
+ "loss": 1.4369,
+ "step": 575
+ },
+ {
+ "epoch": 5.23,
+ "grad_norm": 0.4491746127605438,
+ "learning_rate": 0.0007609756097560976,
+ "loss": 1.3317,
+ "step": 600
+ },
+ {
+ "epoch": 5.45,
+ "grad_norm": 0.4487883448600769,
+ "learning_rate": 0.0007,
+ "loss": 1.3397,
+ "step": 625
+ },
+ {
+ "epoch": 5.66,
+ "grad_norm": 0.40379399061203003,
+ "learning_rate": 0.0006390243902439025,
+ "loss": 1.3571,
+ "step": 650
+ },
+ {
+ "epoch": 5.88,
+ "grad_norm": 0.3959712088108063,
+ "learning_rate": 0.0005780487804878049,
+ "loss": 1.3749,
+ "step": 675
+ },
+ {
+ "epoch": 6.1,
+ "grad_norm": 0.42109012603759766,
+ "learning_rate": 0.0005170731707317074,
+ "loss": 1.3143,
+ "step": 700
+ },
+ {
+ "epoch": 6.32,
+ "grad_norm": 0.45575135946273804,
+ "learning_rate": 0.0004560975609756098,
+ "loss": 1.2621,
+ "step": 725
+ },
+ {
+ "epoch": 6.54,
+ "grad_norm": 0.4629908502101898,
+ "learning_rate": 0.00039512195121951224,
+ "loss": 1.2827,
+ "step": 750
+ },
+ {
+ "epoch": 6.75,
+ "grad_norm": 0.43169212341308594,
+ "learning_rate": 0.0003341463414634147,
+ "loss": 1.295,
+ "step": 775
+ },
+ {
+ "epoch": 6.97,
+ "grad_norm": 0.4449198544025421,
+ "learning_rate": 0.0002731707317073171,
+ "loss": 1.3039,
+ "step": 800
+ },
+ {
+ "epoch": 7.19,
+ "grad_norm": 0.4514065384864807,
+ "learning_rate": 0.0002121951219512195,
+ "loss": 1.2146,
+ "step": 825
+ },
+ {
+ "epoch": 7.41,
+ "grad_norm": 0.48326772451400757,
+ "learning_rate": 0.00015121951219512194,
+ "loss": 1.2147,
+ "step": 850
+ },
+ {
+ "epoch": 7.63,
+ "grad_norm": 0.4626155197620392,
+ "learning_rate": 9.024390243902438e-05,
+ "loss": 1.2162,
+ "step": 875
+ },
+ {
+ "epoch": 7.84,
+ "grad_norm": 0.4477863013744354,
+ "learning_rate": 2.9268292682926833e-05,
+ "loss": 1.2255,
+ "step": 900
+ }
+ ],
+ "logging_steps": 25,
+ "max_steps": 912,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 8,
+ "save_steps": 500,
+ "total_flos": 8.89238090332242e+16,
+ "train_batch_size": 1,
+ "trial_name": null,
+ "trial_params": null
+ }
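
The `trainer_state.json` above logs one entry every 25 optimizer steps; the training loss falls from about 3.32 at step 25 to about 1.23 at step 900 over roughly 8 epochs (912 steps total). A small sketch for inspecting that history, assuming the file sits under a local `checkpoint-912/` directory:

```python
# Sketch: print the logged training loss and learning rate from trainer_state.json.
import json

with open("checkpoint-912/trainer_state.json") as f:  # assumption: local checkpoint path
    state = json.load(f)

for entry in state["log_history"]:
    print(f"step {entry['step']:>4}  epoch {entry['epoch']:>5.2f}  "
          f"loss {entry['loss']:.4f}  lr {entry['learning_rate']:.6f}")
```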
checkpoint-912/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4315b67556862b9ce3cbf60203821e3e2e372e434fc9b6fa9d65efd814fab5e3
+ size 4920