PEFT
Safetensors
French
eltorio commited on
Commit
ff6592c
·
verified ·
1 Parent(s): b545583

Training in progress, epoch 1

Browse files
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "gate_proj",
24
- "up_proj",
25
- "v_proj",
26
- "down_proj",
27
  "k_proj",
 
 
28
  "o_proj",
29
- "q_proj"
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
 
 
23
  "k_proj",
24
+ "v_proj",
25
+ "up_proj",
26
  "o_proj",
27
+ "down_proj",
28
+ "q_proj",
29
+ "gate_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:089302d2055542e154d716dc0a614198b212c3143e1131cd700f2d11bfc89262
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddce69774d1745c74f005a535e050ccc8c86ce46c4d92519e9f78a08c4dbd773
3
  size 97307544
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9c9e219ac9eec7c128200d707f49318339fc57b071a906213a35371f7c7ec9f
3
- size 5624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49eda80b5f50102214bcde180bdc1f218d118d0751c7f1bdaddb0e9e1a768dec
3
+ size 5560
wandb/debug-internal.log ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-05T14:39:13.215311083Z","level":"INFO","msg":"using version","core version":"0.18.3"}
2
+ {"time":"2024-12-05T14:39:13.215339406Z","level":"INFO","msg":"created symlink","path":"/kaggle/working/wandb/run-20241205_143913-2mwisme1/logs/debug-core.log"}
3
+ {"time":"2024-12-05T14:39:15.217256419Z","level":"ERROR","msg":"dialing: google: could not find default credentials. See https://cloud.google.com/docs/authentication/external/set-up-adc for more information"}
4
+ {"time":"2024-12-05T14:39:15.221746354Z","level":"INFO","msg":"created new stream","id":"2mwisme1"}
5
+ {"time":"2024-12-05T14:39:15.221774614Z","level":"INFO","msg":"stream: started","id":"2mwisme1"}
6
+ {"time":"2024-12-05T14:39:15.221805326Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"2mwisme1"}}
7
+ {"time":"2024-12-05T14:39:15.221843207Z","level":"INFO","msg":"handler: started","stream_id":{"value":"2mwisme1"}}
8
+ {"time":"2024-12-05T14:39:15.221842955Z","level":"INFO","msg":"sender: started","stream_id":{"value":"2mwisme1"}}
9
+ {"time":"2024-12-05T14:39:15.415690183Z","level":"INFO","msg":"wandb-core","!BADKEY":null}
10
+ {"time":"2024-12-05T14:39:15.416772438Z","level":"INFO","msg":"Starting system monitor"}
11
+ {"time":"2024-12-05T14:39:26.759443118Z","level":"INFO","msg":"Pausing system monitor"}
12
+ {"time":"2024-12-05T14:41:15.890079536Z","level":"INFO","msg":"Resuming system monitor"}
13
+ {"time":"2024-12-05T14:41:20.002045749Z","level":"INFO","msg":"Pausing system monitor"}
14
+ {"time":"2024-12-05T14:41:23.883801968Z","level":"INFO","msg":"Resuming system monitor"}
15
+ {"time":"2024-12-05T14:41:24.057644952Z","level":"INFO","msg":"Pausing system monitor"}
16
+ {"time":"2024-12-05T14:41:27.124441713Z","level":"INFO","msg":"Resuming system monitor"}
17
+ {"time":"2024-12-05T14:41:27.164833611Z","level":"INFO","msg":"Pausing system monitor"}
18
+ {"time":"2024-12-05T14:41:30.712386169Z","level":"INFO","msg":"Resuming system monitor"}
19
+ {"time":"2024-12-05T14:41:30.719681016Z","level":"INFO","msg":"Pausing system monitor"}
20
+ {"time":"2024-12-05T14:41:33.978004525Z","level":"INFO","msg":"Resuming system monitor"}
wandb/debug.log ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_setup.py:_flush():79] Current SDK version is 0.18.3
2
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_setup.py:_flush():79] Configure stats pid to 260
3
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_setup.py:_flush():79] Loading settings from /root/.config/wandb/settings
4
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_setup.py:_flush():79] Loading settings from /kaggle/working/wandb/settings
5
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_setup.py:_flush():79] Loading settings from environment variables: {'api_key': '***REDACTED***'}
6
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program': '<python with no main file>'}
7
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_setup.py:_flush():79] Applying login settings: {}
8
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_init.py:_log_setup():532] Logging user logs to /kaggle/working/wandb/run-20241205_143913-2mwisme1/logs/debug.log
10
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_init.py:_log_setup():533] Logging internal logs to /kaggle/working/wandb/run-20241205_143913-2mwisme1/logs/debug-internal.log
11
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_init.py:_jupyter_setup():478] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x78025df07a30>
12
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_init.py:init():617] calling init triggers
13
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_init.py:init():624] wandb.init called with sweep_config: {}
14
+ config: {}
15
+ 2024-12-05 14:39:13,211 INFO MainThread:260 [wandb_init.py:init():667] starting backend
16
+ 2024-12-05 14:39:13,211 INFO MainThread:260 [wandb_init.py:init():671] sending inform_init request
17
+ 2024-12-05 14:39:13,212 INFO MainThread:260 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
18
+ 2024-12-05 14:39:13,212 INFO MainThread:260 [wandb_init.py:init():684] backend started and connected
19
+ 2024-12-05 14:39:13,225 INFO MainThread:260 [wandb_run.py:_label_probe_notebook():1346] probe notebook
20
+ 2024-12-05 14:39:13,734 INFO MainThread:260 [wandb_init.py:init():779] updated telemetry
21
+ 2024-12-05 14:39:13,738 INFO MainThread:260 [wandb_init.py:init():812] communicating run to backend with 90.0 second timeout
22
+ 2024-12-05 14:39:15,412 INFO MainThread:260 [wandb_init.py:init():863] starting run threads in backend
23
+ 2024-12-05 14:39:16,103 INFO MainThread:260 [wandb_run.py:_console_start():2465] atexit reg
24
+ 2024-12-05 14:39:16,103 INFO MainThread:260 [wandb_run.py:_redirect():2313] redirect: wrap_raw
25
+ 2024-12-05 14:39:16,103 INFO MainThread:260 [wandb_run.py:_redirect():2378] Wrapping output streams.
26
+ 2024-12-05 14:39:16,104 INFO MainThread:260 [wandb_run.py:_redirect():2403] Redirects installed.
27
+ 2024-12-05 14:39:16,111 INFO MainThread:260 [wandb_init.py:init():907] run started, returning control to user process
28
+ 2024-12-05 14:39:16,115 INFO MainThread:260 [wandb_run.py:_config_callback():1394] config_cb None None {'peft_config': {'default': {'peft_type': <PeftType.LORA: 'LORA'>, 'auto_mapping': None, 'base_model_name_or_path': 'unsloth/llama-3.2-3b-instruct-bnb-4bit', 'revision': None, 'task_type': <TaskType.CAUSAL_LM: 'CAUSAL_LM'>, 'inference_mode': False, 'r': 16, 'target_modules': {'k_proj', 'v_proj', 'up_proj', 'o_proj', 'down_proj', 'q_proj', 'gate_proj'}, 'lora_alpha': 16, 'lora_dropout': 0, 'fan_in_fan_out': False, 'bias': 'none', 'use_rslora': False, 'modules_to_save': None, 'init_lora_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'rank_pattern': {}, 'alpha_pattern': {}, 'megatron_config': None, 'megatron_core': 'megatron.core', 'loftq_config': {}, 'use_dora': False, 'layer_replication': None, 'runtime_config': {'ephemeral_gpu_offload': False}}}, 'vocab_size': 128256, 'max_position_embeddings': 131072, 'hidden_size': 3072, 'intermediate_size': 8192, 'num_hidden_layers': 28, 'num_attention_heads': 24, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': True, 'rope_theta': 500000.0, 'rope_scaling': {'factor': 32.0, 'high_freq_factor': 4.0, 'low_freq_factor': 1.0, 'original_max_position_embeddings': 8192, 'rope_type': 'llama3'}, 'attention_bias': False, 'attention_dropout': 0.0, 'mlp_bias': False, 'head_dim': 128, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 128000, 'pad_token_id': 128004, 'eos_token_id': [128001, 128008, 128009], 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'unsloth/llama-3.2-3b-instruct-bnb-4bit', '_attn_implementation_autoset': True, 'transformers_version': '4.47.0.dev0', 'model_type': 'llama', 'quantization_config': {'bnb_4bit_compute_dtype': 'float16', 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'llm_int8_skip_modules': None, 'llm_int8_threshold': 6.0, 'load_in_4bit': True, 'load_in_8bit': False, 'quant_method': 'bitsandbytes'}, 'unsloth_version': '2024.12.2', 'output_dir': '/kaggle/working', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 0.0002, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 5, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Dec05_14-38-49_004ac713e0a4', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 3407, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_8bit', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'Llama-3.2-3B-appreciation', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'dataset_text_field': 'text', 'packing': False, 'max_seq_length': 2048, 'dataset_num_proc': 2, 'dataset_batch_size': 1000, 'model_init_kwargs': None, 'dataset_kwargs': {}, 'eval_packing': None, 'num_of_sequences': 1024, 'chars_per_token': '<CHARS_PER_TOKEN>', 'use_liger': False}
29
+ 2024-12-05 14:39:16,125 INFO MainThread:260 [wandb_config.py:__setitem__():154] config set model/num_parameters = 3237063680 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7802a4f3a680>>
30
+ 2024-12-05 14:39:16,125 INFO MainThread:260 [wandb_run.py:_config_callback():1394] config_cb model/num_parameters 3237063680 None
31
+ 2024-12-05 14:39:26,758 INFO MainThread:260 [jupyter.py:save_ipynb():387] not saving jupyter notebook
32
+ 2024-12-05 14:39:26,759 INFO MainThread:260 [wandb_init.py:_pause_backend():443] pausing backend
33
+ 2024-12-05 14:41:15,889 INFO MainThread:260 [wandb_init.py:_resume_backend():448] resuming backend
34
+ 2024-12-05 14:41:20,001 INFO MainThread:260 [jupyter.py:save_ipynb():387] not saving jupyter notebook
35
+ 2024-12-05 14:41:20,001 INFO MainThread:260 [wandb_init.py:_pause_backend():443] pausing backend
36
+ 2024-12-05 14:41:23,882 INFO MainThread:260 [wandb_init.py:_resume_backend():448] resuming backend
37
+ 2024-12-05 14:41:24,057 INFO MainThread:260 [jupyter.py:save_ipynb():387] not saving jupyter notebook
38
+ 2024-12-05 14:41:24,057 INFO MainThread:260 [wandb_init.py:_pause_backend():443] pausing backend
39
+ 2024-12-05 14:41:27,123 INFO MainThread:260 [wandb_init.py:_resume_backend():448] resuming backend
40
+ 2024-12-05 14:41:27,130 INFO MainThread:260 [jupyter.py:save_ipynb():387] not saving jupyter notebook
41
+ 2024-12-05 14:41:27,130 INFO MainThread:260 [wandb_init.py:_pause_backend():443] pausing backend
42
+ 2024-12-05 14:41:30,711 INFO MainThread:260 [wandb_init.py:_resume_backend():448] resuming backend
43
+ 2024-12-05 14:41:30,719 INFO MainThread:260 [jupyter.py:save_ipynb():387] not saving jupyter notebook
44
+ 2024-12-05 14:41:30,719 INFO MainThread:260 [wandb_init.py:_pause_backend():443] pausing backend
45
+ 2024-12-05 14:41:33,975 INFO MainThread:260 [wandb_init.py:_resume_backend():448] resuming backend
46
+ 2024-12-05 14:41:37,610 INFO MainThread:260 [wandb_run.py:_config_callback():1394] config_cb None None {'peft_config': {'default': {'peft_type': <PeftType.LORA: 'LORA'>, 'auto_mapping': None, 'base_model_name_or_path': 'unsloth/llama-3.2-3b-instruct-bnb-4bit', 'revision': None, 'task_type': <TaskType.CAUSAL_LM: 'CAUSAL_LM'>, 'inference_mode': False, 'r': 16, 'target_modules': {'k_proj', 'v_proj', 'up_proj', 'o_proj', 'down_proj', 'q_proj', 'gate_proj'}, 'lora_alpha': 16, 'lora_dropout': 0, 'fan_in_fan_out': False, 'bias': 'none', 'use_rslora': False, 'modules_to_save': None, 'init_lora_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'rank_pattern': {}, 'alpha_pattern': {}, 'megatron_config': None, 'megatron_core': 'megatron.core', 'loftq_config': {}, 'use_dora': False, 'layer_replication': None, 'runtime_config': {'ephemeral_gpu_offload': False}}}, 'vocab_size': 128256, 'max_position_embeddings': 131072, 'hidden_size': 3072, 'intermediate_size': 8192, 'num_hidden_layers': 28, 'num_attention_heads': 24, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': True, 'rope_theta': 500000.0, 'rope_scaling': {'factor': 32.0, 'high_freq_factor': 4.0, 'low_freq_factor': 1.0, 'original_max_position_embeddings': 8192, 'rope_type': 'llama3'}, 'attention_bias': False, 'attention_dropout': 0.0, 'mlp_bias': False, 'head_dim': 128, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 128000, 'pad_token_id': 128004, 'eos_token_id': [128001, 128008, 128009], 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'unsloth/llama-3.2-3b-instruct-bnb-4bit', '_attn_implementation_autoset': True, 'transformers_version': '4.47.0.dev0', 'model_type': 'llama', 'quantization_config': {'bnb_4bit_compute_dtype': 'float16', 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'llm_int8_skip_modules': None, 'llm_int8_threshold': 6.0, 'load_in_4bit': True, 'load_in_8bit': False, 'quant_method': 'bitsandbytes'}, 'unsloth_version': '2024.12.2', 'output_dir': '/kaggle/working', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 0.0002, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 5, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Dec05_14-41-15_004ac713e0a4', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 3407, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_8bit', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'Llama-3.2-3B-appreciation', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'dataset_text_field': 'text', 'packing': False, 'max_seq_length': 2048, 'dataset_num_proc': 2, 'dataset_batch_size': 1000, 'model_init_kwargs': None, 'dataset_kwargs': {}, 'eval_packing': None, 'num_of_sequences': 1024, 'chars_per_token': '<CHARS_PER_TOKEN>', 'use_liger': False}
47
+ 2024-12-05 14:41:37,619 INFO MainThread:260 [wandb_config.py:__setitem__():154] config set model/num_parameters = 3237063680 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7802a4f3a680>>
48
+ 2024-12-05 14:41:37,619 INFO MainThread:260 [wandb_run.py:_config_callback():1394] config_cb model/num_parameters 3237063680 None
wandb/run-20241205_143913-2mwisme1/files/output.log ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ==((====))== Unsloth - 2x faster free finetuning | Num GPUs = 1
2
+ \\ /| Num examples = 468 | Num Epochs = 5
3
+ O^O/ \_/ \ Batch size per device = 2 | Gradient Accumulation steps = 4
4
+ \ / Total batch size = 8 | Total steps = 290
5
+ "-____-" Number of trainable parameters = 24,313,856
wandb/run-20241205_143913-2mwisme1/files/requirements.txt ADDED
@@ -0,0 +1,814 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ brotlipy==0.7.0
2
+ PySocks==1.7.1
3
+ conda-content-trust==0+unknown
4
+ six==1.16.0
5
+ ruamel-yaml-conda==0.15.100
6
+ xformers==0.0.28.post3
7
+ peft==0.13.2
8
+ keras==3.7.0
9
+ triton==3.1.0
10
+ ml-dtypes==0.3.2
11
+ nvidia-cuda-cupti-cu12==12.1.105
12
+ nvidia-nvtx-cu12==12.1.105
13
+ pfzy==0.3.4
14
+ nvidia-nccl-cu12==2.21.5
15
+ inquirerpy==0.3.4
16
+ nvidia-cuda-nvrtc-cu12==12.1.105
17
+ mpmath==1.3.0
18
+ hf_transfer==0.1.8
19
+ cut-cross-entropy==24.12.1
20
+ nvidia-cudnn-cu12==9.1.0.70
21
+ pip3-autoremove==1.2.2
22
+ nvidia-cuda-runtime-cu12==12.1.105
23
+ tensorboard==2.16.2
24
+ torch==2.5.1+cu121
25
+ unsloth_zoo==2024.12.1
26
+ tensorflow==2.16.2
27
+ unsloth==2024.12.2
28
+ shtab==1.7.1
29
+ nvidia-cublas-cu12==12.1.3.1
30
+ transformers==4.47.0.dev0
31
+ sympy==1.13.1
32
+ bitsandbytes==0.44.2.dev0
33
+ trl==0.12.1
34
+ torchaudio==2.5.1+cu121
35
+ nvidia-curand-cu12==10.3.2.106
36
+ nvidia-cufft-cu12==11.0.2.54
37
+ nvidia-cusolver-cu12==11.4.5.107
38
+ torchvision==0.20.1+cu121
39
+ nvidia-cusparse-cu12==12.1.0.106
40
+ tyro==0.9.2
41
+ nvidia-nvjitlink-cu12==12.1.105
42
+ jupyter-lsp==1.5.1
43
+ packaging==21.3
44
+ jupyter_server==2.12.5
45
+ Wand==0.6.13
46
+ pytesseract==0.3.13
47
+ pypdf==5.0.1
48
+ pdf2image==1.17.0
49
+ kaggle-environments==1.14.15
50
+ safetensors==0.4.5
51
+ py-cpuinfo==9.0.0
52
+ qgrid==1.3.1
53
+ woodwork==0.31.0
54
+ gcsfs==2024.6.1
55
+ google-auth-httplib2==0.2.0
56
+ onnx==1.17.0
57
+ pytorch-ignite==0.5.1
58
+ MarkupSafe==2.1.5
59
+ sentry-sdk==2.15.0
60
+ torchinfo==1.8.0
61
+ ndindex==1.9.2
62
+ learntools==0.3.4
63
+ s3fs==2024.6.1
64
+ eval_type_backport==0.2.0
65
+ aiobotocore==2.15.1
66
+ featuretools==1.31.0
67
+ plotly-express==0.4.1
68
+ aiohttp==3.9.5
69
+ easyocr==1.7.2
70
+ slicer==0.0.7
71
+ pyemd==1.0.0
72
+ tsfresh==0.20.3
73
+ vec_noise==1.1.4
74
+ pandasql==0.7.3
75
+ setproctitle==1.3.3
76
+ albucore==0.0.17
77
+ multiprocess==0.70.16
78
+ opencv-python-headless==4.10.0.84
79
+ gymnasium==0.29.0
80
+ openpyxl==3.1.5
81
+ cesium==0.12.3
82
+ torchmetrics==1.4.2
83
+ shap==0.44.1
84
+ watchdog==5.0.3
85
+ google-api-python-client==2.147.0
86
+ ghapi==1.0.6
87
+ jieba==0.42.1
88
+ alembic==1.13.3
89
+ docker-pycreds==0.4.0
90
+ scipy==1.14.1
91
+ blinker==1.8.2
92
+ nbdev==2.3.31
93
+ bqplot==0.12.43
94
+ pydub==0.25.1
95
+ python-bidi==0.6.0
96
+ pycryptodome==3.20.0
97
+ tables==3.10.1
98
+ ninja==1.11.1.1
99
+ tokenizers==0.20.0
100
+ pydegensac==0.1.2
101
+ pyclipper==1.3.0.post5
102
+ blosc2==2.7.1
103
+ PyArabic==0.6.15
104
+ fastai==2.7.17
105
+ gym-notices==0.0.8
106
+ itsdangerous==2.2.0
107
+ numpy==1.26.4
108
+ Werkzeug==3.0.4
109
+ traittypes==0.2.1
110
+ optuna==4.0.0
111
+ Pympler==1.1
112
+ albumentations==1.4.17
113
+ scikit-learn==1.2.2
114
+ numba==0.60.0
115
+ timm==1.0.9
116
+ pytorch-lightning==2.4.0
117
+ colorlog==6.8.2
118
+ gym==0.26.2
119
+ execnb==0.1.6
120
+ llvmlite==0.43.0
121
+ llvmlite==0.43.0
122
+ earthengine-api==1.1.2
123
+ pettingzoo==1.24.0
124
+ segment_anything==1.0
125
+ aioitertools==0.12.0
126
+ rgf-python==3.12.0
127
+ Flask==3.0.3
128
+ wandb==0.18.3
129
+ stable-baselines3==2.1.0
130
+ datasets==3.0.1
131
+ lightning-utilities==0.11.7
132
+ huggingface-hub==0.25.1
133
+ et-xmlfile==1.1.0
134
+ accelerate==0.34.2
135
+ Shapely==1.8.5.post1
136
+ shapely==2.0.6
137
+ Shimmy==1.3.0
138
+ stumpy==1.13.0
139
+ Rtree==1.3.0
140
+ fsspec==2024.6.1
141
+ fsspec==2024.9.0
142
+ fastcore==1.7.10
143
+ fastdownload==0.0.7
144
+ gatspy==0.3
145
+ botocore==1.35.23
146
+ dill==0.3.8
147
+ google-cloud-bigquery==2.34.4
148
+ google-cloud-videointelligence==2.13.5
149
+ google-api-core==2.11.1
150
+ google-api-core==2.20.0
151
+ google-cloud-aiplatform==0.6.0a1
152
+ google-cloud-monitoring==2.21.0
153
+ google-auth==2.30.0
154
+ google-cloud-automl==1.0.1
155
+ google-cloud-storage==1.44.0
156
+ googleapis-common-protos==1.63.1
157
+ google-cloud-translate==3.12.1
158
+ google-cloud-core==2.4.1
159
+ google-generativeai==0.8.2
160
+ google-cloud-vision==2.8.0
161
+ google-cloud-datastore==2.20.1
162
+ google-cloud-artifact-registry==1.11.3
163
+ google-cloud-language==2.14.0
164
+ en-core-web-sm==3.7.1
165
+ en-core-web-lg==3.7.1
166
+ docstring-to-markdown==0.15
167
+ jupyterlab-lsp==5.1.0
168
+ traceml==1.0.8
169
+ qtconsole==5.6.0
170
+ ipywidgets==7.7.1
171
+ pytoolconfig==1.3.1
172
+ rope==1.13.0
173
+ async-lru==2.0.4
174
+ datatile==1.0.3
175
+ pydocstyle==6.3.0
176
+ QtPy==2.4.1
177
+ isort==5.13.2
178
+ tomlkit==0.13.2
179
+ olefile==0.47
180
+ mccabe==0.7.0
181
+ pylint==3.3.1
182
+ jupyter-console==6.6.3
183
+ astroid==3.3.4
184
+ yapf==0.40.2
185
+ jupyterlab==4.2.5
186
+ flake8==7.1.1
187
+ whatthepatch==1.0.6
188
+ pyflakes==3.2.0
189
+ pandas-summary==0.2.0
190
+ nbconvert==6.4.5
191
+ testpath==0.6.0
192
+ nbclient==0.5.13
193
+ python-lsp-server==1.12.0
194
+ kornia==0.7.3
195
+ snowballstemmer==2.2.0
196
+ python-lsp-jsonrpc==1.1.2
197
+ autopep8==2.0.4
198
+ pycodestyle==2.12.1
199
+ isoweek==1.3.3
200
+ widgetsnbextension==3.6.9
201
+ kornia_rs==0.1.5
202
+ mistune==0.8.4
203
+ openslide-python==1.3.1
204
+ html5lib==1.1
205
+ ppft==1.7.6.9
206
+ pathos==0.3.3
207
+ pox==0.3.5
208
+ pandas-datareader==0.10.0
209
+ category-encoders==2.6.4
210
+ mlcrate==0.2.0
211
+ Janome==0.5.0
212
+ annoy==1.17.3
213
+ yellowbrick==1.5
214
+ emoji==2.13.2
215
+ librosa==0.10.2.post1
216
+ soxr==0.5.0.post1
217
+ memory-profiler==0.61.0
218
+ sentencepiece==0.2.0
219
+ audioread==3.0.1
220
+ cufflinks==0.17.3
221
+ soundfile==0.12.1
222
+ lime==0.2.0.1
223
+ colorlover==0.3.0
224
+ CVXcanon==0.1.2
225
+ sklearn-pandas==2.2.0
226
+ scikit-multilearn==0.2.0
227
+ path==17.0.0
228
+ odfpy==1.4.1
229
+ mpld3==0.5.10
230
+ kaggle==1.6.17
231
+ narwhals==1.9.0
232
+ fury==0.11.0
233
+ xarray-einstats==0.8.0
234
+ scikit-surprise==1.1.4
235
+ ydata-profiling==4.10.0
236
+ panel==1.5.1
237
+ plotnine==0.13.6
238
+ py4j==0.10.9.7
239
+ fuzzywuzzy==0.18.0
240
+ fastprogress==1.0.3
241
+ update-checker==0.18.0
242
+ missingno==0.5.2
243
+ catboost==1.2.7
244
+ pyexcel-io==0.6.6
245
+ stopit==1.1.2
246
+ arviz==0.20.0
247
+ branca==0.8.0
248
+ mizani==0.11.4
249
+ nltk==3.2.4
250
+ semver==3.0.2
251
+ SimpleITK==2.4.0
252
+ TPOT==0.12.1
253
+ nibabel==5.2.1
254
+ folium==0.17.0
255
+ gpxpy==1.6.2
256
+ bayesian-optimization==1.5.1
257
+ hyperopt==0.2.7
258
+ python-louvain==0.16
259
+ orderly-set==5.2.2
260
+ typing-inspect==0.9.0
261
+ ecos==2.0.14
262
+ lxml==5.3.0
263
+ trx-python==0.3
264
+ iniconfig==2.0.0
265
+ leven==1.0.4
266
+ path.py==12.5.0
267
+ pymc3==3.11.4
268
+ wavio==0.0.9
269
+ lml==0.1.0
270
+ deap==1.4.1
271
+ marshmallow==3.22.0
272
+ pygltflib==1.16.2
273
+ numexpr==2.10.1
274
+ pyLDAvis==3.4.1
275
+ python-slugify==8.0.4
276
+ pydantic==2.9.2
277
+ langid==1.1.6
278
+ setuptools-scm==8.1.0
279
+ geojson==3.1.0
280
+ scikit-plot==0.3.7
281
+ holidays==0.57
282
+ nose==1.3.7
283
+ pytest==8.3.3
284
+ google-ai-generativelanguage==0.6.10
285
+ text-unidecode==1.3
286
+ Theano-PyMC==1.1.2
287
+ dipy==1.9.0
288
+ h5netcdf==1.3.0
289
+ funcy==2.0
290
+ holoviews==1.19.1
291
+ stanio==0.5.1
292
+ squarify==0.4.4
293
+ mlxtend==0.23.1
294
+ future==1.0.0
295
+ dataclasses-json==0.6.7
296
+ prophet==1.1.5
297
+ imgaug==0.4.0
298
+ nilearn==0.10.4
299
+ deepdiff==8.0.1
300
+ eli5==0.13.0
301
+ pyviz_comms==3.0.3
302
+ pydicom==3.0.1
303
+ mypy-extensions==1.0.0
304
+ haversine==2.8.1
305
+ sphinx-rtd-theme==0.2.4
306
+ altair==5.4.1
307
+ cmdstanpy==1.2.4
308
+ pyexcel-ods==0.6.0
309
+ preprocessing==0.1.13
310
+ matplotlib-venn==1.1.1
311
+ pyaml==24.9.0
312
+ scikit-optimize==0.10.2
313
+ vtk==9.3.1
314
+ xvfbwrapper==0.2.9
315
+ urwid_readline==0.15.1
316
+ scikit-learn-intelex==2024.7.0
317
+ Boruta==0.4.3
318
+ pybind11==2.13.6
319
+ line_profiler==4.1.3
320
+ datashader==0.16.3
321
+ fasttext==0.9.3
322
+ s3transfer==0.6.2
323
+ param==2.1.1
324
+ pudb==2024.1.2
325
+ jmespath==1.0.1
326
+ xarray==2024.9.0
327
+ colorcet==3.1.0
328
+ urwid==2.6.15
329
+ boto3==1.26.100
330
+ imbalanced-learn==0.12.3
331
+ daal4py==2024.7.0
332
+ daal==2024.7.0
333
+ libpysal==4.9.2
334
+ pyct==0.5.0
335
+ tbb==2021.13.1
336
+ gensim==4.3.3
337
+ textblob==0.18.0.post0
338
+ xgboost==2.0.3
339
+ opencv-python==4.10.0.84
340
+ Theano==1.0.5
341
+ hep-ml==0.7.2
342
+ opencv-contrib-python==4.10.0.84
343
+ kagglehub==0.3.1
344
+ keras-core==0.1.7
345
+ keras-nlp==0.15.1
346
+ tensorflow_decision_forests==1.9.1
347
+ wurlitzer==3.1.1
348
+ tensorflow-text==2.16.1
349
+ ydf==0.8.0
350
+ keras-cv==0.9.0
351
+ h2o==3.46.0.5
352
+ polars==1.9.0
353
+ pooch==1.8.2
354
+ igraph==0.11.6
355
+ optax==0.2.2
356
+ orbax-checkpoint==0.6.4
357
+ flax==0.8.4
358
+ chex==0.1.86
359
+ tensorstore==0.1.66
360
+ dask-expr==1.1.15
361
+ python-dateutil==2.9.0.post0
362
+ geographiclib==2.0
363
+ PyUpSet==0.1.1.post7
364
+ pandas==2.2.3
365
+ pandas==2.2.2
366
+ cloudpickle==3.0.0
367
+ matplotlib==3.7.5
368
+ matplotlib==3.9.2
369
+ ipympl==0.7.0
370
+ PyYAML==6.0.2
371
+ texttable==1.7.0
372
+ geopy==2.4.1
373
+ dask==2024.9.1
374
+ mne==1.8.0
375
+ pynvrtc==9.2
376
+ pycuda==2024.1.2
377
+ pytools==2024.1.14
378
+ Mako==1.3.5
379
+ jaxlib==0.4.26.dev20240620
380
+ jax==0.4.26
381
+ lightgbm==4.2.0
382
+ pynvml==11.4.1
383
+ annotated-types==0.7.0
384
+ srsly==2.4.8
385
+ catalogue==2.0.10
386
+ partd==1.4.2
387
+ langcodes==3.4.1
388
+ preshed==3.0.9
389
+ pytz==2024.2
390
+ pytz==2024.1
391
+ pynvjitlink-cu12==0.3.0
392
+ spacy-legacy==3.0.12
393
+ spacy==3.7.6
394
+ murmurhash==1.0.10
395
+ thinc==8.2.5
396
+ language_data==1.2.0
397
+ blis==0.7.10
398
+ pydantic_core==2.23.4
399
+ grpcio==1.62.2
400
+ grpcio==1.64.1
401
+ raft-dask==24.8.1
402
+ msgpack==1.1.0
403
+ msgpack==1.0.8
404
+ distributed==2024.7.1
405
+ wrapt==1.16.0
406
+ pylibraft==24.8.1
407
+ cymem==2.0.8
408
+ nvtx==0.2.10
409
+ spacy-loggers==1.0.5
410
+ wasabi==1.1.2
411
+ pyarrow==16.1.0
412
+ cupy==13.3.0
413
+ zict==3.0.0
414
+ bokeh==3.5.2
415
+ dask-cudf==24.8.3
416
+ treelite==4.3.0
417
+ xyzservices==2024.9.0
418
+ cuml==24.8.0
419
+ shellingham==1.5.4
420
+ proto-plus==1.23.0
421
+ locket==1.0.0
422
+ tzdata==2024.2
423
+ tzdata==2024.1
424
+ typer-slim==0.12.5
425
+ pyarrow-hotfix==0.6
426
+ toolz==0.12.1
427
+ rapids-dask-dependency==24.8.0a0
428
+ rmm==24.8.2
429
+ markdown-it-py==3.0.0
430
+ fastrlock==0.8.2
431
+ mdurl==0.1.2
432
+ weasel==0.4.1
433
+ rich==13.9.1
434
+ rich==13.7.1
435
+ cudf==24.8.3
436
+ confection==0.1.4
437
+ tblib==3.0.0
438
+ joblib==1.4.2
439
+ cuda-python==12.6.0
440
+ typer==0.12.5
441
+ typer==0.12.3
442
+ marisa-trie==1.1.0
443
+ distributed-ucxx==0.39.1
444
+ cloudpathlib==0.19.0
445
+ ucx-py==0.39.2
446
+ cytoolz==0.12.3
447
+ smart_open==7.0.4
448
+ click==8.1.7
449
+ dask-cuda==24.8.2
450
+ protobuf==4.25.3
451
+ protobuf==3.20.3
452
+ ucxx==0.39.1
453
+ sortedcontainers==2.4.0
454
+ lz4==4.3.3
455
+ pyparsing==3.1.4
456
+ pyparsing==3.1.2
457
+ zstandard==0.23.0
458
+ unicodedata2==15.1.0
459
+ fonttools==4.54.1
460
+ fonttools==4.53.0
461
+ pyshp==2.3.1
462
+ pillow==10.4.0
463
+ pillow==10.3.0
464
+ cycler==0.12.1
465
+ conda==24.9.0
466
+ certifi==2024.8.30
467
+ pyproj==3.7.0
468
+ pyproj==3.6.1
469
+ libmambapy==1.5.10
470
+ Cartopy==0.23.0
471
+ contourpy==1.3.0
472
+ contourpy==1.2.1
473
+ munkres==1.1.4
474
+ kiwisolver==1.4.7
475
+ kiwisolver==1.4.5
476
+ mamba==1.5.10
477
+ conda-libmamba-solver==23.12.0
478
+ graphviz==0.20.3
479
+ pycparser==2.22
480
+ nbdime==3.2.0
481
+ astunparse==1.6.3
482
+ fastapi-cli==0.0.4
483
+ jsonschema-specifications==2023.12.1
484
+ pandocfilters==1.5.0
485
+ opentelemetry-exporter-otlp==1.25.0
486
+ libclang==18.1.1
487
+ h11==0.14.0
488
+ grpc-google-iam-v1==0.12.7
489
+ rpds-py==0.18.1
490
+ jupyterlab_pygments==0.3.0
491
+ tensorflow-hub==0.16.1
492
+ cryptography==42.0.8
493
+ requests-oauthlib==2.0.0
494
+ pydata-google-auth==1.8.2
495
+ overrides==7.7.0
496
+ ipython-genutils==0.2.0
497
+ y-py==0.6.2
498
+ opentelemetry-proto==1.25.0
499
+ greenlet==3.0.3
500
+ nvidia-ml-py==11.495.46
501
+ PyJWT==2.8.0
502
+ time-machine==2.14.1
503
+ Cython==3.0.10
504
+ tensorflow-probability==0.24.0
505
+ click-plugins==1.1.1
506
+ google-cloud-pubsub==2.21.3
507
+ jupyter_core==5.7.2
508
+ webcolors==24.6.0
509
+ jupyterlab_server==2.27.2
510
+ grpcio-status==1.48.0
511
+ grpcio-status==1.48.2
512
+ fqdn==1.5.1
513
+ jeepney==0.8.0
514
+ google-cloud-recommendations-ai==0.7.1
515
+ httptools==0.6.1
516
+ ipython-sql==0.5.0
517
+ toml==0.10.2
518
+ ipykernel==6.29.4
519
+ tensorboardX==2.6.2.2
520
+ objsize==0.6.1
521
+ patsy==0.5.6
522
+ immutabledict==4.2.0
523
+ Jinja2==3.1.4
524
+ requests-toolbelt==0.10.1
525
+ statsmodels==0.14.2
526
+ tenacity==8.3.0
527
+ platformdirs==3.11.0
528
+ platformdirs==4.2.2
529
+ google-cloud-iam==2.15.0
530
+ typeguard==4.3.0
531
+ jupyter_server_terminals==0.5.3
532
+ httpcore==1.0.5
533
+ ipython==8.21.0
534
+ ipython==8.25.0
535
+ google-cloud-resource-manager==1.12.3
536
+ tensorflow-estimator==2.15.0
537
+ idna==3.7
538
+ pandas-profiling==3.6.6
539
+ tensorflow-cloud==0.1.16
540
+ distlib==0.3.8
541
+ lazy_loader==0.4
542
+ termcolor==2.4.0
543
+ tensorflow-datasets==4.9.6
544
+ importlib_resources==6.4.0
545
+ opentelemetry-exporter-otlp-proto-grpc==1.25.0
546
+ jupyter-ydoc==0.2.5
547
+ aiofiles==22.1.0
548
+ wordcloud==1.9.3
549
+ opencensus==0.11.4
550
+ jupyterlab_git==0.44.0
551
+ truststore==0.8.0
552
+ linkify-it-py==2.0.3
553
+ isoduration==20.11.0
554
+ google-cloud-bigquery-connection==1.15.3
555
+ setuptools==70.0.0
556
+ opentelemetry-semantic-conventions==0.46b0
557
+ cffi==1.16.0
558
+ pure-eval==0.2.2
559
+ webencodings==0.5.1
560
+ orjson==3.10.4
561
+ wheel==0.43.0
562
+ multidict==6.0.5
563
+ starlette==0.37.2
564
+ Deprecated==1.2.14
565
+ ImageHash==4.3.1
566
+ parso==0.8.4
567
+ psutil==5.9.3
568
+ psutil==5.9.8
569
+ stack-data==0.6.2
570
+ stack-data==0.6.3
571
+ virtualenv==20.21.0
572
+ entrypoints==0.4
573
+ opentelemetry-api==1.25.0
574
+ GitPython==3.1.43
575
+ oauthlib==3.2.2
576
+ jupyter_server_fileid==0.9.2
577
+ smmap==5.0.1
578
+ tensorflow-serving-api==2.16.1
579
+ kernels-mixer==0.0.13
580
+ jedi==0.19.1
581
+ argon2-cffi-bindings==21.2.0
582
+ namex==0.0.8
583
+ textual==0.67.1
584
+ h5py==3.11.0
585
+ pip==24.0
586
+ argon2-cffi==23.1.0
587
+ attrs==23.2.0
588
+ uri-template==1.3.0
589
+ multimethod==1.11.2
590
+ zipp==3.19.2
591
+ menuinst==2.1.1
592
+ pydot==1.4.2
593
+ defusedxml==0.7.1
594
+ decorator==5.1.1
595
+ fastjsonschema==2.19.1
596
+ asttokens==2.4.1
597
+ uvloop==0.19.0
598
+ Markdown==3.6
599
+ google-pasta==0.2.0
600
+ tensorboard_plugin_profile==2.15.1
601
+ parsy==2.1
602
+ google-cloud-jupyter-config==0.0.10
603
+ absl-py==1.4.0
604
+ prometheus_client==0.20.0
605
+ opt-einsum==3.3.0
606
+ charset-normalizer==3.3.2
607
+ kfp-server-api==2.0.5
608
+ ray-cpp==2.24.0
609
+ kfp-pipeline-spec==0.2.2
610
+ appdirs==1.4.4
611
+ google-resumable-media==2.7.1
612
+ pluggy==1.5.0
613
+ fiona==1.9.6
614
+ simpervisor==1.0.0
615
+ pkgutil_resolve_name==1.3.10
616
+ sqlparse==0.5.0
617
+ filelock==3.15.1
618
+ papermill==2.6.0
619
+ blessed==1.20.0
620
+ executing==2.0.1
621
+ watchfiles==0.22.0
622
+ colorful==0.5.6
623
+ wcwidth==0.2.13
624
+ async-timeout==4.0.3
625
+ debugpy==1.8.1
626
+ pexpect==4.9.0
627
+ ptyprocess==0.7.0
628
+ google-cloud-bigtable==1.7.3
629
+ archspec==0.2.3
630
+ nbformat==5.10.4
631
+ pins==0.8.6
632
+ gast==0.5.4
633
+ opencensus-context==0.1.3
634
+ nest-asyncio==1.6.0
635
+ ypy-websocket==0.8.4
636
+ notebook==6.5.7
637
+ exceptiongroup==1.2.0
638
+ ansicolors==1.1.8
639
+ multipledispatch==1.0.0
640
+ hdfs==2.7.3
641
+ jupyterlab_widgets==3.0.11
642
+ Babel==2.15.0
643
+ simple_parsing==0.1.5
644
+ dacite==1.8.1
645
+ cligj==0.7.2
646
+ fastavro==1.9.4
647
+ tifffile==2024.5.22
648
+ python-json-logger==2.0.7
649
+ cachetools==5.3.3
650
+ cachetools==4.2.4
651
+ tornado==6.4.1
652
+ tangled-up-in-unicode==0.2.0
653
+ anyio==4.4.0
654
+ docstring_parser==0.16
655
+ pickleshare==0.7.5
656
+ sqlglot==19.9.0
657
+ bigframes==0.22.0
658
+ bleach==6.1.0
659
+ keyrings.google-artifactregistry-auth==1.1.2
660
+ tinycss2==1.3.0
661
+ cached-property==1.5.2
662
+ pymongo==3.13.0
663
+ atpublic==4.1.0
664
+ cloud-tpu-client==0.10
665
+ tensorflow-metadata==0.14.0
666
+ urllib3==1.26.18
667
+ urllib3==2.2.1
668
+ pyu2f==0.1.5
669
+ mdit-py-plugins==0.4.1
670
+ terminado==0.18.1
671
+ Brotli==1.1.0
672
+ grpc-interceptor==0.15.4
673
+ uvicorn==0.30.1
674
+ tensorflow-io-gcs-filesystem==0.37.0
675
+ nb_conda==2.2.1
676
+ httplib2==0.21.0
677
+ gpustat==1.0.0
678
+ yarl==1.9.4
679
+ importlib-metadata==7.0.0
680
+ httpx==0.27.0
681
+ distro==1.9.0
682
+ PyWavelets==1.6.0
683
+ jupyter_server_ydoc==0.8.0
684
+ pyasn1==0.6.0
685
+ phik==0.12.4
686
+ cloud-tpu-profiler==2.4.0
687
+ email_validator==2.1.1
688
+ keras-tuner==1.4.7
689
+ array_record==0.5.1
690
+ fasteners==0.19
691
+ colorama==0.4.6
692
+ matplotlib-inline==0.1.7
693
+ nb_conda_kernels==2.5.1
694
+ beautifulsoup4==4.12.3
695
+ apache-beam==2.46.0
696
+ tabulate==0.9.0
697
+ tomli==2.0.1
698
+ notebook_shim==0.2.4
699
+ kfp==2.5.0
700
+ jupyter-http-over-ws==0.0.8
701
+ jsonpatch==1.33
702
+ threadpoolctl==3.5.0
703
+ ujson==5.10.0
704
+ aiosqlite==0.20.0
705
+ jaraco.classes==3.4.0
706
+ soupsieve==2.5
707
+ visions==0.7.5
708
+ scikit-image==0.23.2
709
+ gitdb==4.0.11
710
+ pendulum==3.0.0
711
+ memray==1.12.0
712
+ notebook_executor==0.2
713
+ google-crc32c==1.5.0
714
+ frozendict==2.4.4
715
+ geopandas==0.14.4
716
+ jax-jumpy==1.0.0
717
+ optree==0.11.0
718
+ pyzmq==26.0.3
719
+ opentelemetry-exporter-otlp-proto-common==1.25.0
720
+ uc-micro-py==1.0.3
721
+ xxhash==3.4.1
722
+ pyasn1_modules==0.4.0
723
+ uritemplate==3.0.1
724
+ more-itertools==10.3.0
725
+ prettytable==3.10.0
726
+ promise==2.3
727
+ pycosat==0.6.6
728
+ google-auth-oauthlib==1.2.0
729
+ traitlets==5.14.3
730
+ conda_package_streaming==0.10.0
731
+ ruamel.yaml==0.18.6
732
+ google-cloud-spanner==3.47.0
733
+ rfc3986-validator==0.1.1
734
+ Send2Trash==1.8.3
735
+ prompt_toolkit==3.0.47
736
+ sniffio==1.3.1
737
+ keyring==25.2.1
738
+ referencing==0.35.1
739
+ google-cloud-dlp==3.18.0
740
+ conda-package-handling==2.3.0
741
+ websockets==12.0
742
+ flatbuffers==24.3.25
743
+ jupyter-server-mathjax==0.2.6
744
+ comm==0.2.2
745
+ opentelemetry-exporter-otlp-proto-http==1.25.0
746
+ websocket-client==1.8.0
747
+ requests==2.32.3
748
+ retrying==1.3.3
749
+ retrying==1.3.4
750
+ google-cloud-pubsublite==1.10.0
751
+ explainable-ai-sdk==1.3.3
752
+ jsonpointer==2.4
753
+ typing_extensions==4.12.2
754
+ backports.tarfile==1.2.0
755
+ dnspython==2.6.1
756
+ Farama-Notifications==0.0.4
757
+ opentelemetry-sdk==1.25.0
758
+ docopt==0.6.2
759
+ ibis-framework==7.1.0
760
+ jaraco.functools==4.0.1
761
+ gviz-api==1.10.0
762
+ frozenlist==1.4.1
763
+ google-apitools==0.5.31
764
+ python-multipart==0.0.9
765
+ SQLAlchemy==2.0.30
766
+ kubernetes==26.1.0
767
+ witwidget==1.8.1
768
+ docker==7.1.0
769
+ bidict==0.23.1
770
+ jupyter-events==0.10.0
771
+ beatrix_jupyterlab==2024.66.154055
772
+ imageio==2.34.1
773
+ arrow==1.3.0
774
+ nbclassic==1.1.0
775
+ tqdm==4.66.4
776
+ networkx==3.3
777
+ python-dotenv==1.0.1
778
+ tf_keras==2.16.0
779
+ oauth2client==4.1.3
780
+ kt-legacy==1.0.5
781
+ fastapi==0.111.0
782
+ db-dtypes==1.2.0
783
+ SecretStorage==3.3.3
784
+ seaborn==0.12.2
785
+ rfc3339-validator==0.1.4
786
+ tensorflow-io==0.37.0
787
+ typing-utils==0.1.0
788
+ jupytext==1.16.2
789
+ jsonschema==4.22.0
790
+ humanize==4.9.0
791
+ google-cloud-functions==1.16.3
792
+ jaraco.context==5.3.0
793
+ htmlmin==0.1.12
794
+ tensorflow-transform==0.14.0
795
+ jupyter_server_proxy==4.2.0
796
+ crcmod==1.7
797
+ boltons==24.0.0
798
+ ruamel.yaml.clib==0.2.8
799
+ jupyter_client==7.4.9
800
+ json5==0.9.25
801
+ tensorboard-data-server==0.7.2
802
+ aiosignal==1.3.1
803
+ types-python-dateutil==2.9.0.20240316
804
+ etils==1.7.0
805
+ plotly==5.22.0
806
+ regex==2024.5.15
807
+ dataproc_jupyter_plugin==0.1.79
808
+ pyOpenSSL==24.0.0
809
+ py-spy==0.3.14
810
+ dm-tree==0.1.8
811
+ ray==2.24.0
812
+ Pygments==2.18.0
813
+ rsa==4.9
814
+ bq_helper==0.4.1
wandb/run-20241205_143913-2mwisme1/files/wandb-metadata.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.6.56+-x86_64-with-glibc2.35",
3
+ "python": "3.10.14",
4
+ "startedAt": "2024-12-05T14:39:13.213016Z",
5
+ "program": "kaggle.ipynb",
6
+ "email": "[email protected]",
7
+ "root": "/kaggle/working",
8
+ "host": "004ac713e0a4",
9
+ "username": "root",
10
+ "executable": "/opt/conda/bin/python3.10",
11
+ "cpu_count": 2,
12
+ "cpu_count_logical": 4,
13
+ "gpu": "[Tesla T4, Tesla T4]",
14
+ "gpu_count": 2,
15
+ "disk": {
16
+ "/": {
17
+ "total": "8656922775552",
18
+ "used": "6470900051968"
19
+ }
20
+ },
21
+ "memory": {
22
+ "total": "33662353408"
23
+ },
24
+ "cpu": {
25
+ "count": 2,
26
+ "countLogical": 4
27
+ },
28
+ "gpu_nvidia": [
29
+ {
30
+ "name": "Tesla T4",
31
+ "memoryTotal": "16106127360",
32
+ "cudaCores": 2560,
33
+ "architecture": "Turing"
34
+ },
35
+ {
36
+ "name": "Tesla T4",
37
+ "memoryTotal": "16106127360",
38
+ "cudaCores": 2560,
39
+ "architecture": "Turing"
40
+ }
41
+ ],
42
+ "cudaVersion": "12.6"
43
+ }
wandb/run-20241205_143913-2mwisme1/logs/debug-core.log ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-05T14:36:34.436169094Z","level":"INFO","msg":"started logging, with flags","port-filename":"/tmp/tmp0_axua70/port-260.txt","pid":260,"debug":false,"disable-analytics":false}
2
+ {"time":"2024-12-05T14:36:34.436205404Z","level":"INFO","msg":"FeatureState","shutdownOnParentExitEnabled":false}
3
+ {"time":"2024-12-05T14:36:34.442826116Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":260}
4
+ {"time":"2024-12-05T14:36:34.442818613Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":46213,"Zone":""}}
5
+ {"time":"2024-12-05T14:36:34.628991059Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:44628"}
6
+ {"time":"2024-12-05T14:39:13.215126888Z","level":"INFO","msg":"handleInformInit: received","streamId":"2mwisme1","id":"127.0.0.1:44628"}
7
+ {"time":"2024-12-05T14:39:15.221780124Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"2mwisme1","id":"127.0.0.1:44628"}
wandb/run-20241205_143913-2mwisme1/logs/debug-internal.log ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2024-12-05T14:39:13.215311083Z","level":"INFO","msg":"using version","core version":"0.18.3"}
2
+ {"time":"2024-12-05T14:39:13.215339406Z","level":"INFO","msg":"created symlink","path":"/kaggle/working/wandb/run-20241205_143913-2mwisme1/logs/debug-core.log"}
3
+ {"time":"2024-12-05T14:39:15.217256419Z","level":"ERROR","msg":"dialing: google: could not find default credentials. See https://cloud.google.com/docs/authentication/external/set-up-adc for more information"}
4
+ {"time":"2024-12-05T14:39:15.221746354Z","level":"INFO","msg":"created new stream","id":"2mwisme1"}
5
+ {"time":"2024-12-05T14:39:15.221774614Z","level":"INFO","msg":"stream: started","id":"2mwisme1"}
6
+ {"time":"2024-12-05T14:39:15.221805326Z","level":"INFO","msg":"writer: Do: started","stream_id":{"value":"2mwisme1"}}
7
+ {"time":"2024-12-05T14:39:15.221843207Z","level":"INFO","msg":"handler: started","stream_id":{"value":"2mwisme1"}}
8
+ {"time":"2024-12-05T14:39:15.221842955Z","level":"INFO","msg":"sender: started","stream_id":{"value":"2mwisme1"}}
9
+ {"time":"2024-12-05T14:39:15.415690183Z","level":"INFO","msg":"wandb-core","!BADKEY":null}
10
+ {"time":"2024-12-05T14:39:15.416772438Z","level":"INFO","msg":"Starting system monitor"}
11
+ {"time":"2024-12-05T14:39:26.759443118Z","level":"INFO","msg":"Pausing system monitor"}
12
+ {"time":"2024-12-05T14:41:15.890079536Z","level":"INFO","msg":"Resuming system monitor"}
13
+ {"time":"2024-12-05T14:41:20.002045749Z","level":"INFO","msg":"Pausing system monitor"}
14
+ {"time":"2024-12-05T14:41:23.883801968Z","level":"INFO","msg":"Resuming system monitor"}
15
+ {"time":"2024-12-05T14:41:24.057644952Z","level":"INFO","msg":"Pausing system monitor"}
16
+ {"time":"2024-12-05T14:41:27.124441713Z","level":"INFO","msg":"Resuming system monitor"}
17
+ {"time":"2024-12-05T14:41:27.164833611Z","level":"INFO","msg":"Pausing system monitor"}
18
+ {"time":"2024-12-05T14:41:30.712386169Z","level":"INFO","msg":"Resuming system monitor"}
19
+ {"time":"2024-12-05T14:41:30.719681016Z","level":"INFO","msg":"Pausing system monitor"}
20
+ {"time":"2024-12-05T14:41:33.978004525Z","level":"INFO","msg":"Resuming system monitor"}
wandb/run-20241205_143913-2mwisme1/logs/debug.log ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_setup.py:_flush():79] Current SDK version is 0.18.3
2
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_setup.py:_flush():79] Configure stats pid to 260
3
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_setup.py:_flush():79] Loading settings from /root/.config/wandb/settings
4
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_setup.py:_flush():79] Loading settings from /kaggle/working/wandb/settings
5
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_setup.py:_flush():79] Loading settings from environment variables: {'api_key': '***REDACTED***'}
6
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_setup.py:_flush():79] Inferring run settings from compute environment: {'program': '<python with no main file>'}
7
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_setup.py:_flush():79] Applying login settings: {}
8
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_setup.py:_flush():79] Applying login settings: {}
9
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_init.py:_log_setup():532] Logging user logs to /kaggle/working/wandb/run-20241205_143913-2mwisme1/logs/debug.log
10
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_init.py:_log_setup():533] Logging internal logs to /kaggle/working/wandb/run-20241205_143913-2mwisme1/logs/debug-internal.log
11
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_init.py:_jupyter_setup():478] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x78025df07a30>
12
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_init.py:init():617] calling init triggers
13
+ 2024-12-05 14:39:13,210 INFO MainThread:260 [wandb_init.py:init():624] wandb.init called with sweep_config: {}
14
+ config: {}
15
+ 2024-12-05 14:39:13,211 INFO MainThread:260 [wandb_init.py:init():667] starting backend
16
+ 2024-12-05 14:39:13,211 INFO MainThread:260 [wandb_init.py:init():671] sending inform_init request
17
+ 2024-12-05 14:39:13,212 INFO MainThread:260 [backend.py:_multiprocessing_setup():104] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
18
+ 2024-12-05 14:39:13,212 INFO MainThread:260 [wandb_init.py:init():684] backend started and connected
19
+ 2024-12-05 14:39:13,225 INFO MainThread:260 [wandb_run.py:_label_probe_notebook():1346] probe notebook
20
+ 2024-12-05 14:39:13,734 INFO MainThread:260 [wandb_init.py:init():779] updated telemetry
21
+ 2024-12-05 14:39:13,738 INFO MainThread:260 [wandb_init.py:init():812] communicating run to backend with 90.0 second timeout
22
+ 2024-12-05 14:39:15,412 INFO MainThread:260 [wandb_init.py:init():863] starting run threads in backend
23
+ 2024-12-05 14:39:16,103 INFO MainThread:260 [wandb_run.py:_console_start():2465] atexit reg
24
+ 2024-12-05 14:39:16,103 INFO MainThread:260 [wandb_run.py:_redirect():2313] redirect: wrap_raw
25
+ 2024-12-05 14:39:16,103 INFO MainThread:260 [wandb_run.py:_redirect():2378] Wrapping output streams.
26
+ 2024-12-05 14:39:16,104 INFO MainThread:260 [wandb_run.py:_redirect():2403] Redirects installed.
27
+ 2024-12-05 14:39:16,111 INFO MainThread:260 [wandb_init.py:init():907] run started, returning control to user process
28
+ 2024-12-05 14:39:16,115 INFO MainThread:260 [wandb_run.py:_config_callback():1394] config_cb None None {'peft_config': {'default': {'peft_type': <PeftType.LORA: 'LORA'>, 'auto_mapping': None, 'base_model_name_or_path': 'unsloth/llama-3.2-3b-instruct-bnb-4bit', 'revision': None, 'task_type': <TaskType.CAUSAL_LM: 'CAUSAL_LM'>, 'inference_mode': False, 'r': 16, 'target_modules': {'k_proj', 'v_proj', 'up_proj', 'o_proj', 'down_proj', 'q_proj', 'gate_proj'}, 'lora_alpha': 16, 'lora_dropout': 0, 'fan_in_fan_out': False, 'bias': 'none', 'use_rslora': False, 'modules_to_save': None, 'init_lora_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'rank_pattern': {}, 'alpha_pattern': {}, 'megatron_config': None, 'megatron_core': 'megatron.core', 'loftq_config': {}, 'use_dora': False, 'layer_replication': None, 'runtime_config': {'ephemeral_gpu_offload': False}}}, 'vocab_size': 128256, 'max_position_embeddings': 131072, 'hidden_size': 3072, 'intermediate_size': 8192, 'num_hidden_layers': 28, 'num_attention_heads': 24, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': True, 'rope_theta': 500000.0, 'rope_scaling': {'factor': 32.0, 'high_freq_factor': 4.0, 'low_freq_factor': 1.0, 'original_max_position_embeddings': 8192, 'rope_type': 'llama3'}, 'attention_bias': False, 'attention_dropout': 0.0, 'mlp_bias': False, 'head_dim': 128, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 128000, 'pad_token_id': 128004, 'eos_token_id': [128001, 128008, 128009], 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'unsloth/llama-3.2-3b-instruct-bnb-4bit', '_attn_implementation_autoset': True, 'transformers_version': '4.47.0.dev0', 'model_type': 'llama', 'quantization_config': {'bnb_4bit_compute_dtype': 'float16', 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'llm_int8_skip_modules': None, 'llm_int8_threshold': 6.0, 'load_in_4bit': True, 'load_in_8bit': False, 'quant_method': 'bitsandbytes'}, 'unsloth_version': '2024.12.2', 'output_dir': '/kaggle/working', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 0.0002, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 5, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Dec05_14-38-49_004ac713e0a4', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 3407, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': False, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_8bit', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'Llama-3.2-3B-appreciation', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'dataset_text_field': 'text', 'packing': False, 'max_seq_length': 2048, 'dataset_num_proc': 2, 'dataset_batch_size': 1000, 'model_init_kwargs': None, 'dataset_kwargs': {}, 'eval_packing': None, 'num_of_sequences': 1024, 'chars_per_token': '<CHARS_PER_TOKEN>', 'use_liger': False}
29
+ 2024-12-05 14:39:16,125 INFO MainThread:260 [wandb_config.py:__setitem__():154] config set model/num_parameters = 3237063680 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7802a4f3a680>>
30
+ 2024-12-05 14:39:16,125 INFO MainThread:260 [wandb_run.py:_config_callback():1394] config_cb model/num_parameters 3237063680 None
31
+ 2024-12-05 14:39:26,758 INFO MainThread:260 [jupyter.py:save_ipynb():387] not saving jupyter notebook
32
+ 2024-12-05 14:39:26,759 INFO MainThread:260 [wandb_init.py:_pause_backend():443] pausing backend
33
+ 2024-12-05 14:41:15,889 INFO MainThread:260 [wandb_init.py:_resume_backend():448] resuming backend
34
+ 2024-12-05 14:41:20,001 INFO MainThread:260 [jupyter.py:save_ipynb():387] not saving jupyter notebook
35
+ 2024-12-05 14:41:20,001 INFO MainThread:260 [wandb_init.py:_pause_backend():443] pausing backend
36
+ 2024-12-05 14:41:23,882 INFO MainThread:260 [wandb_init.py:_resume_backend():448] resuming backend
37
+ 2024-12-05 14:41:24,057 INFO MainThread:260 [jupyter.py:save_ipynb():387] not saving jupyter notebook
38
+ 2024-12-05 14:41:24,057 INFO MainThread:260 [wandb_init.py:_pause_backend():443] pausing backend
39
+ 2024-12-05 14:41:27,123 INFO MainThread:260 [wandb_init.py:_resume_backend():448] resuming backend
40
+ 2024-12-05 14:41:27,130 INFO MainThread:260 [jupyter.py:save_ipynb():387] not saving jupyter notebook
41
+ 2024-12-05 14:41:27,130 INFO MainThread:260 [wandb_init.py:_pause_backend():443] pausing backend
42
+ 2024-12-05 14:41:30,711 INFO MainThread:260 [wandb_init.py:_resume_backend():448] resuming backend
43
+ 2024-12-05 14:41:30,719 INFO MainThread:260 [jupyter.py:save_ipynb():387] not saving jupyter notebook
44
+ 2024-12-05 14:41:30,719 INFO MainThread:260 [wandb_init.py:_pause_backend():443] pausing backend
45
+ 2024-12-05 14:41:33,975 INFO MainThread:260 [wandb_init.py:_resume_backend():448] resuming backend
46
+ 2024-12-05 14:41:37,610 INFO MainThread:260 [wandb_run.py:_config_callback():1394] config_cb None None {'peft_config': {'default': {'peft_type': <PeftType.LORA: 'LORA'>, 'auto_mapping': None, 'base_model_name_or_path': 'unsloth/llama-3.2-3b-instruct-bnb-4bit', 'revision': None, 'task_type': <TaskType.CAUSAL_LM: 'CAUSAL_LM'>, 'inference_mode': False, 'r': 16, 'target_modules': {'k_proj', 'v_proj', 'up_proj', 'o_proj', 'down_proj', 'q_proj', 'gate_proj'}, 'lora_alpha': 16, 'lora_dropout': 0, 'fan_in_fan_out': False, 'bias': 'none', 'use_rslora': False, 'modules_to_save': None, 'init_lora_weights': True, 'layers_to_transform': None, 'layers_pattern': None, 'rank_pattern': {}, 'alpha_pattern': {}, 'megatron_config': None, 'megatron_core': 'megatron.core', 'loftq_config': {}, 'use_dora': False, 'layer_replication': None, 'runtime_config': {'ephemeral_gpu_offload': False}}}, 'vocab_size': 128256, 'max_position_embeddings': 131072, 'hidden_size': 3072, 'intermediate_size': 8192, 'num_hidden_layers': 28, 'num_attention_heads': 24, 'num_key_value_heads': 8, 'hidden_act': 'silu', 'initializer_range': 0.02, 'rms_norm_eps': 1e-05, 'pretraining_tp': 1, 'use_cache': True, 'rope_theta': 500000.0, 'rope_scaling': {'factor': 32.0, 'high_freq_factor': 4.0, 'low_freq_factor': 1.0, 'original_max_position_embeddings': 8192, 'rope_type': 'llama3'}, 'attention_bias': False, 'attention_dropout': 0.0, 'mlp_bias': False, 'head_dim': 128, 'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float16', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['LlamaForCausalLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': 128000, 'pad_token_id': 128004, 'eos_token_id': [128001, 128008, 128009], 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'unsloth/llama-3.2-3b-instruct-bnb-4bit', '_attn_implementation_autoset': True, 'transformers_version': '4.47.0.dev0', 'model_type': 'llama', 'quantization_config': {'bnb_4bit_compute_dtype': 'float16', 'bnb_4bit_quant_type': 'nf4', 'bnb_4bit_use_double_quant': True, 'llm_int8_enable_fp32_cpu_offload': False, 'llm_int8_has_fp16_weight': False, 'llm_int8_skip_modules': None, 'llm_int8_threshold': 6.0, 'load_in_4bit': True, 'load_in_8bit': False, 'quant_method': 'bitsandbytes'}, 'unsloth_version': '2024.12.2', 'output_dir': '/kaggle/working', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'epoch', 'prediction_loss_only': False, 'per_device_train_batch_size': 2, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 4, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 0.0002, 'weight_decay': 0.01, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.0, 'warmup_steps': 5, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/Dec05_14-41-15_004ac713e0a4', 'logging_strategy': 'epoch', 'logging_first_step': False, 'logging_steps': 1, 'logging_nan_inf_filter': True, 'save_strategy': 'epoch', 'save_steps': 500, 'save_total_limit': None, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 3407, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': None, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': False, 'metric_for_best_model': None, 'greater_is_better': None, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_8bit', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': 'Llama-3.2-3B-appreciation', 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'evaluation_strategy': None, 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'dispatch_batches': None, 'split_batches': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False, 'dataset_text_field': 'text', 'packing': False, 'max_seq_length': 2048, 'dataset_num_proc': 2, 'dataset_batch_size': 1000, 'model_init_kwargs': None, 'dataset_kwargs': {}, 'eval_packing': None, 'num_of_sequences': 1024, 'chars_per_token': '<CHARS_PER_TOKEN>', 'use_liger': False}
47
+ 2024-12-05 14:41:37,619 INFO MainThread:260 [wandb_config.py:__setitem__():154] config set model/num_parameters = 3237063680 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7802a4f3a680>>
48
+ 2024-12-05 14:41:37,619 INFO MainThread:260 [wandb_run.py:_config_callback():1394] config_cb model/num_parameters 3237063680 None
wandb/run-20241205_143913-2mwisme1/run-2mwisme1.wandb ADDED
Binary file (32.8 kB). View file