diff --git "a/nohup.out" "b/nohup.out" --- "a/nohup.out" +++ "b/nohup.out" @@ -49116,3 +49116,24575 @@ weight_decay=0.01, [20:42:12] - INFO - huggingface_hub.repository - git version 2.25.1 git-lfs/2.9.2 (GitHub; linux amd64; go 1.13.5) [20:42:12] - DEBUG - huggingface_hub.repository - [Repository] is a valid git repo + Epoch... (1/18 | Loss: 2.357403039932251, Acc: 0.5462022423744202): 0%| | 0/18 [2:38:46 + model.save_pretrained( + File "/home/wilso/transformers/src/transformers/modeling_flax_utils.py", line 456, in save_pretrained + url = self._push_to_hub(repo, commit_message=commit_message) + File "/home/wilso/transformers/src/transformers/file_utils.py", line 2107, in _push_to_hub + return repo.push_to_hub(commit_message=commit_message) + File "/home/wilso/hf/lib/python3.8/site-packages/huggingface_hub/repository.py", line 434, in push_to_hub + return self.git_push() + File "/home/wilso/hf/lib/python3.8/site-packages/huggingface_hub/repository.py", line 422, in git_push + raise EnvironmentError(exc.stderr) +OSError: fatal: could not read Username for 'https://huggingface.co': No such device or address + +2021-07-07 03:56:49.927723: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory +[03:56:50] - WARNING - __main__ - Process rank: -1, device: cpu, n_gpu: 0distributed training: False, 16-bits training: False +[03:56:50] - INFO - __main__ - Training/evaluation parameters TrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.98, +adam_epsilon=1e-08, +dataloader_drop_last=False, +dataloader_num_workers=0, +dataloader_pin_memory=True, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=False, +do_predict=False, +do_train=False, +eval_accumulation_steps=None, +eval_steps=500, +evaluation_strategy=IntervalStrategy.NO, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +gradient_accumulation_steps=1, +greater_is_better=None, +group_by_length=False, +ignore_data_skip=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0003, +length_column_name=length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=./runs/Jul07_03-56-50_t1v-n-b95d739e-w-0, +logging_first_step=False, +logging_steps=500, +logging_strategy=IntervalStrategy.STEPS, +lr_scheduler_type=SchedulerType.LINEAR, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +no_cuda=False, +num_train_epochs=18.0, +output_dir=./, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=128, +per_device_train_batch_size=128, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=, +push_to_hub_organization=None, +push_to_hub_token=None, +remove_unused_columns=True, +report_to=['tensorboard'], +resume_from_checkpoint=None, +run_name=./, +save_on_each_node=False, +save_steps=500, +save_strategy=IntervalStrategy.STEPS, +save_total_limit=None, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=1000, +weight_decay=0.01, +) +[03:56:51] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 +[03:56:51] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 +[03:56:51] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 +[03:56:51] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 +[03:56:51] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 +[03:56:51] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 +[03:56:51] - WARNING - datasets.builder - Reusing dataset oscar (/home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) +[03:56:51] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 +[03:56:52] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 +[03:56:52] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 +[03:56:52] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 +[03:56:52] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 +[03:56:52] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 +[03:56:52] - WARNING - datasets.builder - Reusing dataset oscar (/home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) +[03:56:52] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): s3.amazonaws.com:443 +[03:56:52] - DEBUG - urllib3.connectionpool - https://s3.amazonaws.com:443 "HEAD /datasets.huggingface.co/datasets/datasets/oscar/oscar.py HTTP/1.1" 404 0 +[03:56:52] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 +[03:56:52] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/oscar.py HTTP/1.1" 200 0 +[03:56:52] - DEBUG - urllib3.connectionpool - Starting new HTTPS connection (1): raw.githubusercontent.com:443 +[03:56:52] - DEBUG - urllib3.connectionpool - https://raw.githubusercontent.com:443 "HEAD /huggingface/datasets/master/datasets/oscar/dataset_infos.json HTTP/1.1" 200 0 +[03:56:52] - WARNING - datasets.builder - Reusing dataset oscar (/home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) +[03:56:52] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-484e12c5eef7e8e7.arrow +[03:56:53] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-c71dccca2ce1349d.arrow +[03:56:53] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-cddb40395f5104e7.arrow +[03:56:54] - WARNING - datasets.arrow_dataset - Loading cached processed dataset at /home/wilso/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2/cache-e6293c8aef77ae9a.arrow +[03:56:54] - INFO - absl - Starting the local TPU driver. +[03:56:54] - INFO - absl - Unable to initialize backend 'tpu_driver': Not found: Unable to find driver in registry given worker: local:// +[03:56:54] - INFO - absl - Unable to initialize backend 'gpu': Not found: Could not find registered platform with name: "cuda". Available platform names are: Host Interpreter TPU +2021-07-07 03:56:57.424057: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory +2021-07-07 03:56:57.424162: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303) +/home/wilso/hf/lib/python3.8/site-packages/jax/lib/xla_bridge.py:382: UserWarning: jax.host_count has been renamed to jax.process_count. This alias will eventually be removed; please update your code. + warnings.warn( +/home/wilso/hf/lib/python3.8/site-packages/jax/lib/xla_bridge.py:369: UserWarning: jax.host_id has been renamed to jax.process_index. This alias will eventually be removed; please update your code. + warnings.warn( + Epoch ... (1/18): 0%| | 0/18 [00:00