diff --git a/lib/transformers/src/transformers.egg-info/PKG-INFO b/lib/transformers/src/transformers.egg-info/PKG-INFO new file mode 100644 index 0000000000000000000000000000000000000000..a1937e9c346a87da83206f81eda66f6a56456819 --- /dev/null +++ b/lib/transformers/src/transformers.egg-info/PKG-INFO @@ -0,0 +1,766 @@ +Metadata-Version: 2.1 +Name: transformers +Version: 4.43.3 +Summary: State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow +Home-page: https://github.com/huggingface/transformers +Author: The Hugging Face team (past and future) with the help of all our contributors (https://github.com/huggingface/transformers/graphs/contributors) +Author-email: transformers@huggingface.co +License: Apache 2.0 License +Keywords: NLP vision speech deep learning transformer pytorch tensorflow jax BERT GPT-2 Wav2Vec2 ViT +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Education +Classifier: Intended Audience :: Science/Research +Classifier: License :: OSI Approved :: Apache Software License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence +Requires-Python: >=3.8.0 +Description-Content-Type: text/markdown +License-File: LICENSE +Requires-Dist: filelock +Requires-Dist: huggingface-hub<1.0,>=0.23.2 +Requires-Dist: numpy>=1.17 +Requires-Dist: packaging>=20.0 +Requires-Dist: pyyaml>=5.1 +Requires-Dist: regex!=2019.12.17 +Requires-Dist: requests +Requires-Dist: tokenizers<0.20,>=0.19 +Requires-Dist: safetensors>=0.4.1 +Requires-Dist: tqdm>=4.27 +Provides-Extra: ja +Requires-Dist: fugashi>=1.0; extra == "ja" +Requires-Dist: ipadic<2.0,>=1.0.0; extra == "ja" +Requires-Dist: unidic_lite>=1.0.7; extra == "ja" +Requires-Dist: unidic>=1.0.2; extra == "ja" +Requires-Dist: sudachipy>=0.6.6; extra == "ja" +Requires-Dist: sudachidict_core>=20220729; extra == "ja" +Requires-Dist: rhoknp<1.3.1,>=1.1.0; extra == "ja" +Provides-Extra: sklearn +Requires-Dist: scikit-learn; extra == "sklearn" +Provides-Extra: tf +Requires-Dist: tensorflow<2.16,>2.9; extra == "tf" +Requires-Dist: onnxconverter-common; extra == "tf" +Requires-Dist: tf2onnx; extra == "tf" +Requires-Dist: tensorflow-text<2.16; extra == "tf" +Requires-Dist: keras-nlp<0.14.0,>=0.3.1; extra == "tf" +Provides-Extra: tf-cpu +Requires-Dist: keras<2.16,>2.9; extra == "tf-cpu" +Requires-Dist: tensorflow-cpu<2.16,>2.9; extra == "tf-cpu" +Requires-Dist: onnxconverter-common; extra == "tf-cpu" +Requires-Dist: tf2onnx; extra == "tf-cpu" +Requires-Dist: tensorflow-text<2.16; extra == "tf-cpu" +Requires-Dist: keras-nlp<0.14.0,>=0.3.1; extra == "tf-cpu" +Requires-Dist: tensorflow-probability<0.24; extra == "tf-cpu" +Provides-Extra: torch +Requires-Dist: torch; extra == "torch" +Requires-Dist: accelerate>=0.21.0; extra == "torch" +Provides-Extra: accelerate +Requires-Dist: accelerate>=0.21.0; extra == "accelerate" +Provides-Extra: retrieval +Requires-Dist: faiss-cpu; extra == "retrieval" +Requires-Dist: datasets!=2.5.0; extra == "retrieval" +Provides-Extra: flax +Requires-Dist: jax<=0.4.13,>=0.4.1; extra == "flax" +Requires-Dist: jaxlib<=0.4.13,>=0.4.1; extra == "flax" +Requires-Dist: flax<=0.7.0,>=0.4.1; extra == "flax" +Requires-Dist: optax<=0.1.4,>=0.0.8; extra == "flax" +Requires-Dist: scipy<1.13.0; 
extra == "flax" +Provides-Extra: tokenizers +Requires-Dist: tokenizers<0.20,>=0.19; extra == "tokenizers" +Provides-Extra: ftfy +Requires-Dist: ftfy; extra == "ftfy" +Provides-Extra: onnxruntime +Requires-Dist: onnxruntime>=1.4.0; extra == "onnxruntime" +Requires-Dist: onnxruntime-tools>=1.4.2; extra == "onnxruntime" +Provides-Extra: onnx +Requires-Dist: onnxconverter-common; extra == "onnx" +Requires-Dist: tf2onnx; extra == "onnx" +Requires-Dist: onnxruntime>=1.4.0; extra == "onnx" +Requires-Dist: onnxruntime-tools>=1.4.2; extra == "onnx" +Provides-Extra: modelcreation +Requires-Dist: cookiecutter==1.7.3; extra == "modelcreation" +Provides-Extra: sagemaker +Requires-Dist: sagemaker>=2.31.0; extra == "sagemaker" +Provides-Extra: deepspeed +Requires-Dist: deepspeed>=0.9.3; extra == "deepspeed" +Requires-Dist: accelerate>=0.21.0; extra == "deepspeed" +Provides-Extra: optuna +Requires-Dist: optuna; extra == "optuna" +Provides-Extra: ray +Requires-Dist: ray[tune]>=2.7.0; extra == "ray" +Provides-Extra: sigopt +Requires-Dist: sigopt; extra == "sigopt" +Provides-Extra: integrations +Requires-Dist: optuna; extra == "integrations" +Requires-Dist: ray[tune]>=2.7.0; extra == "integrations" +Requires-Dist: sigopt; extra == "integrations" +Provides-Extra: serving +Requires-Dist: pydantic; extra == "serving" +Requires-Dist: uvicorn; extra == "serving" +Requires-Dist: fastapi; extra == "serving" +Requires-Dist: starlette; extra == "serving" +Provides-Extra: audio +Requires-Dist: librosa; extra == "audio" +Requires-Dist: pyctcdecode>=0.4.0; extra == "audio" +Requires-Dist: phonemizer; extra == "audio" +Requires-Dist: kenlm; extra == "audio" +Provides-Extra: speech +Requires-Dist: torchaudio; extra == "speech" +Requires-Dist: librosa; extra == "speech" +Requires-Dist: pyctcdecode>=0.4.0; extra == "speech" +Requires-Dist: phonemizer; extra == "speech" +Requires-Dist: kenlm; extra == "speech" +Provides-Extra: torch-speech +Requires-Dist: torchaudio; extra == "torch-speech" +Requires-Dist: librosa; extra == "torch-speech" +Requires-Dist: pyctcdecode>=0.4.0; extra == "torch-speech" +Requires-Dist: phonemizer; extra == "torch-speech" +Requires-Dist: kenlm; extra == "torch-speech" +Provides-Extra: tf-speech +Requires-Dist: librosa; extra == "tf-speech" +Requires-Dist: pyctcdecode>=0.4.0; extra == "tf-speech" +Requires-Dist: phonemizer; extra == "tf-speech" +Requires-Dist: kenlm; extra == "tf-speech" +Provides-Extra: flax-speech +Requires-Dist: librosa; extra == "flax-speech" +Requires-Dist: pyctcdecode>=0.4.0; extra == "flax-speech" +Requires-Dist: phonemizer; extra == "flax-speech" +Requires-Dist: kenlm; extra == "flax-speech" +Provides-Extra: vision +Requires-Dist: Pillow<=15.0,>=10.0.1; extra == "vision" +Provides-Extra: timm +Requires-Dist: timm<=0.9.16; extra == "timm" +Provides-Extra: torch-vision +Requires-Dist: torchvision; extra == "torch-vision" +Requires-Dist: Pillow<=15.0,>=10.0.1; extra == "torch-vision" +Provides-Extra: natten +Requires-Dist: natten<0.15.0,>=0.14.6; extra == "natten" +Provides-Extra: codecarbon +Requires-Dist: codecarbon==1.2.0; extra == "codecarbon" +Provides-Extra: video +Requires-Dist: decord==0.6.0; extra == "video" +Requires-Dist: av==9.2.0; extra == "video" +Provides-Extra: sentencepiece +Requires-Dist: sentencepiece!=0.1.92,>=0.1.91; extra == "sentencepiece" +Requires-Dist: protobuf; extra == "sentencepiece" +Provides-Extra: testing +Requires-Dist: pytest<8.0.0,>=7.2.0; extra == "testing" +Requires-Dist: pytest-rich; extra == "testing" +Requires-Dist: pytest-xdist; extra == 
"testing" +Requires-Dist: timeout-decorator; extra == "testing" +Requires-Dist: parameterized; extra == "testing" +Requires-Dist: psutil; extra == "testing" +Requires-Dist: datasets!=2.5.0; extra == "testing" +Requires-Dist: dill<0.3.5; extra == "testing" +Requires-Dist: evaluate>=0.2.0; extra == "testing" +Requires-Dist: pytest-timeout; extra == "testing" +Requires-Dist: ruff==0.4.4; extra == "testing" +Requires-Dist: sacrebleu<2.0.0,>=1.4.12; extra == "testing" +Requires-Dist: rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1; extra == "testing" +Requires-Dist: nltk; extra == "testing" +Requires-Dist: GitPython<3.1.19; extra == "testing" +Requires-Dist: sacremoses; extra == "testing" +Requires-Dist: rjieba; extra == "testing" +Requires-Dist: beautifulsoup4; extra == "testing" +Requires-Dist: tensorboard; extra == "testing" +Requires-Dist: pydantic; extra == "testing" +Requires-Dist: sentencepiece!=0.1.92,>=0.1.91; extra == "testing" +Requires-Dist: faiss-cpu; extra == "testing" +Requires-Dist: datasets!=2.5.0; extra == "testing" +Requires-Dist: cookiecutter==1.7.3; extra == "testing" +Provides-Extra: deepspeed-testing +Requires-Dist: deepspeed>=0.9.3; extra == "deepspeed-testing" +Requires-Dist: accelerate>=0.21.0; extra == "deepspeed-testing" +Requires-Dist: pytest<8.0.0,>=7.2.0; extra == "deepspeed-testing" +Requires-Dist: pytest-rich; extra == "deepspeed-testing" +Requires-Dist: pytest-xdist; extra == "deepspeed-testing" +Requires-Dist: timeout-decorator; extra == "deepspeed-testing" +Requires-Dist: parameterized; extra == "deepspeed-testing" +Requires-Dist: psutil; extra == "deepspeed-testing" +Requires-Dist: datasets!=2.5.0; extra == "deepspeed-testing" +Requires-Dist: dill<0.3.5; extra == "deepspeed-testing" +Requires-Dist: evaluate>=0.2.0; extra == "deepspeed-testing" +Requires-Dist: pytest-timeout; extra == "deepspeed-testing" +Requires-Dist: ruff==0.4.4; extra == "deepspeed-testing" +Requires-Dist: sacrebleu<2.0.0,>=1.4.12; extra == "deepspeed-testing" +Requires-Dist: rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1; extra == "deepspeed-testing" +Requires-Dist: nltk; extra == "deepspeed-testing" +Requires-Dist: GitPython<3.1.19; extra == "deepspeed-testing" +Requires-Dist: sacremoses; extra == "deepspeed-testing" +Requires-Dist: rjieba; extra == "deepspeed-testing" +Requires-Dist: beautifulsoup4; extra == "deepspeed-testing" +Requires-Dist: tensorboard; extra == "deepspeed-testing" +Requires-Dist: pydantic; extra == "deepspeed-testing" +Requires-Dist: sentencepiece!=0.1.92,>=0.1.91; extra == "deepspeed-testing" +Requires-Dist: faiss-cpu; extra == "deepspeed-testing" +Requires-Dist: datasets!=2.5.0; extra == "deepspeed-testing" +Requires-Dist: cookiecutter==1.7.3; extra == "deepspeed-testing" +Requires-Dist: optuna; extra == "deepspeed-testing" +Requires-Dist: sentencepiece!=0.1.92,>=0.1.91; extra == "deepspeed-testing" +Requires-Dist: protobuf; extra == "deepspeed-testing" +Provides-Extra: ruff +Requires-Dist: ruff==0.4.4; extra == "ruff" +Provides-Extra: quality +Requires-Dist: datasets!=2.5.0; extra == "quality" +Requires-Dist: isort>=5.5.4; extra == "quality" +Requires-Dist: ruff==0.4.4; extra == "quality" +Requires-Dist: GitPython<3.1.19; extra == "quality" +Requires-Dist: urllib3<2.0.0; extra == "quality" +Provides-Extra: all +Requires-Dist: tensorflow<2.16,>2.9; extra == "all" +Requires-Dist: onnxconverter-common; extra == "all" +Requires-Dist: tf2onnx; extra == "all" +Requires-Dist: tensorflow-text<2.16; extra == "all" +Requires-Dist: keras-nlp<0.14.0,>=0.3.1; extra == "all" 
+Requires-Dist: torch; extra == "all" +Requires-Dist: accelerate>=0.21.0; extra == "all" +Requires-Dist: jax<=0.4.13,>=0.4.1; extra == "all" +Requires-Dist: jaxlib<=0.4.13,>=0.4.1; extra == "all" +Requires-Dist: flax<=0.7.0,>=0.4.1; extra == "all" +Requires-Dist: optax<=0.1.4,>=0.0.8; extra == "all" +Requires-Dist: scipy<1.13.0; extra == "all" +Requires-Dist: sentencepiece!=0.1.92,>=0.1.91; extra == "all" +Requires-Dist: protobuf; extra == "all" +Requires-Dist: tokenizers<0.20,>=0.19; extra == "all" +Requires-Dist: torchaudio; extra == "all" +Requires-Dist: librosa; extra == "all" +Requires-Dist: pyctcdecode>=0.4.0; extra == "all" +Requires-Dist: phonemizer; extra == "all" +Requires-Dist: kenlm; extra == "all" +Requires-Dist: Pillow<=15.0,>=10.0.1; extra == "all" +Requires-Dist: optuna; extra == "all" +Requires-Dist: ray[tune]>=2.7.0; extra == "all" +Requires-Dist: sigopt; extra == "all" +Requires-Dist: timm<=0.9.16; extra == "all" +Requires-Dist: torchvision; extra == "all" +Requires-Dist: Pillow<=15.0,>=10.0.1; extra == "all" +Requires-Dist: codecarbon==1.2.0; extra == "all" +Requires-Dist: accelerate>=0.21.0; extra == "all" +Requires-Dist: decord==0.6.0; extra == "all" +Requires-Dist: av==9.2.0; extra == "all" +Provides-Extra: dev-torch +Requires-Dist: pytest<8.0.0,>=7.2.0; extra == "dev-torch" +Requires-Dist: pytest-rich; extra == "dev-torch" +Requires-Dist: pytest-xdist; extra == "dev-torch" +Requires-Dist: timeout-decorator; extra == "dev-torch" +Requires-Dist: parameterized; extra == "dev-torch" +Requires-Dist: psutil; extra == "dev-torch" +Requires-Dist: datasets!=2.5.0; extra == "dev-torch" +Requires-Dist: dill<0.3.5; extra == "dev-torch" +Requires-Dist: evaluate>=0.2.0; extra == "dev-torch" +Requires-Dist: pytest-timeout; extra == "dev-torch" +Requires-Dist: ruff==0.4.4; extra == "dev-torch" +Requires-Dist: sacrebleu<2.0.0,>=1.4.12; extra == "dev-torch" +Requires-Dist: rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1; extra == "dev-torch" +Requires-Dist: nltk; extra == "dev-torch" +Requires-Dist: GitPython<3.1.19; extra == "dev-torch" +Requires-Dist: sacremoses; extra == "dev-torch" +Requires-Dist: rjieba; extra == "dev-torch" +Requires-Dist: beautifulsoup4; extra == "dev-torch" +Requires-Dist: tensorboard; extra == "dev-torch" +Requires-Dist: pydantic; extra == "dev-torch" +Requires-Dist: sentencepiece!=0.1.92,>=0.1.91; extra == "dev-torch" +Requires-Dist: faiss-cpu; extra == "dev-torch" +Requires-Dist: datasets!=2.5.0; extra == "dev-torch" +Requires-Dist: cookiecutter==1.7.3; extra == "dev-torch" +Requires-Dist: torch; extra == "dev-torch" +Requires-Dist: accelerate>=0.21.0; extra == "dev-torch" +Requires-Dist: sentencepiece!=0.1.92,>=0.1.91; extra == "dev-torch" +Requires-Dist: protobuf; extra == "dev-torch" +Requires-Dist: tokenizers<0.20,>=0.19; extra == "dev-torch" +Requires-Dist: torchaudio; extra == "dev-torch" +Requires-Dist: librosa; extra == "dev-torch" +Requires-Dist: pyctcdecode>=0.4.0; extra == "dev-torch" +Requires-Dist: phonemizer; extra == "dev-torch" +Requires-Dist: kenlm; extra == "dev-torch" +Requires-Dist: Pillow<=15.0,>=10.0.1; extra == "dev-torch" +Requires-Dist: optuna; extra == "dev-torch" +Requires-Dist: ray[tune]>=2.7.0; extra == "dev-torch" +Requires-Dist: sigopt; extra == "dev-torch" +Requires-Dist: timm<=0.9.16; extra == "dev-torch" +Requires-Dist: torchvision; extra == "dev-torch" +Requires-Dist: Pillow<=15.0,>=10.0.1; extra == "dev-torch" +Requires-Dist: codecarbon==1.2.0; extra == "dev-torch" +Requires-Dist: datasets!=2.5.0; extra == "dev-torch" 
+Requires-Dist: isort>=5.5.4; extra == "dev-torch" +Requires-Dist: ruff==0.4.4; extra == "dev-torch" +Requires-Dist: GitPython<3.1.19; extra == "dev-torch" +Requires-Dist: urllib3<2.0.0; extra == "dev-torch" +Requires-Dist: fugashi>=1.0; extra == "dev-torch" +Requires-Dist: ipadic<2.0,>=1.0.0; extra == "dev-torch" +Requires-Dist: unidic_lite>=1.0.7; extra == "dev-torch" +Requires-Dist: unidic>=1.0.2; extra == "dev-torch" +Requires-Dist: sudachipy>=0.6.6; extra == "dev-torch" +Requires-Dist: sudachidict_core>=20220729; extra == "dev-torch" +Requires-Dist: rhoknp<1.3.1,>=1.1.0; extra == "dev-torch" +Requires-Dist: scikit-learn; extra == "dev-torch" +Requires-Dist: cookiecutter==1.7.3; extra == "dev-torch" +Requires-Dist: onnxruntime>=1.4.0; extra == "dev-torch" +Requires-Dist: onnxruntime-tools>=1.4.2; extra == "dev-torch" +Provides-Extra: dev-tensorflow +Requires-Dist: pytest<8.0.0,>=7.2.0; extra == "dev-tensorflow" +Requires-Dist: pytest-rich; extra == "dev-tensorflow" +Requires-Dist: pytest-xdist; extra == "dev-tensorflow" +Requires-Dist: timeout-decorator; extra == "dev-tensorflow" +Requires-Dist: parameterized; extra == "dev-tensorflow" +Requires-Dist: psutil; extra == "dev-tensorflow" +Requires-Dist: datasets!=2.5.0; extra == "dev-tensorflow" +Requires-Dist: dill<0.3.5; extra == "dev-tensorflow" +Requires-Dist: evaluate>=0.2.0; extra == "dev-tensorflow" +Requires-Dist: pytest-timeout; extra == "dev-tensorflow" +Requires-Dist: ruff==0.4.4; extra == "dev-tensorflow" +Requires-Dist: sacrebleu<2.0.0,>=1.4.12; extra == "dev-tensorflow" +Requires-Dist: rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1; extra == "dev-tensorflow" +Requires-Dist: nltk; extra == "dev-tensorflow" +Requires-Dist: GitPython<3.1.19; extra == "dev-tensorflow" +Requires-Dist: sacremoses; extra == "dev-tensorflow" +Requires-Dist: rjieba; extra == "dev-tensorflow" +Requires-Dist: beautifulsoup4; extra == "dev-tensorflow" +Requires-Dist: tensorboard; extra == "dev-tensorflow" +Requires-Dist: pydantic; extra == "dev-tensorflow" +Requires-Dist: sentencepiece!=0.1.92,>=0.1.91; extra == "dev-tensorflow" +Requires-Dist: faiss-cpu; extra == "dev-tensorflow" +Requires-Dist: datasets!=2.5.0; extra == "dev-tensorflow" +Requires-Dist: cookiecutter==1.7.3; extra == "dev-tensorflow" +Requires-Dist: tensorflow<2.16,>2.9; extra == "dev-tensorflow" +Requires-Dist: onnxconverter-common; extra == "dev-tensorflow" +Requires-Dist: tf2onnx; extra == "dev-tensorflow" +Requires-Dist: tensorflow-text<2.16; extra == "dev-tensorflow" +Requires-Dist: keras-nlp<0.14.0,>=0.3.1; extra == "dev-tensorflow" +Requires-Dist: sentencepiece!=0.1.92,>=0.1.91; extra == "dev-tensorflow" +Requires-Dist: protobuf; extra == "dev-tensorflow" +Requires-Dist: tokenizers<0.20,>=0.19; extra == "dev-tensorflow" +Requires-Dist: Pillow<=15.0,>=10.0.1; extra == "dev-tensorflow" +Requires-Dist: datasets!=2.5.0; extra == "dev-tensorflow" +Requires-Dist: isort>=5.5.4; extra == "dev-tensorflow" +Requires-Dist: ruff==0.4.4; extra == "dev-tensorflow" +Requires-Dist: GitPython<3.1.19; extra == "dev-tensorflow" +Requires-Dist: urllib3<2.0.0; extra == "dev-tensorflow" +Requires-Dist: scikit-learn; extra == "dev-tensorflow" +Requires-Dist: cookiecutter==1.7.3; extra == "dev-tensorflow" +Requires-Dist: onnxconverter-common; extra == "dev-tensorflow" +Requires-Dist: tf2onnx; extra == "dev-tensorflow" +Requires-Dist: onnxruntime>=1.4.0; extra == "dev-tensorflow" +Requires-Dist: onnxruntime-tools>=1.4.2; extra == "dev-tensorflow" +Requires-Dist: librosa; extra == "dev-tensorflow" 
+Requires-Dist: pyctcdecode>=0.4.0; extra == "dev-tensorflow" +Requires-Dist: phonemizer; extra == "dev-tensorflow" +Requires-Dist: kenlm; extra == "dev-tensorflow" +Provides-Extra: dev +Requires-Dist: tensorflow<2.16,>2.9; extra == "dev" +Requires-Dist: onnxconverter-common; extra == "dev" +Requires-Dist: tf2onnx; extra == "dev" +Requires-Dist: tensorflow-text<2.16; extra == "dev" +Requires-Dist: keras-nlp<0.14.0,>=0.3.1; extra == "dev" +Requires-Dist: torch; extra == "dev" +Requires-Dist: accelerate>=0.21.0; extra == "dev" +Requires-Dist: jax<=0.4.13,>=0.4.1; extra == "dev" +Requires-Dist: jaxlib<=0.4.13,>=0.4.1; extra == "dev" +Requires-Dist: flax<=0.7.0,>=0.4.1; extra == "dev" +Requires-Dist: optax<=0.1.4,>=0.0.8; extra == "dev" +Requires-Dist: scipy<1.13.0; extra == "dev" +Requires-Dist: sentencepiece!=0.1.92,>=0.1.91; extra == "dev" +Requires-Dist: protobuf; extra == "dev" +Requires-Dist: tokenizers<0.20,>=0.19; extra == "dev" +Requires-Dist: torchaudio; extra == "dev" +Requires-Dist: librosa; extra == "dev" +Requires-Dist: pyctcdecode>=0.4.0; extra == "dev" +Requires-Dist: phonemizer; extra == "dev" +Requires-Dist: kenlm; extra == "dev" +Requires-Dist: Pillow<=15.0,>=10.0.1; extra == "dev" +Requires-Dist: optuna; extra == "dev" +Requires-Dist: ray[tune]>=2.7.0; extra == "dev" +Requires-Dist: sigopt; extra == "dev" +Requires-Dist: timm<=0.9.16; extra == "dev" +Requires-Dist: torchvision; extra == "dev" +Requires-Dist: Pillow<=15.0,>=10.0.1; extra == "dev" +Requires-Dist: codecarbon==1.2.0; extra == "dev" +Requires-Dist: accelerate>=0.21.0; extra == "dev" +Requires-Dist: decord==0.6.0; extra == "dev" +Requires-Dist: av==9.2.0; extra == "dev" +Requires-Dist: pytest<8.0.0,>=7.2.0; extra == "dev" +Requires-Dist: pytest-rich; extra == "dev" +Requires-Dist: pytest-xdist; extra == "dev" +Requires-Dist: timeout-decorator; extra == "dev" +Requires-Dist: parameterized; extra == "dev" +Requires-Dist: psutil; extra == "dev" +Requires-Dist: datasets!=2.5.0; extra == "dev" +Requires-Dist: dill<0.3.5; extra == "dev" +Requires-Dist: evaluate>=0.2.0; extra == "dev" +Requires-Dist: pytest-timeout; extra == "dev" +Requires-Dist: ruff==0.4.4; extra == "dev" +Requires-Dist: sacrebleu<2.0.0,>=1.4.12; extra == "dev" +Requires-Dist: rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1; extra == "dev" +Requires-Dist: nltk; extra == "dev" +Requires-Dist: GitPython<3.1.19; extra == "dev" +Requires-Dist: sacremoses; extra == "dev" +Requires-Dist: rjieba; extra == "dev" +Requires-Dist: beautifulsoup4; extra == "dev" +Requires-Dist: tensorboard; extra == "dev" +Requires-Dist: pydantic; extra == "dev" +Requires-Dist: sentencepiece!=0.1.92,>=0.1.91; extra == "dev" +Requires-Dist: faiss-cpu; extra == "dev" +Requires-Dist: datasets!=2.5.0; extra == "dev" +Requires-Dist: cookiecutter==1.7.3; extra == "dev" +Requires-Dist: datasets!=2.5.0; extra == "dev" +Requires-Dist: isort>=5.5.4; extra == "dev" +Requires-Dist: ruff==0.4.4; extra == "dev" +Requires-Dist: GitPython<3.1.19; extra == "dev" +Requires-Dist: urllib3<2.0.0; extra == "dev" +Requires-Dist: fugashi>=1.0; extra == "dev" +Requires-Dist: ipadic<2.0,>=1.0.0; extra == "dev" +Requires-Dist: unidic_lite>=1.0.7; extra == "dev" +Requires-Dist: unidic>=1.0.2; extra == "dev" +Requires-Dist: sudachipy>=0.6.6; extra == "dev" +Requires-Dist: sudachidict_core>=20220729; extra == "dev" +Requires-Dist: rhoknp<1.3.1,>=1.1.0; extra == "dev" +Requires-Dist: scikit-learn; extra == "dev" +Requires-Dist: cookiecutter==1.7.3; extra == "dev" +Provides-Extra: torchhub +Requires-Dist: filelock; 
extra == "torchhub" +Requires-Dist: huggingface-hub<1.0,>=0.23.2; extra == "torchhub" +Requires-Dist: importlib_metadata; extra == "torchhub" +Requires-Dist: numpy>=1.17; extra == "torchhub" +Requires-Dist: packaging>=20.0; extra == "torchhub" +Requires-Dist: protobuf; extra == "torchhub" +Requires-Dist: regex!=2019.12.17; extra == "torchhub" +Requires-Dist: requests; extra == "torchhub" +Requires-Dist: sentencepiece!=0.1.92,>=0.1.91; extra == "torchhub" +Requires-Dist: torch; extra == "torchhub" +Requires-Dist: tokenizers<0.20,>=0.19; extra == "torchhub" +Requires-Dist: tqdm>=4.27; extra == "torchhub" +Provides-Extra: agents +Requires-Dist: diffusers; extra == "agents" +Requires-Dist: accelerate>=0.21.0; extra == "agents" +Requires-Dist: datasets!=2.5.0; extra == "agents" +Requires-Dist: torch; extra == "agents" +Requires-Dist: sentencepiece!=0.1.92,>=0.1.91; extra == "agents" +Requires-Dist: opencv-python; extra == "agents" +Requires-Dist: Pillow<=15.0,>=10.0.1; extra == "agents" +Provides-Extra: benchmark +Requires-Dist: optimum-benchmark>=0.2.0; extra == "benchmark" + + + +

+[Banner: Hugging Face Transformers Library]
+
+[Badges: Build | GitHub | Documentation | GitHub release | Contributor Covenant | DOI]
+
+[Language links: English | 简体中文 | 繁體中文 | 한국어 | Español | 日本語 | हिन्दी | Русский | Português | తెలుగు | Français | Deutsch | Tiếng Việt]
State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow


+ +🤗 Transformers provides thousands of pretrained models to perform tasks on different modalities such as text, vision, and audio. + +These models can be applied to: + +* 📝 Text, for tasks like text classification, information extraction, question answering, summarization, translation, and text generation, in over 100 languages. +* 🖼️ Images, for tasks like image classification, object detection, and segmentation. +* 🗣️ Audio, for tasks like speech recognition and audio classification. + +Transformer models can also perform tasks on **several modalities combined**, such as table question answering, optical character recognition, information extraction from scanned documents, video classification, and visual question answering. + +🤗 Transformers provides APIs to quickly download and use those pretrained models on a given text, fine-tune them on your own datasets, and then share them with the community on our [model hub](https://huggingface.co/models). At the same time, each Python module defining an architecture is fully standalone and can be modified to enable quick research experiments. + +🤗 Transformers is backed by the three most popular deep learning libraries — [Jax](https://jax.readthedocs.io/en/latest/), [PyTorch](https://pytorch.org/) and [TensorFlow](https://www.tensorflow.org/) — with a seamless integration between them. It's straightforward to train your models with one before loading them for inference with another. + +## Online demos + +You can test most of our models directly on their pages from the [model hub](https://huggingface.co/models). We also offer [private model hosting, versioning, & an inference API](https://huggingface.co/pricing) for public and private models. + +Here are a few examples: + +In Natural Language Processing: +- [Masked word completion with BERT](https://huggingface.co/google-bert/bert-base-uncased?text=Paris+is+the+%5BMASK%5D+of+France) +- [Named Entity Recognition with Electra](https://huggingface.co/dbmdz/electra-large-discriminator-finetuned-conll03-english?text=My+name+is+Sarah+and+I+live+in+London+city) +- [Text generation with Mistral](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) +- [Natural Language Inference with RoBERTa](https://huggingface.co/FacebookAI/roberta-large-mnli?text=The+dog+was+lost.+Nobody+lost+any+animal) +- [Summarization with BART](https://huggingface.co/facebook/bart-large-cnn?text=The+tower+is+324+metres+%281%2C063+ft%29+tall%2C+about+the+same+height+as+an+81-storey+building%2C+and+the+tallest+structure+in+Paris.+Its+base+is+square%2C+measuring+125+metres+%28410+ft%29+on+each+side.+During+its+construction%2C+the+Eiffel+Tower+surpassed+the+Washington+Monument+to+become+the+tallest+man-made+structure+in+the+world%2C+a+title+it+held+for+41+years+until+the+Chrysler+Building+in+New+York+City+was+finished+in+1930.+It+was+the+first+structure+to+reach+a+height+of+300+metres.+Due+to+the+addition+of+a+broadcasting+aerial+at+the+top+of+the+tower+in+1957%2C+it+is+now+taller+than+the+Chrysler+Building+by+5.2+metres+%2817+ft%29.+Excluding+transmitters%2C+the+Eiffel+Tower+is+the+second+tallest+free-standing+structure+in+France+after+the+Millau+Viaduct) +- [Question answering with
DistilBERT](https://huggingface.co/distilbert/distilbert-base-uncased-distilled-squad?text=Which+name+is+also+used+to+describe+the+Amazon+rainforest+in+English%3F&context=The+Amazon+rainforest+%28Portuguese%3A+Floresta+Amaz%C3%B4nica+or+Amaz%C3%B4nia%3B+Spanish%3A+Selva+Amaz%C3%B3nica%2C+Amazon%C3%ADa+or+usually+Amazonia%3B+French%3A+For%C3%AAt+amazonienne%3B+Dutch%3A+Amazoneregenwoud%29%2C+also+known+in+English+as+Amazonia+or+the+Amazon+Jungle%2C+is+a+moist+broadleaf+forest+that+covers+most+of+the+Amazon+basin+of+South+America.+This+basin+encompasses+7%2C000%2C000+square+kilometres+%282%2C700%2C000+sq+mi%29%2C+of+which+5%2C500%2C000+square+kilometres+%282%2C100%2C000+sq+mi%29+are+covered+by+the+rainforest.+This+region+includes+territory+belonging+to+nine+nations.+The+majority+of+the+forest+is+contained+within+Brazil%2C+with+60%25+of+the+rainforest%2C+followed+by+Peru+with+13%25%2C+Colombia+with+10%25%2C+and+with+minor+amounts+in+Venezuela%2C+Ecuador%2C+Bolivia%2C+Guyana%2C+Suriname+and+French+Guiana.+States+or+departments+in+four+nations+contain+%22Amazonas%22+in+their+names.+The+Amazon+represents+over+half+of+the+planet%27s+remaining+rainforests%2C+and+comprises+the+largest+and+most+biodiverse+tract+of+tropical+rainforest+in+the+world%2C+with+an+estimated+390+billion+individual+trees+divided+into+16%2C000+species) +- [Translation with T5](https://huggingface.co/google-t5/t5-base?text=My+name+is+Wolfgang+and+I+live+in+Berlin) + +In Computer Vision: +- [Image classification with ViT](https://huggingface.co/google/vit-base-patch16-224) +- [Object Detection with DETR](https://huggingface.co/facebook/detr-resnet-50) +- [Semantic Segmentation with SegFormer](https://huggingface.co/nvidia/segformer-b0-finetuned-ade-512-512) +- [Panoptic Segmentation with Mask2Former](https://huggingface.co/facebook/mask2former-swin-large-coco-panoptic) +- [Depth Estimation with Depth Anything](https://huggingface.co/docs/transformers/main/model_doc/depth_anything) +- [Video Classification with VideoMAE](https://huggingface.co/docs/transformers/model_doc/videomae) +- [Universal Segmentation with OneFormer](https://huggingface.co/shi-labs/oneformer_ade20k_dinat_large) + +In Audio: +- [Automatic Speech Recognition with Whisper](https://huggingface.co/openai/whisper-large-v3) +- [Keyword Spotting with Wav2Vec2](https://huggingface.co/superb/wav2vec2-base-superb-ks) +- [Audio Classification with Audio Spectrogram Transformer](https://huggingface.co/MIT/ast-finetuned-audioset-10-10-0.4593) + +In Multimodal tasks: +- [Table Question Answering with TAPAS](https://huggingface.co/google/tapas-base-finetuned-wtq) +- [Visual Question Answering with ViLT](https://huggingface.co/dandelin/vilt-b32-finetuned-vqa) +- [Image captioning with LLaVa](https://huggingface.co/llava-hf/llava-1.5-7b-hf) +- [Zero-shot Image Classification with SigLIP](https://huggingface.co/google/siglip-so400m-patch14-384) +- [Document Question Answering with LayoutLM](https://huggingface.co/impira/layoutlm-document-qa) +- [Zero-shot Video Classification with X-CLIP](https://huggingface.co/docs/transformers/model_doc/xclip) +- [Zero-shot Object Detection with OWLv2](https://huggingface.co/docs/transformers/en/model_doc/owlv2) +- [Zero-shot Image Segmentation with CLIPSeg](https://huggingface.co/docs/transformers/model_doc/clipseg) +- [Automatic Mask Generation with SAM](https://huggingface.co/docs/transformers/model_doc/sam) + + +## 100 projects using Transformers + +Transformers is more than a toolkit to use pretrained models: it's a community of 
projects built around it and the +Hugging Face Hub. We want Transformers to enable developers, researchers, students, professors, engineers, and anyone +else to build their dream projects. + +To celebrate transformers reaching 100,000 stars, we decided to put the spotlight on the +community and created the [awesome-transformers](./awesome-transformers.md) page, which lists 100 +incredible projects built around transformers. + +If you own or use a project that you believe should be part of the list, please open a PR to add it! + +## If you are looking for custom support from the Hugging Face team + +[Banner: HuggingFace Expert Acceleration Program] +
+ +## Quick tour + +To immediately use a model on a given input (text, image, audio, ...), we provide the `pipeline` API. Pipelines group together a pretrained model with the preprocessing that was used during that model's training. Here is how to quickly use a pipeline to classify positive versus negative texts: + +```python +>>> from transformers import pipeline + +# Allocate a pipeline for sentiment-analysis +>>> classifier = pipeline('sentiment-analysis') +>>> classifier('We are very happy to introduce pipeline to the transformers repository.') +[{'label': 'POSITIVE', 'score': 0.9996980428695679}] +``` + +The second line of code downloads and caches the pretrained model used by the pipeline, while the third evaluates it on the given text. Here, the answer is "positive" with a confidence of 99.97%. + +Many tasks have a pre-trained `pipeline` ready to go, in NLP but also in computer vision and speech. For example, we can easily extract detected objects in an image: + +``` python +>>> import requests +>>> from PIL import Image +>>> from transformers import pipeline + +# Download an image with cute cats +>>> url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/coco_sample.png" +>>> image_data = requests.get(url, stream=True).raw +>>> image = Image.open(image_data) + +# Allocate a pipeline for object detection +>>> object_detector = pipeline('object-detection') +>>> object_detector(image) +[{'score': 0.9982201457023621, + 'label': 'remote', + 'box': {'xmin': 40, 'ymin': 70, 'xmax': 175, 'ymax': 117}}, + {'score': 0.9960021376609802, + 'label': 'remote', + 'box': {'xmin': 333, 'ymin': 72, 'xmax': 368, 'ymax': 187}}, + {'score': 0.9954745173454285, + 'label': 'couch', + 'box': {'xmin': 0, 'ymin': 1, 'xmax': 639, 'ymax': 473}}, + {'score': 0.9988006353378296, + 'label': 'cat', + 'box': {'xmin': 13, 'ymin': 52, 'xmax': 314, 'ymax': 470}}, + {'score': 0.9986783862113953, + 'label': 'cat', + 'box': {'xmin': 345, 'ymin': 23, 'xmax': 640, 'ymax': 368}}] +``` + +Here, we get a list of objects detected in the image, with a box surrounding the object and a confidence score. Here is the original image on the left, with the predictions displayed on the right: + +

+[Images: COCO sample photo of two cats and two remotes — original (left) and with predicted boxes and labels (right)]
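+
+For reproducible results, you can also pin an explicit checkpoint from the Hub instead of relying on the task's default model. A minimal sketch, reusing the `image` downloaded above and the DETR checkpoint linked in the demos earlier:
+
+```python
+>>> from transformers import pipeline
+
+# Same task as above, but with an explicitly pinned model checkpoint
+>>> object_detector = pipeline("object-detection", model="facebook/detr-resnet-50")
+>>> object_detector(image)  # same output format as above
+```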

+ +You can learn more about the tasks supported by the `pipeline` API in [this tutorial](https://huggingface.co/docs/transformers/task_summary). + +In addition to `pipeline`, downloading and using any of the pretrained models on your given task takes just three lines of code. Here is the PyTorch version: +```python +>>> from transformers import AutoTokenizer, AutoModel + +>>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased") +>>> model = AutoModel.from_pretrained("google-bert/bert-base-uncased") + +>>> inputs = tokenizer("Hello world!", return_tensors="pt") +>>> outputs = model(**inputs) +``` + +And here is the equivalent code for TensorFlow: +```python +>>> from transformers import AutoTokenizer, TFAutoModel + +>>> tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-uncased") +>>> model = TFAutoModel.from_pretrained("google-bert/bert-base-uncased") + +>>> inputs = tokenizer("Hello world!", return_tensors="tf") +>>> outputs = model(**inputs) +``` + +The tokenizer is responsible for all the preprocessing the pretrained model expects and can be called directly on a single string (as in the above examples) or a list. It will output a dictionary that you can use in downstream code or pass directly to your model using the `**` argument-unpacking operator. + +The model itself is a regular [PyTorch `nn.Module`](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) or a [TensorFlow `tf.keras.Model`](https://www.tensorflow.org/api_docs/python/tf/keras/Model) (depending on your backend), which you can use as usual. [This tutorial](https://huggingface.co/docs/transformers/training) explains how to integrate such a model into a classic PyTorch or TensorFlow training loop, or how to use our `Trainer` API to quickly fine-tune on a new dataset. +
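+As a quick taste of that `Trainer` API, here is a minimal, hypothetical sketch: it assumes `train_dataset` is a tokenized, labeled dataset you have already prepared (for instance with the 🤗 Datasets library), and it only shows the wiring, not a full recipe:
+
+```python
+>>> from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments
+
+>>> model = AutoModelForSequenceClassification.from_pretrained("google-bert/bert-base-uncased")
+>>> trainer = Trainer(
+...     model=model,
+...     args=TrainingArguments(output_dir="my_model"),  # where checkpoints will be saved
+...     train_dataset=train_dataset,  # hypothetical: your tokenized, labeled dataset
+... )
+>>> trainer.train()
+```
+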
+## Why should I use transformers? + +1. Easy-to-use state-of-the-art models: + - High performance on natural language understanding & generation, computer vision, and audio tasks. + - Low barrier to entry for educators and practitioners. + - Few user-facing abstractions with just three classes to learn. + - A unified API for using all our pretrained models. + +1. Lower compute costs, smaller carbon footprint: + - Researchers can share trained models instead of always retraining. + - Practitioners can reduce compute time and production costs. + - Dozens of architectures with over 400,000 pretrained models across all modalities. + +1. Choose the right framework for every part of a model's lifetime: + - Train state-of-the-art models in 3 lines of code. + - Move a single model between TF2.0/PyTorch/JAX frameworks at will. + - Seamlessly pick the right framework for training, evaluation, and production. + +1. Easily customize a model or an example to your needs: + - We provide examples for each architecture to reproduce the results published by its original authors. + - Model internals are exposed as consistently as possible. + - Model files can be used independently of the library for quick experiments. + +## Why shouldn't I use transformers? + +- This library is not a modular toolbox of building blocks for neural nets. The code in the model files is deliberately not refactored with additional abstractions, so that researchers can quickly iterate on each of the models without diving into additional abstractions/files. +- The training API is not intended to work on any model; it is optimized to work with the models provided by the library. For generic machine learning loops, you should use another library (such as [Accelerate](https://huggingface.co/docs/accelerate)). +- While we strive to present as many use cases as possible, the scripts in our [examples folder](https://github.com/huggingface/transformers/tree/main/examples) are just that: examples. They are not expected to work out of the box on your specific problem, and you will likely need to change a few lines of code to adapt them to your needs. + +## Installation + +### With pip + +This repository is tested on Python 3.8+, Flax 0.4.1+, PyTorch 1.11+, and TensorFlow 2.6+. + +You should install 🤗 Transformers in a [virtual environment](https://docs.python.org/3/library/venv.html). If you're unfamiliar with Python virtual environments, check out the [user guide](https://packaging.python.org/guides/installing-using-pip-and-virtual-environments/). + +First, create a virtual environment with the version of Python you're going to use and activate it. + +Then, you will need to install at least one of Flax, PyTorch, or TensorFlow. +Please refer to the [TensorFlow installation page](https://www.tensorflow.org/install/), the [PyTorch installation page](https://pytorch.org/get-started/locally/#start-locally), and/or the [Flax](https://github.com/google/flax#quick-install) and [Jax](https://github.com/google/jax#installation) installation pages for the specific installation command for your platform. + +When one of those backends has been installed, 🤗 Transformers can be installed using pip as follows: + +```bash +pip install transformers +``` + +If you'd like to play with the examples or need the bleeding edge of the code and can't wait for a new release, you must [install the library from source](https://huggingface.co/docs/transformers/installation#installing-from-source). + +### With conda + +🤗 Transformers can be installed using conda as follows: + +```bash +conda install conda-forge::transformers +``` + +> **_NOTE:_** Installing `transformers` from the `huggingface` channel is deprecated. + +Follow the installation pages of Flax, PyTorch or TensorFlow to see how to install them with conda. + +> **_NOTE:_** On Windows, you may be prompted to activate Developer Mode in order to benefit from caching. If this is not an option for you, please let us know in [this issue](https://github.com/huggingface/huggingface_hub/issues/1062). + +## Model architectures + +**[All the model checkpoints](https://huggingface.co/models)** provided by 🤗 Transformers are seamlessly integrated from the huggingface.co [model hub](https://huggingface.co/models), where they are uploaded directly by [users](https://huggingface.co/users) and [organizations](https://huggingface.co/organizations). + +Current number of checkpoints: ![](https://img.shields.io/endpoint?url=https://huggingface.co/api/shields/models&color=brightgreen) + +🤗 Transformers currently provides a wide range of architectures: see [here](https://huggingface.co/docs/transformers/model_summary) for a high-level summary of each of them. + +To check if each model has an implementation in Flax, PyTorch or TensorFlow, or has an associated tokenizer backed by the 🤗 Tokenizers library, refer to [this table](https://huggingface.co/docs/transformers/index#supported-frameworks). + +These implementations have been tested on several datasets (see the example scripts) and should match the performance of the original implementations.
You can find more details on performance in the Examples section of the [documentation](https://github.com/huggingface/transformers/tree/main/examples). + + +## Learn more + +| Section | Description | +|-|-| +| [Documentation](https://huggingface.co/docs/transformers/) | Full API documentation and tutorials | +| [Task summary](https://huggingface.co/docs/transformers/task_summary) | Tasks supported by 🤗 Transformers | +| [Preprocessing tutorial](https://huggingface.co/docs/transformers/preprocessing) | Using the `Tokenizer` class to prepare data for the models | +| [Training and fine-tuning](https://huggingface.co/docs/transformers/training) | Using the models provided by 🤗 Transformers in a PyTorch/TensorFlow training loop and the `Trainer` API | +| [Quick tour: Fine-tuning/usage scripts](https://github.com/huggingface/transformers/tree/main/examples) | Example scripts for fine-tuning models on a wide range of tasks | +| [Model sharing and uploading](https://huggingface.co/docs/transformers/model_sharing) | Upload and share your fine-tuned models with the community | + +## Citation + +We now have a [paper](https://www.aclweb.org/anthology/2020.emnlp-demos.6/) you can cite for the 🤗 Transformers library: +```bibtex +@inproceedings{wolf-etal-2020-transformers, + title = "Transformers: State-of-the-Art Natural Language Processing", + author = "Thomas Wolf and Lysandre Debut and Victor Sanh and Julien Chaumond and Clement Delangue and Anthony Moi and Pierric Cistac and Tim Rault and Rémi Louf and Morgan Funtowicz and Joe Davison and Sam Shleifer and Patrick von Platen and Clara Ma and Yacine Jernite and Julien Plu and Canwen Xu and Teven Le Scao and Sylvain Gugger and Mariama Drame and Quentin Lhoest and Alexander M. Rush", + booktitle = "Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: System Demonstrations", + month = oct, + year = "2020", + address = "Online", + publisher = "Association for Computational Linguistics", + url = "https://www.aclweb.org/anthology/2020.emnlp-demos.6", + pages = "38--45" +} +``` diff --git a/lib/transformers/src/transformers.egg-info/SOURCES.txt b/lib/transformers/src/transformers.egg-info/SOURCES.txt new file mode 100644 index 0000000000000000000000000000000000000000..275dfd2ad96aa60d3b9ba2930a6309fe070ba291 --- /dev/null +++ b/lib/transformers/src/transformers.egg-info/SOURCES.txt @@ -0,0 +1,1765 @@ +LICENSE +README.md +pyproject.toml +setup.py +src/transformers/__init__.py +src/transformers/activations.py +src/transformers/activations_tf.py +src/transformers/audio_utils.py +src/transformers/cache_utils.py +src/transformers/configuration_utils.py +src/transformers/convert_graph_to_onnx.py +src/transformers/convert_pytorch_checkpoint_to_tf2.py +src/transformers/convert_slow_tokenizer.py +src/transformers/convert_slow_tokenizers_checkpoints_to_fast.py +src/transformers/convert_tf_hub_seq_to_seq_bert_to_pytorch.py +src/transformers/debug_utils.py +src/transformers/deepspeed.py +src/transformers/dependency_versions_check.py +src/transformers/dependency_versions_table.py +src/transformers/dynamic_module_utils.py +src/transformers/feature_extraction_sequence_utils.py +src/transformers/feature_extraction_utils.py +src/transformers/file_utils.py +src/transformers/hf_argparser.py +src/transformers/hyperparameter_search.py +src/transformers/image_processing_base.py +src/transformers/image_processing_utils.py +src/transformers/image_processing_utils_fast.py +src/transformers/image_transforms.py +src/transformers/image_utils.py 
+src/transformers/keras_callbacks.py +src/transformers/modelcard.py +src/transformers/modeling_attn_mask_utils.py +src/transformers/modeling_flash_attention_utils.py +src/transformers/modeling_flax_outputs.py +src/transformers/modeling_flax_pytorch_utils.py +src/transformers/modeling_flax_utils.py +src/transformers/modeling_gguf_pytorch_utils.py +src/transformers/modeling_outputs.py +src/transformers/modeling_rope_utils.py +src/transformers/modeling_tf_outputs.py +src/transformers/modeling_tf_pytorch_utils.py +src/transformers/modeling_tf_utils.py +src/transformers/modeling_utils.py +src/transformers/optimization.py +src/transformers/optimization_tf.py +src/transformers/processing_utils.py +src/transformers/pytorch_utils.py +src/transformers/safetensors_conversion.py +src/transformers/testing_utils.py +src/transformers/tf_utils.py +src/transformers/time_series_utils.py +src/transformers/tokenization_utils.py +src/transformers/tokenization_utils_base.py +src/transformers/tokenization_utils_fast.py +src/transformers/trainer.py +src/transformers/trainer_callback.py +src/transformers/trainer_pt_utils.py +src/transformers/trainer_seq2seq.py +src/transformers/trainer_utils.py +src/transformers/training_args.py +src/transformers/training_args_seq2seq.py +src/transformers/training_args_tf.py +src/transformers.egg-info/PKG-INFO +src/transformers.egg-info/SOURCES.txt +src/transformers.egg-info/dependency_links.txt +src/transformers.egg-info/entry_points.txt +src/transformers.egg-info/not-zip-safe +src/transformers.egg-info/requires.txt +src/transformers.egg-info/top_level.txt +src/transformers/agents/__init__.py +src/transformers/agents/agent_types.py +src/transformers/agents/agents.py +src/transformers/agents/default_tools.py +src/transformers/agents/document_question_answering.py +src/transformers/agents/evaluate_agent.py +src/transformers/agents/image_question_answering.py +src/transformers/agents/llm_engine.py +src/transformers/agents/prompts.py +src/transformers/agents/python_interpreter.py +src/transformers/agents/speech_to_text.py +src/transformers/agents/text_to_speech.py +src/transformers/agents/tools.py +src/transformers/agents/translation.py +src/transformers/benchmark/__init__.py +src/transformers/benchmark/benchmark.py +src/transformers/benchmark/benchmark_args.py +src/transformers/benchmark/benchmark_args_tf.py +src/transformers/benchmark/benchmark_args_utils.py +src/transformers/benchmark/benchmark_tf.py +src/transformers/benchmark/benchmark_utils.py +src/transformers/commands/__init__.py +src/transformers/commands/add_new_model_like.py +src/transformers/commands/convert.py +src/transformers/commands/download.py +src/transformers/commands/env.py +src/transformers/commands/lfs.py +src/transformers/commands/pt_to_tf.py +src/transformers/commands/run.py +src/transformers/commands/serving.py +src/transformers/commands/train.py +src/transformers/commands/transformers_cli.py +src/transformers/commands/user.py +src/transformers/data/__init__.py +src/transformers/data/data_collator.py +src/transformers/data/datasets/__init__.py +src/transformers/data/datasets/glue.py +src/transformers/data/datasets/language_modeling.py +src/transformers/data/datasets/squad.py +src/transformers/data/metrics/__init__.py +src/transformers/data/metrics/squad_metrics.py +src/transformers/data/processors/__init__.py +src/transformers/data/processors/glue.py +src/transformers/data/processors/squad.py +src/transformers/data/processors/utils.py +src/transformers/data/processors/xnli.py 
+src/transformers/generation/__init__.py +src/transformers/generation/beam_constraints.py +src/transformers/generation/beam_search.py +src/transformers/generation/candidate_generator.py +src/transformers/generation/configuration_utils.py +src/transformers/generation/flax_logits_process.py +src/transformers/generation/flax_utils.py +src/transformers/generation/logits_process.py +src/transformers/generation/stopping_criteria.py +src/transformers/generation/streamers.py +src/transformers/generation/tf_logits_process.py +src/transformers/generation/tf_utils.py +src/transformers/generation/utils.py +src/transformers/generation/watermarking.py +src/transformers/integrations/__init__.py +src/transformers/integrations/aqlm.py +src/transformers/integrations/awq.py +src/transformers/integrations/bitsandbytes.py +src/transformers/integrations/deepspeed.py +src/transformers/integrations/eetq.py +src/transformers/integrations/fbgemm_fp8.py +src/transformers/integrations/ggml.py +src/transformers/integrations/hqq.py +src/transformers/integrations/integration_utils.py +src/transformers/integrations/peft.py +src/transformers/integrations/quanto.py +src/transformers/integrations/tpu.py +src/transformers/kernels/deformable_detr/ms_deform_attn.h +src/transformers/kernels/deformable_detr/vision.cpp +src/transformers/kernels/deformable_detr/cpu/ms_deform_attn_cpu.cpp +src/transformers/kernels/deformable_detr/cpu/ms_deform_attn_cpu.h +src/transformers/kernels/deformable_detr/cuda/ms_deform_attn_cuda.cu +src/transformers/kernels/deformable_detr/cuda/ms_deform_attn_cuda.cuh +src/transformers/kernels/deformable_detr/cuda/ms_deform_attn_cuda.h +src/transformers/kernels/deformable_detr/cuda/ms_deform_im2col_cuda.cuh +src/transformers/kernels/deta/ms_deform_attn.h +src/transformers/kernels/deta/vision.cpp +src/transformers/kernels/deta/cpu/ms_deform_attn_cpu.cpp +src/transformers/kernels/deta/cpu/ms_deform_attn_cpu.h +src/transformers/kernels/deta/cuda/ms_deform_attn_cuda.cu +src/transformers/kernels/deta/cuda/ms_deform_attn_cuda.cuh +src/transformers/kernels/deta/cuda/ms_deform_attn_cuda.h +src/transformers/kernels/deta/cuda/ms_deform_im2col_cuda.cuh +src/transformers/kernels/mra/cuda_kernel.cu +src/transformers/kernels/mra/cuda_kernel.h +src/transformers/kernels/mra/cuda_launch.cu +src/transformers/kernels/mra/cuda_launch.h +src/transformers/kernels/mra/torch_extension.cpp +src/transformers/kernels/rwkv/wkv_cuda.cu +src/transformers/kernels/rwkv/wkv_cuda_bf16.cu +src/transformers/kernels/rwkv/wkv_op.cpp +src/transformers/kernels/yoso/common.h +src/transformers/kernels/yoso/common_cuda.h +src/transformers/kernels/yoso/common_cuda_device.h +src/transformers/kernels/yoso/fast_lsh_cumulation.cu +src/transformers/kernels/yoso/fast_lsh_cumulation.h +src/transformers/kernels/yoso/fast_lsh_cumulation_cuda.cu +src/transformers/kernels/yoso/fast_lsh_cumulation_cuda.h +src/transformers/kernels/yoso/fast_lsh_cumulation_torch.cpp +src/transformers/models/__init__.py +src/transformers/models/albert/__init__.py +src/transformers/models/albert/configuration_albert.py +src/transformers/models/albert/convert_albert_original_tf_checkpoint_to_pytorch.py +src/transformers/models/albert/modeling_albert.py +src/transformers/models/albert/modeling_flax_albert.py +src/transformers/models/albert/modeling_tf_albert.py +src/transformers/models/albert/tokenization_albert.py +src/transformers/models/albert/tokenization_albert_fast.py +src/transformers/models/align/__init__.py +src/transformers/models/align/configuration_align.py 
+src/transformers/models/align/convert_align_tf_to_hf.py
+src/transformers/models/align/modeling_align.py
+src/transformers/models/align/processing_align.py
+src/transformers/models/altclip/__init__.py
+src/transformers/models/altclip/configuration_altclip.py
+src/transformers/models/altclip/modeling_altclip.py
+src/transformers/models/altclip/processing_altclip.py
+src/transformers/models/audio_spectrogram_transformer/__init__.py
+src/transformers/models/audio_spectrogram_transformer/configuration_audio_spectrogram_transformer.py
+src/transformers/models/audio_spectrogram_transformer/convert_audio_spectrogram_transformer_original_to_pytorch.py
+src/transformers/models/audio_spectrogram_transformer/feature_extraction_audio_spectrogram_transformer.py
+src/transformers/models/audio_spectrogram_transformer/modeling_audio_spectrogram_transformer.py
+src/transformers/models/auto/__init__.py
+src/transformers/models/auto/auto_factory.py
+src/transformers/models/auto/configuration_auto.py
+src/transformers/models/auto/feature_extraction_auto.py
+src/transformers/models/auto/image_processing_auto.py
+src/transformers/models/auto/modeling_auto.py
+src/transformers/models/auto/modeling_flax_auto.py
+src/transformers/models/auto/modeling_tf_auto.py
+src/transformers/models/auto/processing_auto.py
+src/transformers/models/auto/tokenization_auto.py
+src/transformers/models/autoformer/__init__.py
+src/transformers/models/autoformer/configuration_autoformer.py
+src/transformers/models/autoformer/modeling_autoformer.py
+src/transformers/models/bark/__init__.py
+src/transformers/models/bark/configuration_bark.py
+src/transformers/models/bark/convert_suno_to_hf.py
+src/transformers/models/bark/generation_configuration_bark.py
+src/transformers/models/bark/modeling_bark.py
+src/transformers/models/bark/processing_bark.py
+src/transformers/models/bart/__init__.py
+src/transformers/models/bart/configuration_bart.py
+src/transformers/models/bart/convert_bart_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/bart/modeling_bart.py
+src/transformers/models/bart/modeling_flax_bart.py
+src/transformers/models/bart/modeling_tf_bart.py
+src/transformers/models/bart/tokenization_bart.py
+src/transformers/models/bart/tokenization_bart_fast.py
+src/transformers/models/barthez/__init__.py
+src/transformers/models/barthez/tokenization_barthez.py
+src/transformers/models/barthez/tokenization_barthez_fast.py
+src/transformers/models/bartpho/__init__.py
+src/transformers/models/bartpho/tokenization_bartpho.py
+src/transformers/models/beit/__init__.py
+src/transformers/models/beit/configuration_beit.py
+src/transformers/models/beit/convert_beit_unilm_to_pytorch.py
+src/transformers/models/beit/feature_extraction_beit.py
+src/transformers/models/beit/image_processing_beit.py
+src/transformers/models/beit/modeling_beit.py
+src/transformers/models/beit/modeling_flax_beit.py
+src/transformers/models/bert/__init__.py
+src/transformers/models/bert/configuration_bert.py
+src/transformers/models/bert/convert_bert_original_tf2_checkpoint_to_pytorch.py
+src/transformers/models/bert/convert_bert_original_tf_checkpoint_to_pytorch.py
+src/transformers/models/bert/convert_bert_pytorch_checkpoint_to_original_tf.py
+src/transformers/models/bert/convert_bert_token_dropping_original_tf2_checkpoint_to_pytorch.py
+src/transformers/models/bert/modeling_bert.py
+src/transformers/models/bert/modeling_flax_bert.py
+src/transformers/models/bert/modeling_tf_bert.py
+src/transformers/models/bert/tokenization_bert.py
+src/transformers/models/bert/tokenization_bert_fast.py
+src/transformers/models/bert/tokenization_bert_tf.py
+src/transformers/models/bert_generation/__init__.py
+src/transformers/models/bert_generation/configuration_bert_generation.py
+src/transformers/models/bert_generation/modeling_bert_generation.py
+src/transformers/models/bert_generation/tokenization_bert_generation.py
+src/transformers/models/bert_japanese/__init__.py
+src/transformers/models/bert_japanese/tokenization_bert_japanese.py
+src/transformers/models/bertweet/__init__.py
+src/transformers/models/bertweet/tokenization_bertweet.py
+src/transformers/models/big_bird/__init__.py
+src/transformers/models/big_bird/configuration_big_bird.py
+src/transformers/models/big_bird/convert_bigbird_original_tf_checkpoint_to_pytorch.py
+src/transformers/models/big_bird/modeling_big_bird.py
+src/transformers/models/big_bird/modeling_flax_big_bird.py
+src/transformers/models/big_bird/tokenization_big_bird.py
+src/transformers/models/big_bird/tokenization_big_bird_fast.py
+src/transformers/models/bigbird_pegasus/__init__.py
+src/transformers/models/bigbird_pegasus/configuration_bigbird_pegasus.py
+src/transformers/models/bigbird_pegasus/convert_bigbird_pegasus_tf_to_pytorch.py
+src/transformers/models/bigbird_pegasus/modeling_bigbird_pegasus.py
+src/transformers/models/biogpt/__init__.py
+src/transformers/models/biogpt/configuration_biogpt.py
+src/transformers/models/biogpt/convert_biogpt_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/biogpt/modeling_biogpt.py
+src/transformers/models/biogpt/tokenization_biogpt.py
+src/transformers/models/bit/__init__.py
+src/transformers/models/bit/configuration_bit.py
+src/transformers/models/bit/convert_bit_to_pytorch.py
+src/transformers/models/bit/image_processing_bit.py
+src/transformers/models/bit/modeling_bit.py
+src/transformers/models/blenderbot/__init__.py
+src/transformers/models/blenderbot/configuration_blenderbot.py
+src/transformers/models/blenderbot/convert_blenderbot_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/blenderbot/modeling_blenderbot.py
+src/transformers/models/blenderbot/modeling_flax_blenderbot.py
+src/transformers/models/blenderbot/modeling_tf_blenderbot.py
+src/transformers/models/blenderbot/tokenization_blenderbot.py
+src/transformers/models/blenderbot/tokenization_blenderbot_fast.py
+src/transformers/models/blenderbot_small/__init__.py
+src/transformers/models/blenderbot_small/configuration_blenderbot_small.py
+src/transformers/models/blenderbot_small/modeling_blenderbot_small.py
+src/transformers/models/blenderbot_small/modeling_flax_blenderbot_small.py
+src/transformers/models/blenderbot_small/modeling_tf_blenderbot_small.py
+src/transformers/models/blenderbot_small/tokenization_blenderbot_small.py
+src/transformers/models/blenderbot_small/tokenization_blenderbot_small_fast.py
+src/transformers/models/blip/__init__.py
+src/transformers/models/blip/configuration_blip.py
+src/transformers/models/blip/convert_blip_original_pytorch_to_hf.py
+src/transformers/models/blip/image_processing_blip.py
+src/transformers/models/blip/modeling_blip.py
+src/transformers/models/blip/modeling_blip_text.py
+src/transformers/models/blip/modeling_tf_blip.py
+src/transformers/models/blip/modeling_tf_blip_text.py
+src/transformers/models/blip/processing_blip.py
+src/transformers/models/blip_2/__init__.py
+src/transformers/models/blip_2/configuration_blip_2.py
+src/transformers/models/blip_2/convert_blip_2_original_to_pytorch.py
+src/transformers/models/blip_2/modeling_blip_2.py
+src/transformers/models/blip_2/processing_blip_2.py
+src/transformers/models/bloom/__init__.py
+src/transformers/models/bloom/configuration_bloom.py
+src/transformers/models/bloom/convert_bloom_original_checkpoint_to_pytorch.py
+src/transformers/models/bloom/modeling_bloom.py
+src/transformers/models/bloom/modeling_flax_bloom.py
+src/transformers/models/bloom/tokenization_bloom_fast.py
+src/transformers/models/bridgetower/__init__.py
+src/transformers/models/bridgetower/configuration_bridgetower.py
+src/transformers/models/bridgetower/image_processing_bridgetower.py
+src/transformers/models/bridgetower/modeling_bridgetower.py
+src/transformers/models/bridgetower/processing_bridgetower.py
+src/transformers/models/bros/__init__.py
+src/transformers/models/bros/configuration_bros.py
+src/transformers/models/bros/convert_bros_to_pytorch.py
+src/transformers/models/bros/modeling_bros.py
+src/transformers/models/bros/processing_bros.py
+src/transformers/models/byt5/__init__.py
+src/transformers/models/byt5/convert_byt5_original_tf_checkpoint_to_pytorch.py
+src/transformers/models/byt5/tokenization_byt5.py
+src/transformers/models/camembert/__init__.py
+src/transformers/models/camembert/configuration_camembert.py
+src/transformers/models/camembert/modeling_camembert.py
+src/transformers/models/camembert/modeling_tf_camembert.py
+src/transformers/models/camembert/tokenization_camembert.py
+src/transformers/models/camembert/tokenization_camembert_fast.py
+src/transformers/models/canine/__init__.py
+src/transformers/models/canine/configuration_canine.py
+src/transformers/models/canine/convert_canine_original_tf_checkpoint_to_pytorch.py
+src/transformers/models/canine/modeling_canine.py
+src/transformers/models/canine/tokenization_canine.py
+src/transformers/models/chameleon/__init__.py
+src/transformers/models/chameleon/configuration_chameleon.py
+src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py
+src/transformers/models/chameleon/image_processing_chameleon.py
+src/transformers/models/chameleon/modeling_chameleon.py
+src/transformers/models/chameleon/processing_chameleon.py
+src/transformers/models/chinese_clip/__init__.py
+src/transformers/models/chinese_clip/configuration_chinese_clip.py
+src/transformers/models/chinese_clip/convert_chinese_clip_original_pytorch_to_hf.py
+src/transformers/models/chinese_clip/feature_extraction_chinese_clip.py
+src/transformers/models/chinese_clip/image_processing_chinese_clip.py
+src/transformers/models/chinese_clip/modeling_chinese_clip.py
+src/transformers/models/chinese_clip/processing_chinese_clip.py
+src/transformers/models/clap/__init__.py
+src/transformers/models/clap/configuration_clap.py
+src/transformers/models/clap/convert_clap_original_pytorch_to_hf.py
+src/transformers/models/clap/feature_extraction_clap.py
+src/transformers/models/clap/modeling_clap.py
+src/transformers/models/clap/processing_clap.py
+src/transformers/models/clip/__init__.py
+src/transformers/models/clip/configuration_clip.py
+src/transformers/models/clip/convert_clip_original_pytorch_to_hf.py
+src/transformers/models/clip/feature_extraction_clip.py
+src/transformers/models/clip/image_processing_clip.py
+src/transformers/models/clip/modeling_clip.py
+src/transformers/models/clip/modeling_flax_clip.py
+src/transformers/models/clip/modeling_tf_clip.py
+src/transformers/models/clip/processing_clip.py
+src/transformers/models/clip/tokenization_clip.py
+src/transformers/models/clip/tokenization_clip_fast.py
+src/transformers/models/clipseg/__init__.py
+src/transformers/models/clipseg/configuration_clipseg.py
+src/transformers/models/clipseg/convert_clipseg_original_pytorch_to_hf.py
+src/transformers/models/clipseg/modeling_clipseg.py
+src/transformers/models/clipseg/processing_clipseg.py
+src/transformers/models/clvp/__init__.py
+src/transformers/models/clvp/configuration_clvp.py
+src/transformers/models/clvp/convert_clvp_to_hf.py
+src/transformers/models/clvp/feature_extraction_clvp.py
+src/transformers/models/clvp/modeling_clvp.py
+src/transformers/models/clvp/number_normalizer.py
+src/transformers/models/clvp/processing_clvp.py
+src/transformers/models/clvp/tokenization_clvp.py
+src/transformers/models/code_llama/__init__.py
+src/transformers/models/code_llama/tokenization_code_llama.py
+src/transformers/models/code_llama/tokenization_code_llama_fast.py
+src/transformers/models/codegen/__init__.py
+src/transformers/models/codegen/configuration_codegen.py
+src/transformers/models/codegen/modeling_codegen.py
+src/transformers/models/codegen/tokenization_codegen.py
+src/transformers/models/codegen/tokenization_codegen_fast.py
+src/transformers/models/cohere/__init__.py
+src/transformers/models/cohere/configuration_cohere.py
+src/transformers/models/cohere/modeling_cohere.py
+src/transformers/models/cohere/tokenization_cohere_fast.py
+src/transformers/models/conditional_detr/__init__.py
+src/transformers/models/conditional_detr/configuration_conditional_detr.py
+src/transformers/models/conditional_detr/convert_conditional_detr_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/conditional_detr/feature_extraction_conditional_detr.py
+src/transformers/models/conditional_detr/image_processing_conditional_detr.py
+src/transformers/models/conditional_detr/modeling_conditional_detr.py
+src/transformers/models/convbert/__init__.py
+src/transformers/models/convbert/configuration_convbert.py
+src/transformers/models/convbert/convert_convbert_original_tf1_checkpoint_to_pytorch_and_tf2.py
+src/transformers/models/convbert/modeling_convbert.py
+src/transformers/models/convbert/modeling_tf_convbert.py
+src/transformers/models/convbert/tokenization_convbert.py
+src/transformers/models/convbert/tokenization_convbert_fast.py
+src/transformers/models/convnext/__init__.py
+src/transformers/models/convnext/configuration_convnext.py
+src/transformers/models/convnext/convert_convnext_to_pytorch.py
+src/transformers/models/convnext/feature_extraction_convnext.py
+src/transformers/models/convnext/image_processing_convnext.py
+src/transformers/models/convnext/modeling_convnext.py
+src/transformers/models/convnext/modeling_tf_convnext.py
+src/transformers/models/convnextv2/__init__.py
+src/transformers/models/convnextv2/configuration_convnextv2.py
+src/transformers/models/convnextv2/convert_convnextv2_to_pytorch.py
+src/transformers/models/convnextv2/modeling_convnextv2.py
+src/transformers/models/convnextv2/modeling_tf_convnextv2.py
+src/transformers/models/cpm/__init__.py
+src/transformers/models/cpm/tokenization_cpm.py
+src/transformers/models/cpm/tokenization_cpm_fast.py
+src/transformers/models/cpmant/__init__.py
+src/transformers/models/cpmant/configuration_cpmant.py
+src/transformers/models/cpmant/modeling_cpmant.py
+src/transformers/models/cpmant/tokenization_cpmant.py
+src/transformers/models/ctrl/__init__.py
+src/transformers/models/ctrl/configuration_ctrl.py
+src/transformers/models/ctrl/modeling_ctrl.py
+src/transformers/models/ctrl/modeling_tf_ctrl.py
+src/transformers/models/ctrl/tokenization_ctrl.py
+src/transformers/models/cvt/__init__.py
+src/transformers/models/cvt/configuration_cvt.py
+src/transformers/models/cvt/convert_cvt_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/cvt/modeling_cvt.py
+src/transformers/models/cvt/modeling_tf_cvt.py
+src/transformers/models/data2vec/__init__.py
+src/transformers/models/data2vec/configuration_data2vec_audio.py
+src/transformers/models/data2vec/configuration_data2vec_text.py
+src/transformers/models/data2vec/configuration_data2vec_vision.py
+src/transformers/models/data2vec/convert_data2vec_audio_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/data2vec/convert_data2vec_text_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/data2vec/convert_data2vec_vision_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/data2vec/modeling_data2vec_audio.py
+src/transformers/models/data2vec/modeling_data2vec_text.py
+src/transformers/models/data2vec/modeling_data2vec_vision.py
+src/transformers/models/data2vec/modeling_tf_data2vec_vision.py
+src/transformers/models/dbrx/__init__.py
+src/transformers/models/dbrx/configuration_dbrx.py
+src/transformers/models/dbrx/modeling_dbrx.py
+src/transformers/models/deberta/__init__.py
+src/transformers/models/deberta/configuration_deberta.py
+src/transformers/models/deberta/modeling_deberta.py
+src/transformers/models/deberta/modeling_tf_deberta.py
+src/transformers/models/deberta/tokenization_deberta.py
+src/transformers/models/deberta/tokenization_deberta_fast.py
+src/transformers/models/deberta_v2/__init__.py
+src/transformers/models/deberta_v2/configuration_deberta_v2.py
+src/transformers/models/deberta_v2/modeling_deberta_v2.py
+src/transformers/models/deberta_v2/modeling_tf_deberta_v2.py
+src/transformers/models/deberta_v2/tokenization_deberta_v2.py
+src/transformers/models/deberta_v2/tokenization_deberta_v2_fast.py
+src/transformers/models/decision_transformer/__init__.py
+src/transformers/models/decision_transformer/configuration_decision_transformer.py
+src/transformers/models/decision_transformer/modeling_decision_transformer.py
+src/transformers/models/deformable_detr/__init__.py
+src/transformers/models/deformable_detr/configuration_deformable_detr.py
+src/transformers/models/deformable_detr/convert_deformable_detr_to_pytorch.py
+src/transformers/models/deformable_detr/feature_extraction_deformable_detr.py
+src/transformers/models/deformable_detr/image_processing_deformable_detr.py
+src/transformers/models/deformable_detr/load_custom.py
+src/transformers/models/deformable_detr/modeling_deformable_detr.py
+src/transformers/models/deit/__init__.py
+src/transformers/models/deit/configuration_deit.py
+src/transformers/models/deit/convert_deit_timm_to_pytorch.py
+src/transformers/models/deit/feature_extraction_deit.py
+src/transformers/models/deit/image_processing_deit.py
+src/transformers/models/deit/modeling_deit.py
+src/transformers/models/deit/modeling_tf_deit.py
+src/transformers/models/deprecated/__init__.py
+src/transformers/models/deprecated/bort/__init__.py
+src/transformers/models/deprecated/bort/convert_bort_original_gluonnlp_checkpoint_to_pytorch.py
+src/transformers/models/deprecated/deta/__init__.py
+src/transformers/models/deprecated/deta/configuration_deta.py
+src/transformers/models/deprecated/deta/convert_deta_resnet_to_pytorch.py
+src/transformers/models/deprecated/deta/convert_deta_swin_to_pytorch.py
+src/transformers/models/deprecated/deta/image_processing_deta.py
+src/transformers/models/deprecated/deta/modeling_deta.py
+src/transformers/models/deprecated/efficientformer/__init__.py
+src/transformers/models/deprecated/efficientformer/configuration_efficientformer.py
+src/transformers/models/deprecated/efficientformer/convert_efficientformer_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/deprecated/efficientformer/image_processing_efficientformer.py
+src/transformers/models/deprecated/efficientformer/modeling_efficientformer.py
+src/transformers/models/deprecated/efficientformer/modeling_tf_efficientformer.py
+src/transformers/models/deprecated/ernie_m/__init__.py
+src/transformers/models/deprecated/ernie_m/configuration_ernie_m.py
+src/transformers/models/deprecated/ernie_m/modeling_ernie_m.py
+src/transformers/models/deprecated/ernie_m/tokenization_ernie_m.py
+src/transformers/models/deprecated/gptsan_japanese/__init__.py
+src/transformers/models/deprecated/gptsan_japanese/configuration_gptsan_japanese.py
+src/transformers/models/deprecated/gptsan_japanese/convert_gptsan_tf_checkpoint_to_pytorch.py
+src/transformers/models/deprecated/gptsan_japanese/modeling_gptsan_japanese.py
+src/transformers/models/deprecated/gptsan_japanese/tokenization_gptsan_japanese.py
+src/transformers/models/deprecated/graphormer/__init__.py
+src/transformers/models/deprecated/graphormer/algos_graphormer.pyx
+src/transformers/models/deprecated/graphormer/collating_graphormer.py
+src/transformers/models/deprecated/graphormer/configuration_graphormer.py
+src/transformers/models/deprecated/graphormer/modeling_graphormer.py
+src/transformers/models/deprecated/jukebox/__init__.py
+src/transformers/models/deprecated/jukebox/configuration_jukebox.py
+src/transformers/models/deprecated/jukebox/convert_jukebox.py
+src/transformers/models/deprecated/jukebox/modeling_jukebox.py
+src/transformers/models/deprecated/jukebox/tokenization_jukebox.py
+src/transformers/models/deprecated/mctct/__init__.py
+src/transformers/models/deprecated/mctct/configuration_mctct.py
+src/transformers/models/deprecated/mctct/feature_extraction_mctct.py
+src/transformers/models/deprecated/mctct/modeling_mctct.py
+src/transformers/models/deprecated/mctct/processing_mctct.py
+src/transformers/models/deprecated/mega/__init__.py
+src/transformers/models/deprecated/mega/configuration_mega.py
+src/transformers/models/deprecated/mega/convert_mega_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/deprecated/mega/modeling_mega.py
+src/transformers/models/deprecated/mmbt/__init__.py
+src/transformers/models/deprecated/mmbt/configuration_mmbt.py
+src/transformers/models/deprecated/mmbt/modeling_mmbt.py
+src/transformers/models/deprecated/nat/__init__.py
+src/transformers/models/deprecated/nat/configuration_nat.py
+src/transformers/models/deprecated/nat/modeling_nat.py
+src/transformers/models/deprecated/nezha/__init__.py
+src/transformers/models/deprecated/nezha/configuration_nezha.py
+src/transformers/models/deprecated/nezha/modeling_nezha.py
+src/transformers/models/deprecated/open_llama/__init__.py
+src/transformers/models/deprecated/open_llama/configuration_open_llama.py
+src/transformers/models/deprecated/open_llama/modeling_open_llama.py
+src/transformers/models/deprecated/qdqbert/__init__.py
+src/transformers/models/deprecated/qdqbert/configuration_qdqbert.py
+src/transformers/models/deprecated/qdqbert/modeling_qdqbert.py
+src/transformers/models/deprecated/realm/__init__.py
+src/transformers/models/deprecated/realm/configuration_realm.py
+src/transformers/models/deprecated/realm/modeling_realm.py
+src/transformers/models/deprecated/realm/retrieval_realm.py
+src/transformers/models/deprecated/realm/tokenization_realm.py
+src/transformers/models/deprecated/realm/tokenization_realm_fast.py
+src/transformers/models/deprecated/retribert/__init__.py
+src/transformers/models/deprecated/retribert/configuration_retribert.py
+src/transformers/models/deprecated/retribert/modeling_retribert.py
+src/transformers/models/deprecated/retribert/tokenization_retribert.py
+src/transformers/models/deprecated/retribert/tokenization_retribert_fast.py
+src/transformers/models/deprecated/speech_to_text_2/__init__.py
+src/transformers/models/deprecated/speech_to_text_2/configuration_speech_to_text_2.py
+src/transformers/models/deprecated/speech_to_text_2/modeling_speech_to_text_2.py
+src/transformers/models/deprecated/speech_to_text_2/processing_speech_to_text_2.py
+src/transformers/models/deprecated/speech_to_text_2/tokenization_speech_to_text_2.py
+src/transformers/models/deprecated/tapex/__init__.py
+src/transformers/models/deprecated/tapex/tokenization_tapex.py
+src/transformers/models/deprecated/trajectory_transformer/__init__.py
+src/transformers/models/deprecated/trajectory_transformer/configuration_trajectory_transformer.py
+src/transformers/models/deprecated/trajectory_transformer/convert_trajectory_transformer_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/deprecated/trajectory_transformer/modeling_trajectory_transformer.py
+src/transformers/models/deprecated/transfo_xl/__init__.py
+src/transformers/models/deprecated/transfo_xl/configuration_transfo_xl.py
+src/transformers/models/deprecated/transfo_xl/convert_transfo_xl_original_tf_checkpoint_to_pytorch.py
+src/transformers/models/deprecated/transfo_xl/modeling_tf_transfo_xl.py
+src/transformers/models/deprecated/transfo_xl/modeling_tf_transfo_xl_utilities.py
+src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl.py
+src/transformers/models/deprecated/transfo_xl/modeling_transfo_xl_utilities.py
+src/transformers/models/deprecated/transfo_xl/tokenization_transfo_xl.py
+src/transformers/models/deprecated/tvlt/__init__.py
+src/transformers/models/deprecated/tvlt/configuration_tvlt.py
+src/transformers/models/deprecated/tvlt/feature_extraction_tvlt.py
+src/transformers/models/deprecated/tvlt/image_processing_tvlt.py
+src/transformers/models/deprecated/tvlt/modeling_tvlt.py
+src/transformers/models/deprecated/tvlt/processing_tvlt.py
+src/transformers/models/deprecated/van/__init__.py
+src/transformers/models/deprecated/van/configuration_van.py
+src/transformers/models/deprecated/van/convert_van_to_pytorch.py
+src/transformers/models/deprecated/van/modeling_van.py
+src/transformers/models/deprecated/vit_hybrid/__init__.py
+src/transformers/models/deprecated/vit_hybrid/configuration_vit_hybrid.py
+src/transformers/models/deprecated/vit_hybrid/convert_vit_hybrid_timm_to_pytorch.py
+src/transformers/models/deprecated/vit_hybrid/image_processing_vit_hybrid.py
+src/transformers/models/deprecated/vit_hybrid/modeling_vit_hybrid.py
+src/transformers/models/deprecated/xlm_prophetnet/__init__.py
+src/transformers/models/deprecated/xlm_prophetnet/configuration_xlm_prophetnet.py
+src/transformers/models/deprecated/xlm_prophetnet/modeling_xlm_prophetnet.py
+src/transformers/models/deprecated/xlm_prophetnet/tokenization_xlm_prophetnet.py
+src/transformers/models/depth_anything/__init__.py
+src/transformers/models/depth_anything/configuration_depth_anything.py
+src/transformers/models/depth_anything/convert_depth_anything_to_hf.py
+src/transformers/models/depth_anything/modeling_depth_anything.py
+src/transformers/models/detr/__init__.py
+src/transformers/models/detr/configuration_detr.py
+src/transformers/models/detr/convert_detr_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/detr/convert_detr_to_pytorch.py
+src/transformers/models/detr/feature_extraction_detr.py
+src/transformers/models/detr/image_processing_detr.py
+src/transformers/models/detr/modeling_detr.py
+src/transformers/models/dialogpt/__init__.py
+src/transformers/models/dialogpt/convert_dialogpt_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/dinat/__init__.py
+src/transformers/models/dinat/configuration_dinat.py
+src/transformers/models/dinat/modeling_dinat.py
+src/transformers/models/dinov2/__init__.py
+src/transformers/models/dinov2/configuration_dinov2.py
+src/transformers/models/dinov2/convert_dinov2_to_hf.py
+src/transformers/models/dinov2/modeling_dinov2.py
+src/transformers/models/distilbert/__init__.py
+src/transformers/models/distilbert/configuration_distilbert.py
+src/transformers/models/distilbert/modeling_distilbert.py
+src/transformers/models/distilbert/modeling_flax_distilbert.py
+src/transformers/models/distilbert/modeling_tf_distilbert.py
+src/transformers/models/distilbert/tokenization_distilbert.py
+src/transformers/models/distilbert/tokenization_distilbert_fast.py
+src/transformers/models/dit/__init__.py
+src/transformers/models/dit/convert_dit_unilm_to_pytorch.py
+src/transformers/models/donut/__init__.py
+src/transformers/models/donut/configuration_donut_swin.py
+src/transformers/models/donut/convert_donut_to_pytorch.py
+src/transformers/models/donut/feature_extraction_donut.py
+src/transformers/models/donut/image_processing_donut.py
+src/transformers/models/donut/modeling_donut_swin.py
+src/transformers/models/donut/processing_donut.py
+src/transformers/models/dpr/__init__.py
+src/transformers/models/dpr/configuration_dpr.py
+src/transformers/models/dpr/convert_dpr_original_checkpoint_to_pytorch.py
+src/transformers/models/dpr/modeling_dpr.py
+src/transformers/models/dpr/modeling_tf_dpr.py
+src/transformers/models/dpr/tokenization_dpr.py
+src/transformers/models/dpr/tokenization_dpr_fast.py
+src/transformers/models/dpt/__init__.py
+src/transformers/models/dpt/configuration_dpt.py
+src/transformers/models/dpt/convert_dinov2_depth_to_hf.py
+src/transformers/models/dpt/convert_dpt_beit_to_hf.py
+src/transformers/models/dpt/convert_dpt_hybrid_to_pytorch.py
+src/transformers/models/dpt/convert_dpt_swinv2_to_hf.py
+src/transformers/models/dpt/convert_dpt_to_pytorch.py
+src/transformers/models/dpt/feature_extraction_dpt.py
+src/transformers/models/dpt/image_processing_dpt.py
+src/transformers/models/dpt/modeling_dpt.py
+src/transformers/models/efficientnet/__init__.py
+src/transformers/models/efficientnet/configuration_efficientnet.py
+src/transformers/models/efficientnet/convert_efficientnet_to_pytorch.py
+src/transformers/models/efficientnet/image_processing_efficientnet.py
+src/transformers/models/efficientnet/modeling_efficientnet.py
+src/transformers/models/electra/__init__.py
+src/transformers/models/electra/configuration_electra.py
+src/transformers/models/electra/convert_electra_original_tf_checkpoint_to_pytorch.py
+src/transformers/models/electra/modeling_electra.py
+src/transformers/models/electra/modeling_flax_electra.py
+src/transformers/models/electra/modeling_tf_electra.py
+src/transformers/models/electra/tokenization_electra.py
+src/transformers/models/electra/tokenization_electra_fast.py
+src/transformers/models/encodec/__init__.py
+src/transformers/models/encodec/configuration_encodec.py
+src/transformers/models/encodec/convert_encodec_checkpoint_to_pytorch.py
+src/transformers/models/encodec/feature_extraction_encodec.py
+src/transformers/models/encodec/modeling_encodec.py
+src/transformers/models/encoder_decoder/__init__.py
+src/transformers/models/encoder_decoder/configuration_encoder_decoder.py
+src/transformers/models/encoder_decoder/modeling_encoder_decoder.py
+src/transformers/models/encoder_decoder/modeling_flax_encoder_decoder.py
+src/transformers/models/encoder_decoder/modeling_tf_encoder_decoder.py
+src/transformers/models/ernie/__init__.py
+src/transformers/models/ernie/configuration_ernie.py
+src/transformers/models/ernie/modeling_ernie.py
+src/transformers/models/esm/__init__.py
+src/transformers/models/esm/configuration_esm.py
+src/transformers/models/esm/convert_esm.py
+src/transformers/models/esm/modeling_esm.py
+src/transformers/models/esm/modeling_esmfold.py
+src/transformers/models/esm/modeling_tf_esm.py
+src/transformers/models/esm/tokenization_esm.py
+src/transformers/models/esm/openfold_utils/__init__.py
+src/transformers/models/esm/openfold_utils/chunk_utils.py
+src/transformers/models/esm/openfold_utils/data_transforms.py
+src/transformers/models/esm/openfold_utils/feats.py
+src/transformers/models/esm/openfold_utils/loss.py
+src/transformers/models/esm/openfold_utils/protein.py
+src/transformers/models/esm/openfold_utils/residue_constants.py
+src/transformers/models/esm/openfold_utils/rigid_utils.py
+src/transformers/models/esm/openfold_utils/tensor_utils.py
+src/transformers/models/falcon/__init__.py
+src/transformers/models/falcon/configuration_falcon.py
+src/transformers/models/falcon/convert_custom_code_checkpoint.py
+src/transformers/models/falcon/modeling_falcon.py
+src/transformers/models/fastspeech2_conformer/__init__.py
+src/transformers/models/fastspeech2_conformer/configuration_fastspeech2_conformer.py
+src/transformers/models/fastspeech2_conformer/convert_fastspeech2_conformer_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/fastspeech2_conformer/convert_hifigan.py
+src/transformers/models/fastspeech2_conformer/convert_model_with_hifigan.py
+src/transformers/models/fastspeech2_conformer/modeling_fastspeech2_conformer.py
+src/transformers/models/fastspeech2_conformer/tokenization_fastspeech2_conformer.py
+src/transformers/models/flaubert/__init__.py
+src/transformers/models/flaubert/configuration_flaubert.py
+src/transformers/models/flaubert/modeling_flaubert.py
+src/transformers/models/flaubert/modeling_tf_flaubert.py
+src/transformers/models/flaubert/tokenization_flaubert.py
+src/transformers/models/flava/__init__.py
+src/transformers/models/flava/configuration_flava.py
+src/transformers/models/flava/convert_dalle_to_flava_codebook.py
+src/transformers/models/flava/convert_flava_original_pytorch_to_hf.py
+src/transformers/models/flava/feature_extraction_flava.py
+src/transformers/models/flava/image_processing_flava.py
+src/transformers/models/flava/modeling_flava.py
+src/transformers/models/flava/processing_flava.py
+src/transformers/models/fnet/__init__.py
+src/transformers/models/fnet/configuration_fnet.py
+src/transformers/models/fnet/convert_fnet_original_flax_checkpoint_to_pytorch.py
+src/transformers/models/fnet/modeling_fnet.py
+src/transformers/models/fnet/tokenization_fnet.py
+src/transformers/models/fnet/tokenization_fnet_fast.py
+src/transformers/models/focalnet/__init__.py
+src/transformers/models/focalnet/configuration_focalnet.py
+src/transformers/models/focalnet/convert_focalnet_to_hf_format.py
+src/transformers/models/focalnet/modeling_focalnet.py
+src/transformers/models/fsmt/__init__.py
+src/transformers/models/fsmt/configuration_fsmt.py
+src/transformers/models/fsmt/convert_fsmt_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/fsmt/modeling_fsmt.py
+src/transformers/models/fsmt/tokenization_fsmt.py
+src/transformers/models/funnel/__init__.py
+src/transformers/models/funnel/configuration_funnel.py
+src/transformers/models/funnel/convert_funnel_original_tf_checkpoint_to_pytorch.py
+src/transformers/models/funnel/modeling_funnel.py
+src/transformers/models/funnel/modeling_tf_funnel.py
+src/transformers/models/funnel/tokenization_funnel.py
+src/transformers/models/funnel/tokenization_funnel_fast.py
+src/transformers/models/fuyu/__init__.py
+src/transformers/models/fuyu/configuration_fuyu.py
+src/transformers/models/fuyu/convert_fuyu_model_weights_to_hf.py
+src/transformers/models/fuyu/image_processing_fuyu.py
+src/transformers/models/fuyu/modeling_fuyu.py
+src/transformers/models/fuyu/processing_fuyu.py
+src/transformers/models/gemma/__init__.py
+src/transformers/models/gemma/configuration_gemma.py
+src/transformers/models/gemma/convert_gemma_weights_to_hf.py
+src/transformers/models/gemma/diff_gemma.py
+src/transformers/models/gemma/modeling_flax_gemma.py
+src/transformers/models/gemma/modeling_gemma.py
+src/transformers/models/gemma/tokenization_gemma.py
+src/transformers/models/gemma/tokenization_gemma_fast.py
+src/transformers/models/gemma2/__init__.py
+src/transformers/models/gemma2/configuration_gemma2.py
+src/transformers/models/gemma2/convert_gemma2_weights_to_hf.py
+src/transformers/models/gemma2/diff_gemma2.py
+src/transformers/models/gemma2/modeling_gemma2.py
+src/transformers/models/git/__init__.py
+src/transformers/models/git/configuration_git.py
+src/transformers/models/git/convert_git_to_pytorch.py
+src/transformers/models/git/modeling_git.py
+src/transformers/models/git/processing_git.py
+src/transformers/models/glpn/__init__.py
+src/transformers/models/glpn/configuration_glpn.py
+src/transformers/models/glpn/convert_glpn_to_pytorch.py
+src/transformers/models/glpn/feature_extraction_glpn.py
+src/transformers/models/glpn/image_processing_glpn.py
+src/transformers/models/glpn/modeling_glpn.py
+src/transformers/models/gpt2/__init__.py
+src/transformers/models/gpt2/configuration_gpt2.py
+src/transformers/models/gpt2/convert_gpt2_original_tf_checkpoint_to_pytorch.py
+src/transformers/models/gpt2/modeling_flax_gpt2.py
+src/transformers/models/gpt2/modeling_gpt2.py
+src/transformers/models/gpt2/modeling_tf_gpt2.py
+src/transformers/models/gpt2/tokenization_gpt2.py
+src/transformers/models/gpt2/tokenization_gpt2_fast.py
+src/transformers/models/gpt2/tokenization_gpt2_tf.py
+src/transformers/models/gpt_bigcode/__init__.py
+src/transformers/models/gpt_bigcode/configuration_gpt_bigcode.py
+src/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py
+src/transformers/models/gpt_neo/__init__.py
+src/transformers/models/gpt_neo/configuration_gpt_neo.py
+src/transformers/models/gpt_neo/convert_gpt_neo_mesh_tf_to_pytorch.py
+src/transformers/models/gpt_neo/modeling_flax_gpt_neo.py
+src/transformers/models/gpt_neo/modeling_gpt_neo.py
+src/transformers/models/gpt_neox/__init__.py
+src/transformers/models/gpt_neox/configuration_gpt_neox.py
+src/transformers/models/gpt_neox/modeling_gpt_neox.py
+src/transformers/models/gpt_neox/tokenization_gpt_neox_fast.py
+src/transformers/models/gpt_neox_japanese/__init__.py
+src/transformers/models/gpt_neox_japanese/configuration_gpt_neox_japanese.py
+src/transformers/models/gpt_neox_japanese/modeling_gpt_neox_japanese.py
+src/transformers/models/gpt_neox_japanese/tokenization_gpt_neox_japanese.py
+src/transformers/models/gpt_sw3/__init__.py
+src/transformers/models/gpt_sw3/convert_megatron_to_pytorch.py
+src/transformers/models/gpt_sw3/tokenization_gpt_sw3.py
+src/transformers/models/gptj/__init__.py
+src/transformers/models/gptj/configuration_gptj.py
+src/transformers/models/gptj/modeling_flax_gptj.py
+src/transformers/models/gptj/modeling_gptj.py
+src/transformers/models/gptj/modeling_tf_gptj.py
+src/transformers/models/grounding_dino/__init__.py
+src/transformers/models/grounding_dino/configuration_grounding_dino.py
+src/transformers/models/grounding_dino/convert_grounding_dino_to_hf.py
+src/transformers/models/grounding_dino/image_processing_grounding_dino.py
+src/transformers/models/grounding_dino/modeling_grounding_dino.py
+src/transformers/models/grounding_dino/processing_grounding_dino.py
+src/transformers/models/groupvit/__init__.py
+src/transformers/models/groupvit/configuration_groupvit.py
+src/transformers/models/groupvit/convert_groupvit_nvlab_to_hf.py
+src/transformers/models/groupvit/modeling_groupvit.py
+src/transformers/models/groupvit/modeling_tf_groupvit.py
+src/transformers/models/herbert/__init__.py
+src/transformers/models/herbert/tokenization_herbert.py
+src/transformers/models/herbert/tokenization_herbert_fast.py
+src/transformers/models/hiera/__init__.py
+src/transformers/models/hiera/configuration_hiera.py
+src/transformers/models/hiera/convert_hiera_to_hf.py
+src/transformers/models/hiera/modeling_hiera.py
+src/transformers/models/hubert/__init__.py
+src/transformers/models/hubert/configuration_hubert.py
+src/transformers/models/hubert/convert_distilhubert_original_s3prl_checkpoint_to_pytorch.py
+src/transformers/models/hubert/convert_hubert_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/hubert/convert_hubert_original_s3prl_checkpoint_to_pytorch.py
+src/transformers/models/hubert/modeling_hubert.py
+src/transformers/models/hubert/modeling_tf_hubert.py
+src/transformers/models/ibert/__init__.py
+src/transformers/models/ibert/configuration_ibert.py
+src/transformers/models/ibert/modeling_ibert.py
+src/transformers/models/ibert/quant_modules.py
+src/transformers/models/idefics/__init__.py
+src/transformers/models/idefics/configuration_idefics.py
+src/transformers/models/idefics/image_processing_idefics.py
+src/transformers/models/idefics/modeling_idefics.py
+src/transformers/models/idefics/modeling_tf_idefics.py
+src/transformers/models/idefics/perceiver.py
+src/transformers/models/idefics/perceiver_tf.py
+src/transformers/models/idefics/processing_idefics.py
+src/transformers/models/idefics/vision.py
+src/transformers/models/idefics/vision_tf.py
+src/transformers/models/idefics2/__init__.py
+src/transformers/models/idefics2/configuration_idefics2.py
+src/transformers/models/idefics2/convert_idefics2_weights_to_hf.py
+src/transformers/models/idefics2/image_processing_idefics2.py
+src/transformers/models/idefics2/modeling_idefics2.py
+src/transformers/models/idefics2/processing_idefics2.py
+src/transformers/models/imagegpt/__init__.py
+src/transformers/models/imagegpt/configuration_imagegpt.py
+src/transformers/models/imagegpt/convert_imagegpt_original_tf2_to_pytorch.py
+src/transformers/models/imagegpt/feature_extraction_imagegpt.py
+src/transformers/models/imagegpt/image_processing_imagegpt.py
+src/transformers/models/imagegpt/modeling_imagegpt.py
+src/transformers/models/informer/__init__.py
+src/transformers/models/informer/configuration_informer.py
+src/transformers/models/informer/modeling_informer.py
+src/transformers/models/instructblip/__init__.py
+src/transformers/models/instructblip/configuration_instructblip.py
+src/transformers/models/instructblip/convert_instructblip_original_to_pytorch.py
+src/transformers/models/instructblip/modeling_instructblip.py
+src/transformers/models/instructblip/processing_instructblip.py
+src/transformers/models/instructblipvideo/__init__.py
+src/transformers/models/instructblipvideo/configuration_instructblipvideo.py
+src/transformers/models/instructblipvideo/convert_instructblipvideo_original_to_pytorch.py
+src/transformers/models/instructblipvideo/diff_instructblipvideo.py
+src/transformers/models/instructblipvideo/image_processing_instructblipvideo.py
+src/transformers/models/instructblipvideo/modeling_instructblipvideo.py
+src/transformers/models/instructblipvideo/processing_instructblipvideo.py
+src/transformers/models/jamba/__init__.py
+src/transformers/models/jamba/configuration_jamba.py
+src/transformers/models/jamba/modeling_jamba.py
+src/transformers/models/jetmoe/__init__.py
+src/transformers/models/jetmoe/configuration_jetmoe.py
+src/transformers/models/jetmoe/modeling_jetmoe.py
+src/transformers/models/kosmos2/__init__.py
+src/transformers/models/kosmos2/configuration_kosmos2.py
+src/transformers/models/kosmos2/convert_kosmos2_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/kosmos2/modeling_kosmos2.py
+src/transformers/models/kosmos2/processing_kosmos2.py
+src/transformers/models/layoutlm/__init__.py
+src/transformers/models/layoutlm/configuration_layoutlm.py
+src/transformers/models/layoutlm/modeling_layoutlm.py
+src/transformers/models/layoutlm/modeling_tf_layoutlm.py
+src/transformers/models/layoutlm/tokenization_layoutlm.py
+src/transformers/models/layoutlm/tokenization_layoutlm_fast.py
+src/transformers/models/layoutlmv2/__init__.py
+src/transformers/models/layoutlmv2/configuration_layoutlmv2.py
+src/transformers/models/layoutlmv2/feature_extraction_layoutlmv2.py
+src/transformers/models/layoutlmv2/image_processing_layoutlmv2.py
+src/transformers/models/layoutlmv2/modeling_layoutlmv2.py
+src/transformers/models/layoutlmv2/processing_layoutlmv2.py
+src/transformers/models/layoutlmv2/tokenization_layoutlmv2.py
+src/transformers/models/layoutlmv2/tokenization_layoutlmv2_fast.py
+src/transformers/models/layoutlmv3/__init__.py
+src/transformers/models/layoutlmv3/configuration_layoutlmv3.py
+src/transformers/models/layoutlmv3/feature_extraction_layoutlmv3.py
+src/transformers/models/layoutlmv3/image_processing_layoutlmv3.py
+src/transformers/models/layoutlmv3/modeling_layoutlmv3.py
+src/transformers/models/layoutlmv3/modeling_tf_layoutlmv3.py
+src/transformers/models/layoutlmv3/processing_layoutlmv3.py
+src/transformers/models/layoutlmv3/tokenization_layoutlmv3.py
+src/transformers/models/layoutlmv3/tokenization_layoutlmv3_fast.py
+src/transformers/models/layoutxlm/__init__.py
+src/transformers/models/layoutxlm/processing_layoutxlm.py
+src/transformers/models/layoutxlm/tokenization_layoutxlm.py
+src/transformers/models/layoutxlm/tokenization_layoutxlm_fast.py
+src/transformers/models/led/__init__.py
+src/transformers/models/led/configuration_led.py
+src/transformers/models/led/modeling_led.py
+src/transformers/models/led/modeling_tf_led.py
+src/transformers/models/led/tokenization_led.py
+src/transformers/models/led/tokenization_led_fast.py
+src/transformers/models/levit/__init__.py
+src/transformers/models/levit/configuration_levit.py
+src/transformers/models/levit/convert_levit_timm_to_pytorch.py
+src/transformers/models/levit/feature_extraction_levit.py
+src/transformers/models/levit/image_processing_levit.py
+src/transformers/models/levit/modeling_levit.py
+src/transformers/models/lilt/__init__.py
+src/transformers/models/lilt/configuration_lilt.py
+src/transformers/models/lilt/modeling_lilt.py
+src/transformers/models/llama/__init__.py
+src/transformers/models/llama/configuration_llama.py
+src/transformers/models/llama/convert_llama_weights_to_hf.py
+src/transformers/models/llama/modeling_flax_llama.py
+src/transformers/models/llama/modeling_llama.py
+src/transformers/models/llama/tokenization_llama.py
+src/transformers/models/llama/tokenization_llama_fast.py
+src/transformers/models/llava/__init__.py
+src/transformers/models/llava/configuration_llava.py
+src/transformers/models/llava/convert_llava_weights_to_hf.py
+src/transformers/models/llava/modeling_llava.py
+src/transformers/models/llava/processing_llava.py
+src/transformers/models/llava_next/__init__.py
+src/transformers/models/llava_next/configuration_llava_next.py
+src/transformers/models/llava_next/convert_llava_next_weights_to_hf.py
+src/transformers/models/llava_next/image_processing_llava_next.py
+src/transformers/models/llava_next/modeling_llava_next.py
+src/transformers/models/llava_next/processing_llava_next.py
+src/transformers/models/llava_next_video/__init__.py
+src/transformers/models/llava_next_video/configuration_llava_next_video.py
+src/transformers/models/llava_next_video/convert_llava_next_video_weights_to_hf.py
+src/transformers/models/llava_next_video/diff_llava_next_video.py
+src/transformers/models/llava_next_video/image_processing_llava_next_video.py
+src/transformers/models/llava_next_video/modeling_llava_next_video.py
+src/transformers/models/llava_next_video/processing_llava_next_video.py
+src/transformers/models/longformer/__init__.py
+src/transformers/models/longformer/configuration_longformer.py
+src/transformers/models/longformer/convert_longformer_original_pytorch_lightning_to_pytorch.py
+src/transformers/models/longformer/modeling_longformer.py
+src/transformers/models/longformer/modeling_tf_longformer.py
+src/transformers/models/longformer/tokenization_longformer.py
+src/transformers/models/longformer/tokenization_longformer_fast.py
+src/transformers/models/longt5/__init__.py
+src/transformers/models/longt5/configuration_longt5.py
+src/transformers/models/longt5/convert_longt5x_checkpoint_to_flax.py
+src/transformers/models/longt5/modeling_flax_longt5.py
+src/transformers/models/longt5/modeling_longt5.py
+src/transformers/models/luke/__init__.py
+src/transformers/models/luke/configuration_luke.py
+src/transformers/models/luke/convert_luke_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/luke/modeling_luke.py
+src/transformers/models/luke/tokenization_luke.py
+src/transformers/models/lxmert/__init__.py
+src/transformers/models/lxmert/configuration_lxmert.py
+src/transformers/models/lxmert/convert_lxmert_original_tf_checkpoint_to_pytorch.py
+src/transformers/models/lxmert/modeling_lxmert.py
+src/transformers/models/lxmert/modeling_tf_lxmert.py
+src/transformers/models/lxmert/tokenization_lxmert.py
+src/transformers/models/lxmert/tokenization_lxmert_fast.py
+src/transformers/models/m2m_100/__init__.py
+src/transformers/models/m2m_100/configuration_m2m_100.py
+src/transformers/models/m2m_100/convert_m2m100_original_checkpoint_to_pytorch.py
+src/transformers/models/m2m_100/modeling_m2m_100.py
+src/transformers/models/m2m_100/tokenization_m2m_100.py
+src/transformers/models/mamba/__init__.py
+src/transformers/models/mamba/configuration_mamba.py
+src/transformers/models/mamba/convert_mamba_ssm_checkpoint_to_pytorch.py
+src/transformers/models/mamba/modeling_mamba.py
+src/transformers/models/marian/__init__.py
+src/transformers/models/marian/configuration_marian.py
+src/transformers/models/marian/convert_marian_tatoeba_to_pytorch.py
+src/transformers/models/marian/convert_marian_to_pytorch.py
+src/transformers/models/marian/modeling_flax_marian.py
+src/transformers/models/marian/modeling_marian.py
+src/transformers/models/marian/modeling_tf_marian.py
+src/transformers/models/marian/tokenization_marian.py
+src/transformers/models/markuplm/__init__.py
+src/transformers/models/markuplm/configuration_markuplm.py
+src/transformers/models/markuplm/feature_extraction_markuplm.py
+src/transformers/models/markuplm/modeling_markuplm.py
+src/transformers/models/markuplm/processing_markuplm.py
+src/transformers/models/markuplm/tokenization_markuplm.py
+src/transformers/models/markuplm/tokenization_markuplm_fast.py
+src/transformers/models/mask2former/__init__.py
+src/transformers/models/mask2former/configuration_mask2former.py
+src/transformers/models/mask2former/convert_mask2former_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/mask2former/image_processing_mask2former.py
+src/transformers/models/mask2former/modeling_mask2former.py
+src/transformers/models/maskformer/__init__.py
+src/transformers/models/maskformer/configuration_maskformer.py
+src/transformers/models/maskformer/configuration_maskformer_swin.py
+src/transformers/models/maskformer/convert_maskformer_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/maskformer/convert_maskformer_resnet_to_pytorch.py
+src/transformers/models/maskformer/convert_maskformer_swin_to_pytorch.py
+src/transformers/models/maskformer/feature_extraction_maskformer.py
+src/transformers/models/maskformer/image_processing_maskformer.py
+src/transformers/models/maskformer/modeling_maskformer.py
+src/transformers/models/maskformer/modeling_maskformer_swin.py
+src/transformers/models/mbart/__init__.py
+src/transformers/models/mbart/configuration_mbart.py
+src/transformers/models/mbart/convert_mbart_original_checkpoint_to_pytorch.py
+src/transformers/models/mbart/modeling_flax_mbart.py
+src/transformers/models/mbart/modeling_mbart.py
+src/transformers/models/mbart/modeling_tf_mbart.py
+src/transformers/models/mbart/tokenization_mbart.py
+src/transformers/models/mbart/tokenization_mbart_fast.py
+src/transformers/models/mbart50/__init__.py
+src/transformers/models/mbart50/tokenization_mbart50.py
+src/transformers/models/mbart50/tokenization_mbart50_fast.py
+src/transformers/models/megatron_bert/__init__.py
+src/transformers/models/megatron_bert/configuration_megatron_bert.py
+src/transformers/models/megatron_bert/convert_megatron_bert_checkpoint.py
+src/transformers/models/megatron_bert/modeling_megatron_bert.py
+src/transformers/models/megatron_gpt2/__init__.py
+src/transformers/models/megatron_gpt2/checkpoint_reshaping_and_interoperability.py
+src/transformers/models/megatron_gpt2/convert_megatron_gpt2_checkpoint.py
+src/transformers/models/mgp_str/__init__.py
+src/transformers/models/mgp_str/configuration_mgp_str.py
+src/transformers/models/mgp_str/modeling_mgp_str.py
+src/transformers/models/mgp_str/processing_mgp_str.py
+src/transformers/models/mgp_str/tokenization_mgp_str.py
+src/transformers/models/mistral/__init__.py
+src/transformers/models/mistral/configuration_mistral.py
+src/transformers/models/mistral/convert_mistral_weights_to_hf.py
+src/transformers/models/mistral/modeling_flax_mistral.py
+src/transformers/models/mistral/modeling_mistral.py
+src/transformers/models/mistral/modeling_tf_mistral.py
+src/transformers/models/mixtral/__init__.py
+src/transformers/models/mixtral/configuration_mixtral.py
+src/transformers/models/mixtral/convert_mixtral_weights_to_hf.py
+src/transformers/models/mixtral/modeling_mixtral.py
+src/transformers/models/mluke/__init__.py
+src/transformers/models/mluke/convert_mluke_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/mluke/tokenization_mluke.py
+src/transformers/models/mobilebert/__init__.py
+src/transformers/models/mobilebert/configuration_mobilebert.py
+src/transformers/models/mobilebert/convert_mobilebert_original_tf_checkpoint_to_pytorch.py
+src/transformers/models/mobilebert/modeling_mobilebert.py
+src/transformers/models/mobilebert/modeling_tf_mobilebert.py
+src/transformers/models/mobilebert/tokenization_mobilebert.py
+src/transformers/models/mobilebert/tokenization_mobilebert_fast.py
+src/transformers/models/mobilenet_v1/__init__.py
+src/transformers/models/mobilenet_v1/configuration_mobilenet_v1.py
+src/transformers/models/mobilenet_v1/convert_original_tf_checkpoint_to_pytorch.py
+src/transformers/models/mobilenet_v1/feature_extraction_mobilenet_v1.py
+src/transformers/models/mobilenet_v1/image_processing_mobilenet_v1.py
+src/transformers/models/mobilenet_v1/modeling_mobilenet_v1.py
+src/transformers/models/mobilenet_v2/__init__.py
+src/transformers/models/mobilenet_v2/configuration_mobilenet_v2.py
+src/transformers/models/mobilenet_v2/convert_original_tf_checkpoint_to_pytorch.py
+src/transformers/models/mobilenet_v2/feature_extraction_mobilenet_v2.py
+src/transformers/models/mobilenet_v2/image_processing_mobilenet_v2.py
+src/transformers/models/mobilenet_v2/modeling_mobilenet_v2.py
+src/transformers/models/mobilevit/__init__.py
+src/transformers/models/mobilevit/configuration_mobilevit.py
+src/transformers/models/mobilevit/convert_mlcvnets_to_pytorch.py
+src/transformers/models/mobilevit/feature_extraction_mobilevit.py
+src/transformers/models/mobilevit/image_processing_mobilevit.py
+src/transformers/models/mobilevit/modeling_mobilevit.py
+src/transformers/models/mobilevit/modeling_tf_mobilevit.py
+src/transformers/models/mobilevitv2/__init__.py
+src/transformers/models/mobilevitv2/configuration_mobilevitv2.py
+src/transformers/models/mobilevitv2/convert_mlcvnets_to_pytorch.py
+src/transformers/models/mobilevitv2/modeling_mobilevitv2.py
+src/transformers/models/mpnet/__init__.py
+src/transformers/models/mpnet/configuration_mpnet.py
+src/transformers/models/mpnet/modeling_mpnet.py
+src/transformers/models/mpnet/modeling_tf_mpnet.py
+src/transformers/models/mpnet/tokenization_mpnet.py
+src/transformers/models/mpnet/tokenization_mpnet_fast.py
+src/transformers/models/mpt/__init__.py
+src/transformers/models/mpt/configuration_mpt.py
+src/transformers/models/mpt/modeling_mpt.py
+src/transformers/models/mra/__init__.py
+src/transformers/models/mra/configuration_mra.py
+src/transformers/models/mra/convert_mra_pytorch_to_pytorch.py
+src/transformers/models/mra/modeling_mra.py
+src/transformers/models/mt5/__init__.py
+src/transformers/models/mt5/configuration_mt5.py
+src/transformers/models/mt5/modeling_flax_mt5.py
+src/transformers/models/mt5/modeling_mt5.py
+src/transformers/models/mt5/modeling_tf_mt5.py
+src/transformers/models/musicgen/__init__.py
+src/transformers/models/musicgen/configuration_musicgen.py
+src/transformers/models/musicgen/convert_musicgen_transformers.py
+src/transformers/models/musicgen/modeling_musicgen.py
+src/transformers/models/musicgen/processing_musicgen.py
+src/transformers/models/musicgen_melody/__init__.py
+src/transformers/models/musicgen_melody/configuration_musicgen_melody.py
+src/transformers/models/musicgen_melody/convert_musicgen_melody_transformers.py
+src/transformers/models/musicgen_melody/feature_extraction_musicgen_melody.py
+src/transformers/models/musicgen_melody/modeling_musicgen_melody.py
+src/transformers/models/musicgen_melody/processing_musicgen_melody.py
+src/transformers/models/mvp/__init__.py
+src/transformers/models/mvp/configuration_mvp.py
+src/transformers/models/mvp/modeling_mvp.py
+src/transformers/models/mvp/tokenization_mvp.py
+src/transformers/models/mvp/tokenization_mvp_fast.py
+src/transformers/models/nllb/__init__.py
+src/transformers/models/nllb/tokenization_nllb.py
+src/transformers/models/nllb/tokenization_nllb_fast.py
+src/transformers/models/nllb_moe/__init__.py
+src/transformers/models/nllb_moe/configuration_nllb_moe.py
+src/transformers/models/nllb_moe/convert_nllb_moe_sharded_original_checkpoint_to_pytorch.py
+src/transformers/models/nllb_moe/modeling_nllb_moe.py
+src/transformers/models/nougat/__init__.py
+src/transformers/models/nougat/convert_nougat_to_hf.py
+src/transformers/models/nougat/image_processing_nougat.py
+src/transformers/models/nougat/processing_nougat.py
+src/transformers/models/nougat/tokenization_nougat_fast.py
+src/transformers/models/nystromformer/__init__.py
+src/transformers/models/nystromformer/configuration_nystromformer.py
+src/transformers/models/nystromformer/convert_nystromformer_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/nystromformer/modeling_nystromformer.py
+src/transformers/models/olmo/__init__.py
+src/transformers/models/olmo/configuration_olmo.py
+src/transformers/models/olmo/convert_olmo_weights_to_hf.py
+src/transformers/models/olmo/modeling_olmo.py
+src/transformers/models/oneformer/__init__.py
+src/transformers/models/oneformer/configuration_oneformer.py
+src/transformers/models/oneformer/convert_to_hf_oneformer.py
+src/transformers/models/oneformer/image_processing_oneformer.py
+src/transformers/models/oneformer/modeling_oneformer.py
+src/transformers/models/oneformer/processing_oneformer.py
+src/transformers/models/openai/__init__.py
+src/transformers/models/openai/configuration_openai.py
+src/transformers/models/openai/convert_openai_original_tf_checkpoint_to_pytorch.py
+src/transformers/models/openai/modeling_openai.py
+src/transformers/models/openai/modeling_tf_openai.py
+src/transformers/models/openai/tokenization_openai.py
+src/transformers/models/openai/tokenization_openai_fast.py
+src/transformers/models/opt/__init__.py
+src/transformers/models/opt/configuration_opt.py
+src/transformers/models/opt/convert_opt_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/opt/modeling_flax_opt.py
+src/transformers/models/opt/modeling_opt.py
+src/transformers/models/opt/modeling_tf_opt.py
+src/transformers/models/owlv2/__init__.py
+src/transformers/models/owlv2/configuration_owlv2.py
+src/transformers/models/owlv2/convert_owlv2_to_hf.py
+src/transformers/models/owlv2/image_processing_owlv2.py
+src/transformers/models/owlv2/modeling_owlv2.py
+src/transformers/models/owlv2/processing_owlv2.py
+src/transformers/models/owlvit/__init__.py
+src/transformers/models/owlvit/configuration_owlvit.py
+src/transformers/models/owlvit/convert_owlvit_original_flax_to_hf.py
+src/transformers/models/owlvit/feature_extraction_owlvit.py
+src/transformers/models/owlvit/image_processing_owlvit.py
+src/transformers/models/owlvit/modeling_owlvit.py
+src/transformers/models/owlvit/processing_owlvit.py
+src/transformers/models/paligemma/__init__.py
+src/transformers/models/paligemma/configuration_paligemma.py
+src/transformers/models/paligemma/convert_paligemma_weights_to_hf.py
+src/transformers/models/paligemma/modeling_paligemma.py
+src/transformers/models/paligemma/processing_paligemma.py
+src/transformers/models/patchtsmixer/__init__.py
+src/transformers/models/patchtsmixer/configuration_patchtsmixer.py
+src/transformers/models/patchtsmixer/modeling_patchtsmixer.py
+src/transformers/models/patchtst/__init__.py
+src/transformers/models/patchtst/configuration_patchtst.py
+src/transformers/models/patchtst/modeling_patchtst.py
+src/transformers/models/pegasus/__init__.py
+src/transformers/models/pegasus/configuration_pegasus.py
+src/transformers/models/pegasus/convert_pegasus_tf_to_pytorch.py
+src/transformers/models/pegasus/modeling_flax_pegasus.py
+src/transformers/models/pegasus/modeling_pegasus.py
+src/transformers/models/pegasus/modeling_tf_pegasus.py
+src/transformers/models/pegasus/tokenization_pegasus.py
+src/transformers/models/pegasus/tokenization_pegasus_fast.py
+src/transformers/models/pegasus_x/__init__.py
+src/transformers/models/pegasus_x/configuration_pegasus_x.py
+src/transformers/models/pegasus_x/modeling_pegasus_x.py
+src/transformers/models/perceiver/__init__.py
+src/transformers/models/perceiver/configuration_perceiver.py
+src/transformers/models/perceiver/convert_perceiver_haiku_to_pytorch.py
+src/transformers/models/perceiver/feature_extraction_perceiver.py
+src/transformers/models/perceiver/image_processing_perceiver.py
+src/transformers/models/perceiver/modeling_perceiver.py
+src/transformers/models/perceiver/tokenization_perceiver.py
+src/transformers/models/persimmon/__init__.py
+src/transformers/models/persimmon/configuration_persimmon.py
+src/transformers/models/persimmon/convert_persimmon_weights_to_hf.py
+src/transformers/models/persimmon/modeling_persimmon.py
+src/transformers/models/phi/__init__.py
+src/transformers/models/phi/configuration_phi.py
+src/transformers/models/phi/convert_phi_weights_to_hf.py
+src/transformers/models/phi/modeling_phi.py
+src/transformers/models/phi3/__init__.py
+src/transformers/models/phi3/configuration_phi3.py
+src/transformers/models/phi3/modeling_phi3.py
+src/transformers/models/phobert/__init__.py
+src/transformers/models/phobert/tokenization_phobert.py
+src/transformers/models/pix2struct/__init__.py
+src/transformers/models/pix2struct/configuration_pix2struct.py
+src/transformers/models/pix2struct/convert_pix2struct_original_pytorch_to_hf.py
+src/transformers/models/pix2struct/image_processing_pix2struct.py
+src/transformers/models/pix2struct/modeling_pix2struct.py
+src/transformers/models/pix2struct/processing_pix2struct.py
+src/transformers/models/plbart/__init__.py
+src/transformers/models/plbart/configuration_plbart.py
+src/transformers/models/plbart/convert_plbart_original_checkpoint_to_torch.py
+src/transformers/models/plbart/modeling_plbart.py
+src/transformers/models/plbart/tokenization_plbart.py
+src/transformers/models/poolformer/__init__.py
+src/transformers/models/poolformer/configuration_poolformer.py
+src/transformers/models/poolformer/convert_poolformer_original_to_pytorch.py
+src/transformers/models/poolformer/feature_extraction_poolformer.py
+src/transformers/models/poolformer/image_processing_poolformer.py
+src/transformers/models/poolformer/modeling_poolformer.py
+src/transformers/models/pop2piano/__init__.py
+src/transformers/models/pop2piano/configuration_pop2piano.py
+src/transformers/models/pop2piano/convert_pop2piano_weights_to_hf.py
+src/transformers/models/pop2piano/feature_extraction_pop2piano.py
+src/transformers/models/pop2piano/modeling_pop2piano.py
+src/transformers/models/pop2piano/processing_pop2piano.py
+src/transformers/models/pop2piano/tokenization_pop2piano.py
+src/transformers/models/prophetnet/__init__.py
+src/transformers/models/prophetnet/configuration_prophetnet.py
+src/transformers/models/prophetnet/convert_prophetnet_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/prophetnet/modeling_prophetnet.py
+src/transformers/models/prophetnet/tokenization_prophetnet.py
+src/transformers/models/pvt/__init__.py
+src/transformers/models/pvt/configuration_pvt.py
+src/transformers/models/pvt/convert_pvt_to_pytorch.py
+src/transformers/models/pvt/image_processing_pvt.py
+src/transformers/models/pvt/modeling_pvt.py
+src/transformers/models/pvt_v2/__init__.py
+src/transformers/models/pvt_v2/configuration_pvt_v2.py
+src/transformers/models/pvt_v2/convert_pvt_v2_to_pytorch.py
+src/transformers/models/pvt_v2/modeling_pvt_v2.py
+src/transformers/models/qwen2/__init__.py
+src/transformers/models/qwen2/configuration_qwen2.py
+src/transformers/models/qwen2/modeling_qwen2.py
+src/transformers/models/qwen2/tokenization_qwen2.py
+src/transformers/models/qwen2/tokenization_qwen2_fast.py
+src/transformers/models/qwen2_moe/__init__.py
+src/transformers/models/qwen2_moe/configuration_qwen2_moe.py
+src/transformers/models/qwen2_moe/modeling_qwen2_moe.py
+src/transformers/models/rag/__init__.py
+src/transformers/models/rag/configuration_rag.py
+src/transformers/models/rag/modeling_rag.py
+src/transformers/models/rag/modeling_tf_rag.py
+src/transformers/models/rag/retrieval_rag.py
+src/transformers/models/rag/tokenization_rag.py
+src/transformers/models/recurrent_gemma/__init__.py
+src/transformers/models/recurrent_gemma/configuration_recurrent_gemma.py
+src/transformers/models/recurrent_gemma/convert_recurrent_gemma_to_hf.py
+src/transformers/models/recurrent_gemma/modeling_recurrent_gemma.py
+src/transformers/models/reformer/__init__.py
+src/transformers/models/reformer/configuration_reformer.py
+src/transformers/models/reformer/convert_reformer_trax_checkpoint_to_pytorch.py
+src/transformers/models/reformer/modeling_reformer.py
+src/transformers/models/reformer/tokenization_reformer.py
+src/transformers/models/reformer/tokenization_reformer_fast.py
+src/transformers/models/regnet/__init__.py
+src/transformers/models/regnet/configuration_regnet.py
+src/transformers/models/regnet/convert_regnet_seer_10b_to_pytorch.py
+src/transformers/models/regnet/convert_regnet_to_pytorch.py
+src/transformers/models/regnet/modeling_flax_regnet.py
+src/transformers/models/regnet/modeling_regnet.py
+src/transformers/models/regnet/modeling_tf_regnet.py
+src/transformers/models/rembert/__init__.py
+src/transformers/models/rembert/configuration_rembert.py
+src/transformers/models/rembert/convert_rembert_tf_checkpoint_to_pytorch.py
+src/transformers/models/rembert/modeling_rembert.py
+src/transformers/models/rembert/modeling_tf_rembert.py
+src/transformers/models/rembert/tokenization_rembert.py
+src/transformers/models/rembert/tokenization_rembert_fast.py
+src/transformers/models/resnet/__init__.py
+src/transformers/models/resnet/configuration_resnet.py
+src/transformers/models/resnet/convert_resnet_to_pytorch.py
+src/transformers/models/resnet/modeling_flax_resnet.py
+src/transformers/models/resnet/modeling_resnet.py
+src/transformers/models/resnet/modeling_tf_resnet.py
+src/transformers/models/roberta/__init__.py
+src/transformers/models/roberta/configuration_roberta.py
+src/transformers/models/roberta/convert_roberta_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/roberta/modeling_flax_roberta.py
+src/transformers/models/roberta/modeling_roberta.py
+src/transformers/models/roberta/modeling_tf_roberta.py
+src/transformers/models/roberta/tokenization_roberta.py
+src/transformers/models/roberta/tokenization_roberta_fast.py
+src/transformers/models/roberta_prelayernorm/__init__.py
+src/transformers/models/roberta_prelayernorm/configuration_roberta_prelayernorm.py
+src/transformers/models/roberta_prelayernorm/convert_roberta_prelayernorm_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/roberta_prelayernorm/modeling_flax_roberta_prelayernorm.py
+src/transformers/models/roberta_prelayernorm/modeling_roberta_prelayernorm.py
+src/transformers/models/roberta_prelayernorm/modeling_tf_roberta_prelayernorm.py
+src/transformers/models/roc_bert/__init__.py
+src/transformers/models/roc_bert/configuration_roc_bert.py
+src/transformers/models/roc_bert/modeling_roc_bert.py
+src/transformers/models/roc_bert/tokenization_roc_bert.py
+src/transformers/models/roformer/__init__.py
+src/transformers/models/roformer/configuration_roformer.py
+src/transformers/models/roformer/convert_roformer_original_tf_checkpoint_to_pytorch.py
+src/transformers/models/roformer/modeling_flax_roformer.py
+src/transformers/models/roformer/modeling_roformer.py
+src/transformers/models/roformer/modeling_tf_roformer.py
+src/transformers/models/roformer/tokenization_roformer.py
+src/transformers/models/roformer/tokenization_roformer_fast.py
+src/transformers/models/roformer/tokenization_utils.py
+src/transformers/models/rt_detr/__init__.py
+src/transformers/models/rt_detr/configuration_rt_detr.py
+src/transformers/models/rt_detr/configuration_rt_detr_resnet.py
+src/transformers/models/rt_detr/convert_rt_detr_original_pytorch_checkpoint_to_hf.py
+src/transformers/models/rt_detr/image_processing_rt_detr.py
+src/transformers/models/rt_detr/modeling_rt_detr.py
+src/transformers/models/rt_detr/modeling_rt_detr_resnet.py
+src/transformers/models/rwkv/__init__.py
+src/transformers/models/rwkv/configuration_rwkv.py
+src/transformers/models/rwkv/convert_rwkv_checkpoint_to_hf.py
+src/transformers/models/rwkv/modeling_rwkv.py
+src/transformers/models/sam/__init__.py
+src/transformers/models/sam/configuration_sam.py
+src/transformers/models/sam/convert_sam_to_hf.py
+src/transformers/models/sam/image_processing_sam.py
+src/transformers/models/sam/modeling_sam.py
+src/transformers/models/sam/modeling_tf_sam.py
+src/transformers/models/sam/processing_sam.py
+src/transformers/models/seamless_m4t/__init__.py
+src/transformers/models/seamless_m4t/configuration_seamless_m4t.py
+src/transformers/models/seamless_m4t/convert_fairseq2_to_hf.py
+src/transformers/models/seamless_m4t/feature_extraction_seamless_m4t.py
+src/transformers/models/seamless_m4t/modeling_seamless_m4t.py
+src/transformers/models/seamless_m4t/processing_seamless_m4t.py
+src/transformers/models/seamless_m4t/tokenization_seamless_m4t.py
+src/transformers/models/seamless_m4t/tokenization_seamless_m4t_fast.py
+src/transformers/models/seamless_m4t_v2/__init__.py
+src/transformers/models/seamless_m4t_v2/configuration_seamless_m4t_v2.py
+src/transformers/models/seamless_m4t_v2/convert_fairseq2_to_hf.py
+src/transformers/models/seamless_m4t_v2/modeling_seamless_m4t_v2.py
+src/transformers/models/segformer/__init__.py
+src/transformers/models/segformer/configuration_segformer.py
+src/transformers/models/segformer/convert_segformer_original_to_pytorch.py
+src/transformers/models/segformer/feature_extraction_segformer.py
+src/transformers/models/segformer/image_processing_segformer.py
+src/transformers/models/segformer/modeling_segformer.py
+src/transformers/models/segformer/modeling_tf_segformer.py
+src/transformers/models/seggpt/__init__.py
+src/transformers/models/seggpt/configuration_seggpt.py
+src/transformers/models/seggpt/convert_seggpt_to_hf.py
+src/transformers/models/seggpt/image_processing_seggpt.py
+src/transformers/models/seggpt/modeling_seggpt.py
+src/transformers/models/sew/__init__.py
+src/transformers/models/sew/configuration_sew.py
+src/transformers/models/sew/convert_sew_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/sew/modeling_sew.py
+src/transformers/models/sew_d/__init__.py
+src/transformers/models/sew_d/configuration_sew_d.py
+src/transformers/models/sew_d/convert_sew_d_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/sew_d/modeling_sew_d.py
+src/transformers/models/siglip/__init__.py
+src/transformers/models/siglip/configuration_siglip.py
+src/transformers/models/siglip/convert_siglip_to_hf.py
+src/transformers/models/siglip/image_processing_siglip.py
+src/transformers/models/siglip/modeling_siglip.py
+src/transformers/models/siglip/processing_siglip.py
+src/transformers/models/siglip/tokenization_siglip.py
+src/transformers/models/speech_encoder_decoder/__init__.py
+src/transformers/models/speech_encoder_decoder/configuration_speech_encoder_decoder.py
+src/transformers/models/speech_encoder_decoder/convert_mbart_wav2vec2_seq2seq_original_to_pytorch.py
+src/transformers/models/speech_encoder_decoder/convert_speech_to_text_wav2vec2_seq2seq_original_to_pytorch.py
+src/transformers/models/speech_encoder_decoder/modeling_flax_speech_encoder_decoder.py
+src/transformers/models/speech_encoder_decoder/modeling_speech_encoder_decoder.py
+src/transformers/models/speech_to_text/__init__.py
+src/transformers/models/speech_to_text/configuration_speech_to_text.py
+src/transformers/models/speech_to_text/convert_s2t_fairseq_to_tfms.py
+src/transformers/models/speech_to_text/feature_extraction_speech_to_text.py
+src/transformers/models/speech_to_text/modeling_speech_to_text.py
+src/transformers/models/speech_to_text/modeling_tf_speech_to_text.py
+src/transformers/models/speech_to_text/processing_speech_to_text.py
+src/transformers/models/speech_to_text/tokenization_speech_to_text.py
+src/transformers/models/speecht5/__init__.py
+src/transformers/models/speecht5/configuration_speecht5.py
+src/transformers/models/speecht5/convert_hifigan.py
+src/transformers/models/speecht5/convert_speecht5_original_pytorch_checkpoint_to_pytorch.py
+src/transformers/models/speecht5/feature_extraction_speecht5.py
+src/transformers/models/speecht5/modeling_speecht5.py
+src/transformers/models/speecht5/number_normalizer.py
+src/transformers/models/speecht5/processing_speecht5.py
+src/transformers/models/speecht5/tokenization_speecht5.py
+src/transformers/models/splinter/__init__.py
+src/transformers/models/splinter/configuration_splinter.py
+src/transformers/models/splinter/modeling_splinter.py
+src/transformers/models/splinter/tokenization_splinter.py
+src/transformers/models/splinter/tokenization_splinter_fast.py
+src/transformers/models/squeezebert/__init__.py
+src/transformers/models/squeezebert/configuration_squeezebert.py
+src/transformers/models/squeezebert/modeling_squeezebert.py
+src/transformers/models/squeezebert/tokenization_squeezebert.py
+src/transformers/models/squeezebert/tokenization_squeezebert_fast.py
+src/transformers/models/stablelm/__init__.py
+src/transformers/models/stablelm/configuration_stablelm.py
+src/transformers/models/stablelm/modeling_stablelm.py
+src/transformers/models/starcoder2/__init__.py
+src/transformers/models/starcoder2/configuration_starcoder2.py
+src/transformers/models/starcoder2/modeling_starcoder2.py
+src/transformers/models/superpoint/__init__.py
+src/transformers/models/superpoint/configuration_superpoint.py
+src/transformers/models/superpoint/convert_superpoint_to_pytorch.py
+src/transformers/models/superpoint/image_processing_superpoint.py
+src/transformers/models/superpoint/modeling_superpoint.py
+src/transformers/models/swiftformer/__init__.py
+src/transformers/models/swiftformer/configuration_swiftformer.py
+src/transformers/models/swiftformer/convert_swiftformer_original_to_hf.py
+src/transformers/models/swiftformer/modeling_swiftformer.py
+src/transformers/models/swiftformer/modeling_tf_swiftformer.py
+src/transformers/models/swin/__init__.py
+src/transformers/models/swin/configuration_swin.py
+src/transformers/models/swin/convert_swin_simmim_to_pytorch.py
+src/transformers/models/swin/convert_swin_timm_to_pytorch.py
+src/transformers/models/swin/modeling_swin.py
+src/transformers/models/swin/modeling_tf_swin.py
+src/transformers/models/swin2sr/__init__.py
+src/transformers/models/swin2sr/configuration_swin2sr.py
+src/transformers/models/swin2sr/convert_swin2sr_original_to_pytorch.py
+src/transformers/models/swin2sr/image_processing_swin2sr.py
+src/transformers/models/swin2sr/modeling_swin2sr.py
+src/transformers/models/swinv2/__init__.py
+src/transformers/models/swinv2/configuration_swinv2.py
+src/transformers/models/swinv2/convert_swinv2_timm_to_pytorch.py
+src/transformers/models/swinv2/modeling_swinv2.py
+src/transformers/models/switch_transformers/__init__.py
+src/transformers/models/switch_transformers/configuration_switch_transformers.py
+src/transformers/models/switch_transformers/convert_big_switch.py
+src/transformers/models/switch_transformers/convert_switch_transformers_original_flax_checkpoint_to_pytorch.py
+src/transformers/models/switch_transformers/modeling_switch_transformers.py
+src/transformers/models/t5/__init__.py
+src/transformers/models/t5/configuration_t5.py
+src/transformers/models/t5/convert_t5_original_tf_checkpoint_to_pytorch.py
+src/transformers/models/t5/convert_t5x_checkpoint_to_flax.py
+src/transformers/models/t5/convert_t5x_checkpoint_to_pytorch.py
+src/transformers/models/t5/modeling_flax_t5.py
+src/transformers/models/t5/modeling_t5.py
+src/transformers/models/t5/modeling_tf_t5.py +src/transformers/models/t5/tokenization_t5.py +src/transformers/models/t5/tokenization_t5_fast.py +src/transformers/models/table_transformer/__init__.py +src/transformers/models/table_transformer/configuration_table_transformer.py +src/transformers/models/table_transformer/convert_table_transformer_to_hf.py +src/transformers/models/table_transformer/convert_table_transformer_to_hf_no_timm.py +src/transformers/models/table_transformer/modeling_table_transformer.py +src/transformers/models/tapas/__init__.py +src/transformers/models/tapas/configuration_tapas.py +src/transformers/models/tapas/convert_tapas_original_tf_checkpoint_to_pytorch.py +src/transformers/models/tapas/modeling_tapas.py +src/transformers/models/tapas/modeling_tf_tapas.py +src/transformers/models/tapas/tokenization_tapas.py +src/transformers/models/time_series_transformer/__init__.py +src/transformers/models/time_series_transformer/configuration_time_series_transformer.py +src/transformers/models/time_series_transformer/modeling_time_series_transformer.py +src/transformers/models/timesformer/__init__.py +src/transformers/models/timesformer/configuration_timesformer.py +src/transformers/models/timesformer/convert_timesformer_to_pytorch.py +src/transformers/models/timesformer/modeling_timesformer.py +src/transformers/models/timm_backbone/__init__.py +src/transformers/models/timm_backbone/configuration_timm_backbone.py +src/transformers/models/timm_backbone/modeling_timm_backbone.py +src/transformers/models/trocr/__init__.py +src/transformers/models/trocr/configuration_trocr.py +src/transformers/models/trocr/convert_trocr_unilm_to_pytorch.py +src/transformers/models/trocr/modeling_trocr.py +src/transformers/models/trocr/processing_trocr.py +src/transformers/models/tvp/__init__.py +src/transformers/models/tvp/configuration_tvp.py +src/transformers/models/tvp/image_processing_tvp.py +src/transformers/models/tvp/modeling_tvp.py +src/transformers/models/tvp/processing_tvp.py +src/transformers/models/udop/__init__.py +src/transformers/models/udop/configuration_udop.py +src/transformers/models/udop/convert_udop_to_hf.py +src/transformers/models/udop/modeling_udop.py +src/transformers/models/udop/processing_udop.py +src/transformers/models/udop/tokenization_udop.py +src/transformers/models/udop/tokenization_udop_fast.py +src/transformers/models/umt5/__init__.py +src/transformers/models/umt5/configuration_umt5.py +src/transformers/models/umt5/convert_umt5_checkpoint_to_pytorch.py +src/transformers/models/umt5/modeling_umt5.py +src/transformers/models/unispeech/__init__.py +src/transformers/models/unispeech/configuration_unispeech.py +src/transformers/models/unispeech/convert_unispeech_original_pytorch_checkpoint_to_pytorch.py +src/transformers/models/unispeech/modeling_unispeech.py +src/transformers/models/unispeech_sat/__init__.py +src/transformers/models/unispeech_sat/configuration_unispeech_sat.py +src/transformers/models/unispeech_sat/convert_unispeech_original_s3prl_checkpoint_to_pytorch.py +src/transformers/models/unispeech_sat/convert_unispeech_sat_original_pytorch_checkpoint_to_pytorch.py +src/transformers/models/unispeech_sat/modeling_unispeech_sat.py +src/transformers/models/univnet/__init__.py +src/transformers/models/univnet/configuration_univnet.py +src/transformers/models/univnet/convert_univnet.py +src/transformers/models/univnet/feature_extraction_univnet.py +src/transformers/models/univnet/modeling_univnet.py +src/transformers/models/upernet/__init__.py 
+src/transformers/models/upernet/configuration_upernet.py +src/transformers/models/upernet/convert_convnext_upernet_to_pytorch.py +src/transformers/models/upernet/convert_swin_upernet_to_pytorch.py +src/transformers/models/upernet/modeling_upernet.py +src/transformers/models/video_llava/__init__.py +src/transformers/models/video_llava/configuration_video_llava.py +src/transformers/models/video_llava/convert_video_llava_weights_to_hf.py +src/transformers/models/video_llava/image_processing_video_llava.py +src/transformers/models/video_llava/modeling_video_llava.py +src/transformers/models/video_llava/processing_video_llava.py +src/transformers/models/videomae/__init__.py +src/transformers/models/videomae/configuration_videomae.py +src/transformers/models/videomae/convert_videomae_to_pytorch.py +src/transformers/models/videomae/feature_extraction_videomae.py +src/transformers/models/videomae/image_processing_videomae.py +src/transformers/models/videomae/modeling_videomae.py +src/transformers/models/vilt/__init__.py +src/transformers/models/vilt/configuration_vilt.py +src/transformers/models/vilt/convert_vilt_original_to_pytorch.py +src/transformers/models/vilt/feature_extraction_vilt.py +src/transformers/models/vilt/image_processing_vilt.py +src/transformers/models/vilt/modeling_vilt.py +src/transformers/models/vilt/processing_vilt.py +src/transformers/models/vipllava/__init__.py +src/transformers/models/vipllava/configuration_vipllava.py +src/transformers/models/vipllava/convert_vipllava_weights_to_hf.py +src/transformers/models/vipllava/modeling_vipllava.py +src/transformers/models/vision_encoder_decoder/__init__.py +src/transformers/models/vision_encoder_decoder/configuration_vision_encoder_decoder.py +src/transformers/models/vision_encoder_decoder/modeling_flax_vision_encoder_decoder.py +src/transformers/models/vision_encoder_decoder/modeling_tf_vision_encoder_decoder.py +src/transformers/models/vision_encoder_decoder/modeling_vision_encoder_decoder.py +src/transformers/models/vision_text_dual_encoder/__init__.py +src/transformers/models/vision_text_dual_encoder/configuration_vision_text_dual_encoder.py +src/transformers/models/vision_text_dual_encoder/modeling_flax_vision_text_dual_encoder.py +src/transformers/models/vision_text_dual_encoder/modeling_tf_vision_text_dual_encoder.py +src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +src/transformers/models/vision_text_dual_encoder/processing_vision_text_dual_encoder.py +src/transformers/models/visual_bert/__init__.py +src/transformers/models/visual_bert/configuration_visual_bert.py +src/transformers/models/visual_bert/convert_visual_bert_original_pytorch_checkpoint_to_pytorch.py +src/transformers/models/visual_bert/modeling_visual_bert.py +src/transformers/models/vit/__init__.py +src/transformers/models/vit/configuration_vit.py +src/transformers/models/vit/convert_dino_to_pytorch.py +src/transformers/models/vit/convert_vit_timm_to_pytorch.py +src/transformers/models/vit/feature_extraction_vit.py +src/transformers/models/vit/image_processing_vit.py +src/transformers/models/vit/image_processing_vit_fast.py +src/transformers/models/vit/modeling_flax_vit.py +src/transformers/models/vit/modeling_tf_vit.py +src/transformers/models/vit/modeling_vit.py +src/transformers/models/vit_mae/__init__.py +src/transformers/models/vit_mae/configuration_vit_mae.py +src/transformers/models/vit_mae/convert_vit_mae_to_pytorch.py +src/transformers/models/vit_mae/modeling_tf_vit_mae.py 
+src/transformers/models/vit_mae/modeling_vit_mae.py +src/transformers/models/vit_msn/__init__.py +src/transformers/models/vit_msn/configuration_vit_msn.py +src/transformers/models/vit_msn/convert_msn_to_pytorch.py +src/transformers/models/vit_msn/modeling_vit_msn.py +src/transformers/models/vitdet/__init__.py +src/transformers/models/vitdet/configuration_vitdet.py +src/transformers/models/vitdet/modeling_vitdet.py +src/transformers/models/vitmatte/__init__.py +src/transformers/models/vitmatte/configuration_vitmatte.py +src/transformers/models/vitmatte/convert_vitmatte_to_hf.py +src/transformers/models/vitmatte/image_processing_vitmatte.py +src/transformers/models/vitmatte/modeling_vitmatte.py +src/transformers/models/vits/__init__.py +src/transformers/models/vits/configuration_vits.py +src/transformers/models/vits/convert_original_checkpoint.py +src/transformers/models/vits/modeling_vits.py +src/transformers/models/vits/tokenization_vits.py +src/transformers/models/vivit/__init__.py +src/transformers/models/vivit/configuration_vivit.py +src/transformers/models/vivit/convert_vivit_flax_to_pytorch.py +src/transformers/models/vivit/image_processing_vivit.py +src/transformers/models/vivit/modeling_vivit.py +src/transformers/models/wav2vec2/__init__.py +src/transformers/models/wav2vec2/configuration_wav2vec2.py +src/transformers/models/wav2vec2/convert_wav2vec2_original_pytorch_checkpoint_to_pytorch.py +src/transformers/models/wav2vec2/convert_wav2vec2_original_s3prl_checkpoint_to_pytorch.py +src/transformers/models/wav2vec2/feature_extraction_wav2vec2.py +src/transformers/models/wav2vec2/modeling_flax_wav2vec2.py +src/transformers/models/wav2vec2/modeling_tf_wav2vec2.py +src/transformers/models/wav2vec2/modeling_wav2vec2.py +src/transformers/models/wav2vec2/processing_wav2vec2.py +src/transformers/models/wav2vec2/tokenization_wav2vec2.py +src/transformers/models/wav2vec2_bert/__init__.py +src/transformers/models/wav2vec2_bert/configuration_wav2vec2_bert.py +src/transformers/models/wav2vec2_bert/convert_wav2vec2_seamless_checkpoint.py +src/transformers/models/wav2vec2_bert/modeling_wav2vec2_bert.py +src/transformers/models/wav2vec2_bert/processing_wav2vec2_bert.py +src/transformers/models/wav2vec2_conformer/__init__.py +src/transformers/models/wav2vec2_conformer/configuration_wav2vec2_conformer.py +src/transformers/models/wav2vec2_conformer/convert_wav2vec2_conformer_original_pytorch_checkpoint_to_pytorch.py +src/transformers/models/wav2vec2_conformer/modeling_wav2vec2_conformer.py +src/transformers/models/wav2vec2_phoneme/__init__.py +src/transformers/models/wav2vec2_phoneme/tokenization_wav2vec2_phoneme.py +src/transformers/models/wav2vec2_with_lm/__init__.py +src/transformers/models/wav2vec2_with_lm/processing_wav2vec2_with_lm.py +src/transformers/models/wavlm/__init__.py +src/transformers/models/wavlm/configuration_wavlm.py +src/transformers/models/wavlm/convert_wavlm_original_pytorch_checkpoint_to_pytorch.py +src/transformers/models/wavlm/convert_wavlm_original_s3prl_checkpoint_to_pytorch.py +src/transformers/models/wavlm/modeling_wavlm.py +src/transformers/models/whisper/__init__.py +src/transformers/models/whisper/configuration_whisper.py +src/transformers/models/whisper/convert_openai_to_hf.py +src/transformers/models/whisper/english_normalizer.py +src/transformers/models/whisper/feature_extraction_whisper.py +src/transformers/models/whisper/generation_whisper.py +src/transformers/models/whisper/modeling_flax_whisper.py +src/transformers/models/whisper/modeling_tf_whisper.py 
+src/transformers/models/whisper/modeling_whisper.py +src/transformers/models/whisper/processing_whisper.py +src/transformers/models/whisper/tokenization_whisper.py +src/transformers/models/whisper/tokenization_whisper_fast.py +src/transformers/models/x_clip/__init__.py +src/transformers/models/x_clip/configuration_x_clip.py +src/transformers/models/x_clip/convert_x_clip_original_pytorch_to_hf.py +src/transformers/models/x_clip/modeling_x_clip.py +src/transformers/models/x_clip/processing_x_clip.py +src/transformers/models/xglm/__init__.py +src/transformers/models/xglm/configuration_xglm.py +src/transformers/models/xglm/convert_xglm_original_ckpt_to_trfms.py +src/transformers/models/xglm/modeling_flax_xglm.py +src/transformers/models/xglm/modeling_tf_xglm.py +src/transformers/models/xglm/modeling_xglm.py +src/transformers/models/xglm/tokenization_xglm.py +src/transformers/models/xglm/tokenization_xglm_fast.py +src/transformers/models/xlm/__init__.py +src/transformers/models/xlm/configuration_xlm.py +src/transformers/models/xlm/convert_xlm_original_pytorch_checkpoint_to_pytorch.py +src/transformers/models/xlm/modeling_tf_xlm.py +src/transformers/models/xlm/modeling_xlm.py +src/transformers/models/xlm/tokenization_xlm.py +src/transformers/models/xlm_roberta/__init__.py +src/transformers/models/xlm_roberta/configuration_xlm_roberta.py +src/transformers/models/xlm_roberta/modeling_flax_xlm_roberta.py +src/transformers/models/xlm_roberta/modeling_tf_xlm_roberta.py +src/transformers/models/xlm_roberta/modeling_xlm_roberta.py +src/transformers/models/xlm_roberta/tokenization_xlm_roberta.py +src/transformers/models/xlm_roberta/tokenization_xlm_roberta_fast.py +src/transformers/models/xlm_roberta_xl/__init__.py +src/transformers/models/xlm_roberta_xl/configuration_xlm_roberta_xl.py +src/transformers/models/xlm_roberta_xl/convert_xlm_roberta_xl_original_pytorch_checkpoint_to_pytorch.py +src/transformers/models/xlm_roberta_xl/modeling_xlm_roberta_xl.py +src/transformers/models/xlnet/__init__.py +src/transformers/models/xlnet/configuration_xlnet.py +src/transformers/models/xlnet/convert_xlnet_original_tf_checkpoint_to_pytorch.py +src/transformers/models/xlnet/modeling_tf_xlnet.py +src/transformers/models/xlnet/modeling_xlnet.py +src/transformers/models/xlnet/tokenization_xlnet.py +src/transformers/models/xlnet/tokenization_xlnet_fast.py +src/transformers/models/xmod/__init__.py +src/transformers/models/xmod/configuration_xmod.py +src/transformers/models/xmod/convert_xmod_original_pytorch_checkpoint_to_pytorch.py +src/transformers/models/xmod/modeling_xmod.py +src/transformers/models/yolos/__init__.py +src/transformers/models/yolos/configuration_yolos.py +src/transformers/models/yolos/convert_yolos_to_pytorch.py +src/transformers/models/yolos/feature_extraction_yolos.py +src/transformers/models/yolos/image_processing_yolos.py +src/transformers/models/yolos/modeling_yolos.py +src/transformers/models/yoso/__init__.py +src/transformers/models/yoso/configuration_yoso.py +src/transformers/models/yoso/convert_yoso_pytorch_to_pytorch.py +src/transformers/models/yoso/modeling_yoso.py +src/transformers/models/zoedepth/__init__.py +src/transformers/models/zoedepth/configuration_zoedepth.py +src/transformers/models/zoedepth/convert_zoedepth_to_hf.py +src/transformers/models/zoedepth/image_processing_zoedepth.py +src/transformers/models/zoedepth/modeling_zoedepth.py +src/transformers/onnx/__init__.py +src/transformers/onnx/__main__.py +src/transformers/onnx/config.py +src/transformers/onnx/convert.py 
+src/transformers/onnx/features.py +src/transformers/onnx/utils.py +src/transformers/pipelines/__init__.py +src/transformers/pipelines/audio_classification.py +src/transformers/pipelines/audio_utils.py +src/transformers/pipelines/automatic_speech_recognition.py +src/transformers/pipelines/base.py +src/transformers/pipelines/depth_estimation.py +src/transformers/pipelines/document_question_answering.py +src/transformers/pipelines/feature_extraction.py +src/transformers/pipelines/fill_mask.py +src/transformers/pipelines/image_classification.py +src/transformers/pipelines/image_feature_extraction.py +src/transformers/pipelines/image_segmentation.py +src/transformers/pipelines/image_to_image.py +src/transformers/pipelines/image_to_text.py +src/transformers/pipelines/mask_generation.py +src/transformers/pipelines/object_detection.py +src/transformers/pipelines/pt_utils.py +src/transformers/pipelines/question_answering.py +src/transformers/pipelines/table_question_answering.py +src/transformers/pipelines/text2text_generation.py +src/transformers/pipelines/text_classification.py +src/transformers/pipelines/text_generation.py +src/transformers/pipelines/text_to_audio.py +src/transformers/pipelines/token_classification.py +src/transformers/pipelines/video_classification.py +src/transformers/pipelines/visual_question_answering.py +src/transformers/pipelines/zero_shot_audio_classification.py +src/transformers/pipelines/zero_shot_classification.py +src/transformers/pipelines/zero_shot_image_classification.py +src/transformers/pipelines/zero_shot_object_detection.py +src/transformers/quantizers/__init__.py +src/transformers/quantizers/auto.py +src/transformers/quantizers/base.py +src/transformers/quantizers/quantizer_aqlm.py +src/transformers/quantizers/quantizer_awq.py +src/transformers/quantizers/quantizer_bnb_4bit.py +src/transformers/quantizers/quantizer_bnb_8bit.py +src/transformers/quantizers/quantizer_eetq.py +src/transformers/quantizers/quantizer_fbgemm_fp8.py +src/transformers/quantizers/quantizer_gptq.py +src/transformers/quantizers/quantizer_hqq.py +src/transformers/quantizers/quantizer_quanto.py +src/transformers/quantizers/quantizers_utils.py +src/transformers/sagemaker/__init__.py +src/transformers/sagemaker/trainer_sm.py +src/transformers/sagemaker/training_args_sm.py +src/transformers/utils/__init__.py +src/transformers/utils/backbone_utils.py +src/transformers/utils/bitsandbytes.py +src/transformers/utils/chat_template_utils.py +src/transformers/utils/constants.py +src/transformers/utils/deprecation.py +src/transformers/utils/doc.py +src/transformers/utils/dummy_detectron2_objects.py +src/transformers/utils/dummy_essentia_and_librosa_and_pretty_midi_and_scipy_and_torch_objects.py +src/transformers/utils/dummy_flax_objects.py +src/transformers/utils/dummy_keras_nlp_objects.py +src/transformers/utils/dummy_music_objects.py +src/transformers/utils/dummy_pt_objects.py +src/transformers/utils/dummy_sentencepiece_and_tokenizers_objects.py +src/transformers/utils/dummy_sentencepiece_objects.py +src/transformers/utils/dummy_speech_objects.py +src/transformers/utils/dummy_tensorflow_text_objects.py +src/transformers/utils/dummy_tf_objects.py +src/transformers/utils/dummy_tokenizers_objects.py +src/transformers/utils/dummy_torchaudio_objects.py +src/transformers/utils/dummy_torchvision_objects.py +src/transformers/utils/dummy_vision_objects.py +src/transformers/utils/fx.py +src/transformers/utils/generic.py +src/transformers/utils/hp_naming.py +src/transformers/utils/hub.py 
+src/transformers/utils/import_utils.py +src/transformers/utils/logging.py +src/transformers/utils/model_parallel_utils.py +src/transformers/utils/notebook.py +src/transformers/utils/peft_utils.py +src/transformers/utils/quantization_config.py +src/transformers/utils/sentencepiece_model_pb2.py +src/transformers/utils/sentencepiece_model_pb2_new.py +src/transformers/utils/versions.py +tests/test_backbone_common.py +tests/test_configuration_common.py +tests/test_feature_extraction_common.py +tests/test_image_processing_common.py +tests/test_image_transforms.py +tests/test_modeling_common.py +tests/test_modeling_flax_common.py +tests/test_modeling_tf_common.py +tests/test_pipeline_mixin.py +tests/test_processing_common.py +tests/test_sequence_feature_extraction_common.py +tests/test_tokenization_common.py \ No newline at end of file diff --git a/lib/transformers/src/transformers.egg-info/dependency_links.txt b/lib/transformers/src/transformers.egg-info/dependency_links.txt new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/lib/transformers/src/transformers.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/lib/transformers/src/transformers.egg-info/entry_points.txt b/lib/transformers/src/transformers.egg-info/entry_points.txt new file mode 100644 index 0000000000000000000000000000000000000000..8a7995ed6f21261a78509c57d57daba51ecf1a7d --- /dev/null +++ b/lib/transformers/src/transformers.egg-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +transformers-cli = transformers.commands.transformers_cli:main diff --git a/lib/transformers/src/transformers.egg-info/not-zip-safe b/lib/transformers/src/transformers.egg-info/not-zip-safe new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/lib/transformers/src/transformers.egg-info/not-zip-safe @@ -0,0 +1 @@ + diff --git a/lib/transformers/src/transformers.egg-info/requires.txt b/lib/transformers/src/transformers.egg-info/requires.txt new file mode 100644 index 0000000000000000000000000000000000000000..225911e719d12aa754c121ca23f19ee41181cb6f --- /dev/null +++ b/lib/transformers/src/transformers.egg-info/requires.txt @@ -0,0 +1,437 @@ +filelock +huggingface-hub<1.0,>=0.23.2 +numpy>=1.17 +packaging>=20.0 +pyyaml>=5.1 +regex!=2019.12.17 +requests +tokenizers<0.20,>=0.19 +safetensors>=0.4.1 +tqdm>=4.27 + +[accelerate] +accelerate>=0.21.0 + +[agents] +diffusers +accelerate>=0.21.0 +datasets!=2.5.0 +torch +sentencepiece!=0.1.92,>=0.1.91 +opencv-python +Pillow<=15.0,>=10.0.1 + +[all] +tensorflow<2.16,>2.9 +onnxconverter-common +tf2onnx +tensorflow-text<2.16 +keras-nlp<0.14.0,>=0.3.1 +torch +accelerate>=0.21.0 +jax<=0.4.13,>=0.4.1 +jaxlib<=0.4.13,>=0.4.1 +flax<=0.7.0,>=0.4.1 +optax<=0.1.4,>=0.0.8 +scipy<1.13.0 +sentencepiece!=0.1.92,>=0.1.91 +protobuf +tokenizers<0.20,>=0.19 +torchaudio +librosa +pyctcdecode>=0.4.0 +phonemizer +kenlm +Pillow<=15.0,>=10.0.1 +optuna +ray[tune]>=2.7.0 +sigopt +timm<=0.9.16 +torchvision +codecarbon==1.2.0 +decord==0.6.0 +av==9.2.0 + +[audio] +librosa +pyctcdecode>=0.4.0 +phonemizer +kenlm + +[benchmark] +optimum-benchmark>=0.2.0 + +[codecarbon] +codecarbon==1.2.0 + +[deepspeed] +deepspeed>=0.9.3 +accelerate>=0.21.0 + +[deepspeed-testing] +deepspeed>=0.9.3 +accelerate>=0.21.0 +pytest<8.0.0,>=7.2.0 +pytest-rich +pytest-xdist +timeout-decorator +parameterized +psutil +datasets!=2.5.0 +dill<0.3.5 +evaluate>=0.2.0 +pytest-timeout +ruff==0.4.4 +sacrebleu<2.0.0,>=1.4.12 
+rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1 +nltk +GitPython<3.1.19 +sacremoses +rjieba +beautifulsoup4 +tensorboard +pydantic +sentencepiece!=0.1.92,>=0.1.91 +faiss-cpu +cookiecutter==1.7.3 +optuna +protobuf + +[dev] +tensorflow<2.16,>2.9 +onnxconverter-common +tf2onnx +tensorflow-text<2.16 +keras-nlp<0.14.0,>=0.3.1 +torch +accelerate>=0.21.0 +jax<=0.4.13,>=0.4.1 +jaxlib<=0.4.13,>=0.4.1 +flax<=0.7.0,>=0.4.1 +optax<=0.1.4,>=0.0.8 +scipy<1.13.0 +sentencepiece!=0.1.92,>=0.1.91 +protobuf +tokenizers<0.20,>=0.19 +torchaudio +librosa +pyctcdecode>=0.4.0 +phonemizer +kenlm +Pillow<=15.0,>=10.0.1 +optuna +ray[tune]>=2.7.0 +sigopt +timm<=0.9.16 +torchvision +codecarbon==1.2.0 +decord==0.6.0 +av==9.2.0 +pytest<8.0.0,>=7.2.0 +pytest-rich +pytest-xdist +timeout-decorator +parameterized +psutil +datasets!=2.5.0 +dill<0.3.5 +evaluate>=0.2.0 +pytest-timeout +ruff==0.4.4 +sacrebleu<2.0.0,>=1.4.12 +rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1 +nltk +GitPython<3.1.19 +sacremoses +rjieba +beautifulsoup4 +tensorboard +pydantic +faiss-cpu +cookiecutter==1.7.3 +isort>=5.5.4 +urllib3<2.0.0 +fugashi>=1.0 +ipadic<2.0,>=1.0.0 +unidic_lite>=1.0.7 +unidic>=1.0.2 +sudachipy>=0.6.6 +sudachidict_core>=20220729 +rhoknp<1.3.1,>=1.1.0 +scikit-learn + +[dev-tensorflow] +pytest<8.0.0,>=7.2.0 +pytest-rich +pytest-xdist +timeout-decorator +parameterized +psutil +datasets!=2.5.0 +dill<0.3.5 +evaluate>=0.2.0 +pytest-timeout +ruff==0.4.4 +sacrebleu<2.0.0,>=1.4.12 +rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1 +nltk +GitPython<3.1.19 +sacremoses +rjieba +beautifulsoup4 +tensorboard +pydantic +sentencepiece!=0.1.92,>=0.1.91 +faiss-cpu +cookiecutter==1.7.3 +tensorflow<2.16,>2.9 +onnxconverter-common +tf2onnx +tensorflow-text<2.16 +keras-nlp<0.14.0,>=0.3.1 +protobuf +tokenizers<0.20,>=0.19 +Pillow<=15.0,>=10.0.1 +isort>=5.5.4 +urllib3<2.0.0 +scikit-learn +onnxruntime>=1.4.0 +onnxruntime-tools>=1.4.2 +librosa +pyctcdecode>=0.4.0 +phonemizer +kenlm + +[dev-torch] +pytest<8.0.0,>=7.2.0 +pytest-rich +pytest-xdist +timeout-decorator +parameterized +psutil +datasets!=2.5.0 +dill<0.3.5 +evaluate>=0.2.0 +pytest-timeout +ruff==0.4.4 +sacrebleu<2.0.0,>=1.4.12 +rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1 +nltk +GitPython<3.1.19 +sacremoses +rjieba +beautifulsoup4 +tensorboard +pydantic +sentencepiece!=0.1.92,>=0.1.91 +faiss-cpu +cookiecutter==1.7.3 +torch +accelerate>=0.21.0 +protobuf +tokenizers<0.20,>=0.19 +torchaudio +librosa +pyctcdecode>=0.4.0 +phonemizer +kenlm +Pillow<=15.0,>=10.0.1 +optuna +ray[tune]>=2.7.0 +sigopt +timm<=0.9.16 +torchvision +codecarbon==1.2.0 +isort>=5.5.4 +urllib3<2.0.0 +fugashi>=1.0 +ipadic<2.0,>=1.0.0 +unidic_lite>=1.0.7 +unidic>=1.0.2 +sudachipy>=0.6.6 +sudachidict_core>=20220729 +rhoknp<1.3.1,>=1.1.0 +scikit-learn +onnxruntime>=1.4.0 +onnxruntime-tools>=1.4.2 + +[flax] +jax<=0.4.13,>=0.4.1 +jaxlib<=0.4.13,>=0.4.1 +flax<=0.7.0,>=0.4.1 +optax<=0.1.4,>=0.0.8 +scipy<1.13.0 + +[flax-speech] +librosa +pyctcdecode>=0.4.0 +phonemizer +kenlm + +[ftfy] +ftfy + +[integrations] +optuna +ray[tune]>=2.7.0 +sigopt + +[ja] +fugashi>=1.0 +ipadic<2.0,>=1.0.0 +unidic_lite>=1.0.7 +unidic>=1.0.2 +sudachipy>=0.6.6 +sudachidict_core>=20220729 +rhoknp<1.3.1,>=1.1.0 + +[modelcreation] +cookiecutter==1.7.3 + +[natten] +natten<0.15.0,>=0.14.6 + +[onnx] +onnxconverter-common +tf2onnx +onnxruntime>=1.4.0 +onnxruntime-tools>=1.4.2 + +[onnxruntime] +onnxruntime>=1.4.0 +onnxruntime-tools>=1.4.2 + +[optuna] +optuna + +[quality] +datasets!=2.5.0 +isort>=5.5.4 +ruff==0.4.4 +GitPython<3.1.19 +urllib3<2.0.0 + +[ray] +ray[tune]>=2.7.0 + +[retrieval] +faiss-cpu 
+datasets!=2.5.0 + +[ruff] +ruff==0.4.4 + +[sagemaker] +sagemaker>=2.31.0 + +[sentencepiece] +sentencepiece!=0.1.92,>=0.1.91 +protobuf + +[serving] +pydantic +uvicorn +fastapi +starlette + +[sigopt] +sigopt + +[sklearn] +scikit-learn + +[speech] +torchaudio +librosa +pyctcdecode>=0.4.0 +phonemizer +kenlm + +[testing] +pytest<8.0.0,>=7.2.0 +pytest-rich +pytest-xdist +timeout-decorator +parameterized +psutil +datasets!=2.5.0 +dill<0.3.5 +evaluate>=0.2.0 +pytest-timeout +ruff==0.4.4 +sacrebleu<2.0.0,>=1.4.12 +rouge-score!=0.0.7,!=0.0.8,!=0.1,!=0.1.1 +nltk +GitPython<3.1.19 +sacremoses +rjieba +beautifulsoup4 +tensorboard +pydantic +sentencepiece!=0.1.92,>=0.1.91 +faiss-cpu +cookiecutter==1.7.3 + +[tf] +tensorflow<2.16,>2.9 +onnxconverter-common +tf2onnx +tensorflow-text<2.16 +keras-nlp<0.14.0,>=0.3.1 + +[tf-cpu] +keras<2.16,>2.9 +tensorflow-cpu<2.16,>2.9 +onnxconverter-common +tf2onnx +tensorflow-text<2.16 +keras-nlp<0.14.0,>=0.3.1 +tensorflow-probability<0.24 + +[tf-speech] +librosa +pyctcdecode>=0.4.0 +phonemizer +kenlm + +[timm] +timm<=0.9.16 + +[tokenizers] +tokenizers<0.20,>=0.19 + +[torch] +torch +accelerate>=0.21.0 + +[torch-speech] +torchaudio +librosa +pyctcdecode>=0.4.0 +phonemizer +kenlm + +[torch-vision] +torchvision +Pillow<=15.0,>=10.0.1 + +[torchhub] +filelock +huggingface-hub<1.0,>=0.23.2 +importlib_metadata +numpy>=1.17 +packaging>=20.0 +protobuf +regex!=2019.12.17 +requests +sentencepiece!=0.1.92,>=0.1.91 +torch +tokenizers<0.20,>=0.19 +tqdm>=4.27 + +[video] +decord==0.6.0 +av==9.2.0 + +[vision] +Pillow<=15.0,>=10.0.1 diff --git a/lib/transformers/src/transformers.egg-info/top_level.txt b/lib/transformers/src/transformers.egg-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..976a2b1f3998279c10c413279a095be86bf69167 --- /dev/null +++ b/lib/transformers/src/transformers.egg-info/top_level.txt @@ -0,0 +1 @@ +transformers diff --git a/lib/transformers/src/transformers/data/__pycache__/__init__.cpython-310.pyc b/lib/transformers/src/transformers/data/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9e419832a538db83ea529456af1bad7b27ff6be9 Binary files /dev/null and b/lib/transformers/src/transformers/data/__pycache__/__init__.cpython-310.pyc differ diff --git a/lib/transformers/src/transformers/data/__pycache__/data_collator.cpython-310.pyc b/lib/transformers/src/transformers/data/__pycache__/data_collator.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c3025be92a7bf1fa8c95c2c54bc12abf40e4bcc8 Binary files /dev/null and b/lib/transformers/src/transformers/data/__pycache__/data_collator.cpython-310.pyc differ diff --git a/lib/transformers/src/transformers/data/metrics/__pycache__/__init__.cpython-310.pyc b/lib/transformers/src/transformers/data/metrics/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1060bb5a860d48126c90327ba28345ba85159951 Binary files /dev/null and b/lib/transformers/src/transformers/data/metrics/__pycache__/__init__.cpython-310.pyc differ diff --git a/lib/transformers/src/transformers/data/metrics/__pycache__/squad_metrics.cpython-310.pyc b/lib/transformers/src/transformers/data/metrics/__pycache__/squad_metrics.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..514886783ff05a4c1bf2d49a81558715c7214124 Binary files /dev/null and b/lib/transformers/src/transformers/data/metrics/__pycache__/squad_metrics.cpython-310.pyc differ diff --git 
a/lib/transformers/src/transformers/data/processors/__pycache__/__init__.cpython-310.pyc b/lib/transformers/src/transformers/data/processors/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ca62698a1834cc2ab53d7757676ad24a84fca8cf Binary files /dev/null and b/lib/transformers/src/transformers/data/processors/__pycache__/__init__.cpython-310.pyc differ diff --git a/lib/transformers/src/transformers/data/processors/__pycache__/glue.cpython-310.pyc b/lib/transformers/src/transformers/data/processors/__pycache__/glue.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3e514eba0064a9e2428c1c9f73c773988644e01f Binary files /dev/null and b/lib/transformers/src/transformers/data/processors/__pycache__/glue.cpython-310.pyc differ diff --git a/lib/transformers/src/transformers/data/processors/__pycache__/squad.cpython-310.pyc b/lib/transformers/src/transformers/data/processors/__pycache__/squad.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..326c85018300ea592786027f77785c202941268f Binary files /dev/null and b/lib/transformers/src/transformers/data/processors/__pycache__/squad.cpython-310.pyc differ diff --git a/lib/transformers/src/transformers/data/processors/__pycache__/utils.cpython-310.pyc b/lib/transformers/src/transformers/data/processors/__pycache__/utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f8eeb5dcbfa219bb96877ecd66ea998a1072c34 Binary files /dev/null and b/lib/transformers/src/transformers/data/processors/__pycache__/utils.cpython-310.pyc differ diff --git a/lib/transformers/src/transformers/data/processors/__pycache__/xnli.cpython-310.pyc b/lib/transformers/src/transformers/data/processors/__pycache__/xnli.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ae7116277d9e18387855ea82843c00ce746f4e3f Binary files /dev/null and b/lib/transformers/src/transformers/data/processors/__pycache__/xnli.cpython-310.pyc differ diff --git a/lib/transformers/src/transformers/generation/__pycache__/__init__.cpython-310.pyc b/lib/transformers/src/transformers/generation/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e6c45579db87943c81ac919c61c195831028a866 Binary files /dev/null and b/lib/transformers/src/transformers/generation/__pycache__/__init__.cpython-310.pyc differ diff --git a/lib/transformers/src/transformers/generation/__pycache__/beam_constraints.cpython-310.pyc b/lib/transformers/src/transformers/generation/__pycache__/beam_constraints.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f53371fc9a0cafd5edefd9347fa89d5feddb71e8 Binary files /dev/null and b/lib/transformers/src/transformers/generation/__pycache__/beam_constraints.cpython-310.pyc differ diff --git a/lib/transformers/src/transformers/generation/__pycache__/beam_search.cpython-310.pyc b/lib/transformers/src/transformers/generation/__pycache__/beam_search.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..94b25f4c867faf5babf4d4809fa7a698d0774c8a Binary files /dev/null and b/lib/transformers/src/transformers/generation/__pycache__/beam_search.cpython-310.pyc differ diff --git a/lib/transformers/src/transformers/generation/__pycache__/candidate_generator.cpython-310.pyc b/lib/transformers/src/transformers/generation/__pycache__/candidate_generator.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..4db75b595b881bc006381258dba1d35b178daa1c Binary files /dev/null and b/lib/transformers/src/transformers/generation/__pycache__/candidate_generator.cpython-310.pyc differ diff --git a/lib/transformers/src/transformers/generation/__pycache__/configuration_utils.cpython-310.pyc b/lib/transformers/src/transformers/generation/__pycache__/configuration_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aa85692b4677d79ab94c2c77d6f5bf26e7d1601b Binary files /dev/null and b/lib/transformers/src/transformers/generation/__pycache__/configuration_utils.cpython-310.pyc differ diff --git a/lib/transformers/src/transformers/generation/__pycache__/logits_process.cpython-310.pyc b/lib/transformers/src/transformers/generation/__pycache__/logits_process.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1beb1009e797ee8f75cb5768c54b1095d1dd82df Binary files /dev/null and b/lib/transformers/src/transformers/generation/__pycache__/logits_process.cpython-310.pyc differ diff --git a/lib/transformers/src/transformers/generation/__pycache__/stopping_criteria.cpython-310.pyc b/lib/transformers/src/transformers/generation/__pycache__/stopping_criteria.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6e51c0acfe1ad4dc7edff3ad405e9289e11a0f1c Binary files /dev/null and b/lib/transformers/src/transformers/generation/__pycache__/stopping_criteria.cpython-310.pyc differ diff --git a/lib/transformers/src/transformers/generation/__pycache__/utils.cpython-310.pyc b/lib/transformers/src/transformers/generation/__pycache__/utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f37bf57412150485b53ba49c4e60bba58a13d468 Binary files /dev/null and b/lib/transformers/src/transformers/generation/__pycache__/utils.cpython-310.pyc differ diff --git a/lib/transformers/src/transformers/pipelines/__pycache__/audio_utils.cpython-310.pyc b/lib/transformers/src/transformers/pipelines/__pycache__/audio_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ca1e89d8a004d2111f2e02cb2a2eb94f65795b7 Binary files /dev/null and b/lib/transformers/src/transformers/pipelines/__pycache__/audio_utils.cpython-310.pyc differ diff --git a/lib/transformers/src/transformers/pipelines/__pycache__/document_question_answering.cpython-310.pyc b/lib/transformers/src/transformers/pipelines/__pycache__/document_question_answering.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0b9e2d294cf714d345ee0d5862d5838182f01649 Binary files /dev/null and b/lib/transformers/src/transformers/pipelines/__pycache__/document_question_answering.cpython-310.pyc differ diff --git a/lib/transformers/src/transformers/pipelines/__pycache__/image_classification.cpython-310.pyc b/lib/transformers/src/transformers/pipelines/__pycache__/image_classification.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4fbca4d0a9d38c2b0ec8445d99cc838d1aea8c75 Binary files /dev/null and b/lib/transformers/src/transformers/pipelines/__pycache__/image_classification.cpython-310.pyc differ diff --git a/lib/transformers/src/transformers/pipelines/__pycache__/question_answering.cpython-310.pyc b/lib/transformers/src/transformers/pipelines/__pycache__/question_answering.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..69bf8b22d5b197290051119b3a1c609ca145f901 Binary files /dev/null 
and b/lib/transformers/src/transformers/pipelines/__pycache__/question_answering.cpython-310.pyc differ diff --git a/megatron_lm/megatron/__pycache__/global_vars.cpython-310.pyc b/megatron_lm/megatron/__pycache__/global_vars.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8dc82804b5ff31ccd3292168ec32e181d055b8bb Binary files /dev/null and b/megatron_lm/megatron/__pycache__/global_vars.cpython-310.pyc differ diff --git a/megatron_lm/megatron/core/datasets/__pycache__/blended_dataset.cpython-310.pyc b/megatron_lm/megatron/core/datasets/__pycache__/blended_dataset.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..418c59c34818eb1b2d384571d51d23c8cdd46a0c Binary files /dev/null and b/megatron_lm/megatron/core/datasets/__pycache__/blended_dataset.cpython-310.pyc differ diff --git a/megatron_lm/megatron/core/datasets/__pycache__/blended_megatron_dataset_builder.cpython-310.pyc b/megatron_lm/megatron/core/datasets/__pycache__/blended_megatron_dataset_builder.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..54014e8556ea8db430fc4cd3359d67e13850a597 Binary files /dev/null and b/megatron_lm/megatron/core/datasets/__pycache__/blended_megatron_dataset_builder.cpython-310.pyc differ diff --git a/megatron_lm/megatron/core/datasets/__pycache__/blended_megatron_dataset_config.cpython-310.pyc b/megatron_lm/megatron/core/datasets/__pycache__/blended_megatron_dataset_config.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5ae2e9fc87a89c2849073cb8d99041205b13a6a6 Binary files /dev/null and b/megatron_lm/megatron/core/datasets/__pycache__/blended_megatron_dataset_config.cpython-310.pyc differ diff --git a/megatron_lm/megatron/core/datasets/__pycache__/gpt_dataset.cpython-310.pyc b/megatron_lm/megatron/core/datasets/__pycache__/gpt_dataset.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..917274de9963d84756738804de9d2670e5d5969a Binary files /dev/null and b/megatron_lm/megatron/core/datasets/__pycache__/gpt_dataset.cpython-310.pyc differ diff --git a/megatron_lm/megatron/core/datasets/__pycache__/indexed_dataset.cpython-310.pyc b/megatron_lm/megatron/core/datasets/__pycache__/indexed_dataset.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cfba63d9c4fbac06a2f0c599c89388381ed30d4a Binary files /dev/null and b/megatron_lm/megatron/core/datasets/__pycache__/indexed_dataset.cpython-310.pyc differ diff --git a/megatron_lm/megatron/core/datasets/__pycache__/megatron_dataset.cpython-310.pyc b/megatron_lm/megatron/core/datasets/__pycache__/megatron_dataset.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..467586df3a16962d8782af03179bd707958bc0cc Binary files /dev/null and b/megatron_lm/megatron/core/datasets/__pycache__/megatron_dataset.cpython-310.pyc differ diff --git a/megatron_lm/megatron/core/datasets/__pycache__/megatron_tokenizer.cpython-310.pyc b/megatron_lm/megatron/core/datasets/__pycache__/megatron_tokenizer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fd38f55f4143a64116abc443d6fd679891fcbe32 Binary files /dev/null and b/megatron_lm/megatron/core/datasets/__pycache__/megatron_tokenizer.cpython-310.pyc differ diff --git a/megatron_lm/megatron/core/datasets/__pycache__/utils.cpython-310.pyc b/megatron_lm/megatron/core/datasets/__pycache__/utils.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..b5b221ab77f6fc038ff7368a80732732e7213b6c Binary files /dev/null and b/megatron_lm/megatron/core/datasets/__pycache__/utils.cpython-310.pyc differ diff --git a/megatron_lm/megatron/core/datasets/helpers.cpython-310-x86_64-linux-gnu.so b/megatron_lm/megatron/core/datasets/helpers.cpython-310-x86_64-linux-gnu.so new file mode 100644 index 0000000000000000000000000000000000000000..2756453024a7fe7b88cf0564b9f858ff2cef8ecc Binary files /dev/null and b/megatron_lm/megatron/core/datasets/helpers.cpython-310-x86_64-linux-gnu.so differ diff --git a/megatron_lm/megatron/data/__pycache__/data_samplers.cpython-310.pyc b/megatron_lm/megatron/data/__pycache__/data_samplers.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d18f064488df46262f0f83e21e2bb5291f4a230c Binary files /dev/null and b/megatron_lm/megatron/data/__pycache__/data_samplers.cpython-310.pyc differ diff --git a/megatron_lm/megatron/tokenizer/__pycache__/__init__.cpython-310.pyc b/megatron_lm/megatron/tokenizer/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..90b12184b83201543f78c5acaa0beabae8f70ea0 Binary files /dev/null and b/megatron_lm/megatron/tokenizer/__pycache__/__init__.cpython-310.pyc differ diff --git a/megatron_lm/megatron/tokenizer/__pycache__/tokenizer.cpython-310.pyc b/megatron_lm/megatron/tokenizer/__pycache__/tokenizer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..43e0a755d2031fda56d5ac7d58fa29f816000f8d Binary files /dev/null and b/megatron_lm/megatron/tokenizer/__pycache__/tokenizer.cpython-310.pyc differ diff --git a/src/llama_recipes/__pycache__/arguments.cpython-310.pyc b/src/llama_recipes/__pycache__/arguments.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..79805b56017d10155bd3ed5c5b22d45085f98fcd Binary files /dev/null and b/src/llama_recipes/__pycache__/arguments.cpython-310.pyc differ diff --git a/src/llama_recipes/__pycache__/finetuning.cpython-310.pyc b/src/llama_recipes/__pycache__/finetuning.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6d24c2985eddfaa46ee6b2c3a952a526049887fd Binary files /dev/null and b/src/llama_recipes/__pycache__/finetuning.cpython-310.pyc differ diff --git a/src/llama_recipes/__pycache__/get_fsdp.cpython-310.pyc b/src/llama_recipes/__pycache__/get_fsdp.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..145c20a8b113d60f8f07760dc031dee771f0e101 Binary files /dev/null and b/src/llama_recipes/__pycache__/get_fsdp.cpython-310.pyc differ diff --git a/src/llama_recipes/__pycache__/get_model_decoder_layer.cpython-310.pyc b/src/llama_recipes/__pycache__/get_model_decoder_layer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..49d625213e96a4da5950abd2d1ee9e45913f20b2 Binary files /dev/null and b/src/llama_recipes/__pycache__/get_model_decoder_layer.cpython-310.pyc differ diff --git a/src/llama_recipes/__pycache__/get_models.cpython-310.pyc b/src/llama_recipes/__pycache__/get_models.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ee4fe2bc9bf7a7c3fca9b11c34ac59f880361353 Binary files /dev/null and b/src/llama_recipes/__pycache__/get_models.cpython-310.pyc differ diff --git a/src/llama_recipes/__pycache__/optimizer.cpython-310.pyc b/src/llama_recipes/__pycache__/optimizer.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..3052f3d5da92c34232479837171c2795bbf9d3dd Binary files /dev/null and b/src/llama_recipes/__pycache__/optimizer.cpython-310.pyc differ diff --git a/src/llama_recipes/datasets/__pycache__/pretrain_dataset.cpython-310.pyc b/src/llama_recipes/datasets/__pycache__/pretrain_dataset.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..397efd7df014cd2e5895f705ad1ee44a98cfb7fb Binary files /dev/null and b/src/llama_recipes/datasets/__pycache__/pretrain_dataset.cpython-310.pyc differ diff --git a/src/llama_recipes/policies/__pycache__/__init__.cpython-310.pyc b/src/llama_recipes/policies/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..99fbff4ed28838f70bd4a4bcd8fd0683b7995d2b Binary files /dev/null and b/src/llama_recipes/policies/__pycache__/__init__.cpython-310.pyc differ diff --git a/src/llama_recipes/policies/__pycache__/activation_checkpointing_functions.cpython-310.pyc b/src/llama_recipes/policies/__pycache__/activation_checkpointing_functions.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4e37c85560a971c355a2aaa4cda95d4f9fc5526a Binary files /dev/null and b/src/llama_recipes/policies/__pycache__/activation_checkpointing_functions.cpython-310.pyc differ diff --git a/src/llama_recipes/policies/__pycache__/anyprecision_optimizer.cpython-310.pyc b/src/llama_recipes/policies/__pycache__/anyprecision_optimizer.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..460617bed60c97f096c3a06f3c61bbf4550f94df Binary files /dev/null and b/src/llama_recipes/policies/__pycache__/anyprecision_optimizer.cpython-310.pyc differ diff --git a/src/llama_recipes/policies/__pycache__/mixed_precision.cpython-310.pyc b/src/llama_recipes/policies/__pycache__/mixed_precision.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..526fedb74e0e44ebdf69f2d331e5a54d3dd3680a Binary files /dev/null and b/src/llama_recipes/policies/__pycache__/mixed_precision.cpython-310.pyc differ diff --git a/src/llama_recipes/policies/__pycache__/wrapping.cpython-310.pyc b/src/llama_recipes/policies/__pycache__/wrapping.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7ef64ac2298de019612fdccc2e8fc83c000501cb Binary files /dev/null and b/src/llama_recipes/policies/__pycache__/wrapping.cpython-310.pyc differ diff --git a/src/llama_recipes/utils/__pycache__/__init__.cpython-310.pyc b/src/llama_recipes/utils/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d630858c116aad23bd4bcfcbb67da07625b5f432 Binary files /dev/null and b/src/llama_recipes/utils/__pycache__/__init__.cpython-310.pyc differ diff --git a/src/llama_recipes/utils/__pycache__/checkpoint.cpython-310.pyc b/src/llama_recipes/utils/__pycache__/checkpoint.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7c28347eba469d0267e400deea8070ac4d2b866a Binary files /dev/null and b/src/llama_recipes/utils/__pycache__/checkpoint.cpython-310.pyc differ diff --git a/src/llama_recipes/utils/__pycache__/distributed.cpython-310.pyc b/src/llama_recipes/utils/__pycache__/distributed.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dc672983f7533349d1dcbdbbe41041563ebbf6ec Binary files /dev/null and b/src/llama_recipes/utils/__pycache__/distributed.cpython-310.pyc differ diff --git 
a/src/llama_recipes/utils/__pycache__/efficient_instruction_tuning.cpython-310.pyc b/src/llama_recipes/utils/__pycache__/efficient_instruction_tuning.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5cea05ffe7318f49c163b7af7451c50e139a3674 Binary files /dev/null and b/src/llama_recipes/utils/__pycache__/efficient_instruction_tuning.cpython-310.pyc differ diff --git a/src/llama_recipes/utils/__pycache__/fsdp_utils.cpython-310.pyc b/src/llama_recipes/utils/__pycache__/fsdp_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ff2bf8d8930ce92f58cb21a266b17944b67f8d2f Binary files /dev/null and b/src/llama_recipes/utils/__pycache__/fsdp_utils.cpython-310.pyc differ diff --git a/src/llama_recipes/utils/__pycache__/hf_hub_utils.cpython-310.pyc b/src/llama_recipes/utils/__pycache__/hf_hub_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..41ffa7982110475d772ac3e88a6dc32a44a4bad0 Binary files /dev/null and b/src/llama_recipes/utils/__pycache__/hf_hub_utils.cpython-310.pyc differ diff --git a/src/llama_recipes/utils/__pycache__/instruction_tuning.cpython-310.pyc b/src/llama_recipes/utils/__pycache__/instruction_tuning.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..993542f2227f317a8e1c2bf29963077c608702a8 Binary files /dev/null and b/src/llama_recipes/utils/__pycache__/instruction_tuning.cpython-310.pyc differ diff --git a/src/llama_recipes/utils/__pycache__/memory_utils.cpython-310.pyc b/src/llama_recipes/utils/__pycache__/memory_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cd708002e37744ae30f28fcaeac4adcc4e3f71a1 Binary files /dev/null and b/src/llama_recipes/utils/__pycache__/memory_utils.cpython-310.pyc differ diff --git a/src/llama_recipes/utils/__pycache__/random.cpython-310.pyc b/src/llama_recipes/utils/__pycache__/random.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8c0aa5839a3e9ccca257d4f2dc09557ba477d843 Binary files /dev/null and b/src/llama_recipes/utils/__pycache__/random.cpython-310.pyc differ diff --git a/src/llama_recipes/utils/__pycache__/sequence_length_warmup.cpython-310.pyc b/src/llama_recipes/utils/__pycache__/sequence_length_warmup.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c6493296cd1a99dfb5d07174600a17040a4daa04 Binary files /dev/null and b/src/llama_recipes/utils/__pycache__/sequence_length_warmup.cpython-310.pyc differ diff --git a/src/llama_recipes/utils/__pycache__/train_utils.cpython-310.pyc b/src/llama_recipes/utils/__pycache__/train_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d9e84330809347593074e66aa1bae62c66faae61 Binary files /dev/null and b/src/llama_recipes/utils/__pycache__/train_utils.cpython-310.pyc differ diff --git a/src/llama_recipes/utils/__pycache__/wandb_utils.cpython-310.pyc b/src/llama_recipes/utils/__pycache__/wandb_utils.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a70af12ee51ab796b0181cd2ae87ba49328e4e50 Binary files /dev/null and b/src/llama_recipes/utils/__pycache__/wandb_utils.cpython-310.pyc differ diff --git a/wandb/run-20240804_040133-gue8fknz/files/config.yaml b/wandb/run-20240804_040133-gue8fknz/files/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d6cf3fb5c3c17207acc5eaf31e70b149bcbd58c8 --- /dev/null +++ 
b/wandb/run-20240804_040133-gue8fknz/files/config.yaml @@ -0,0 +1,335 @@ +wandb_version: 1 + +sharding_strategy: + desc: null + value: FULL_SHARD +checkpoint_type: + desc: null + value: LOCAL_STATE_DICT +fsdp_activation_checkpointing: + desc: null + value: true +fsdp_cpu_offload: + desc: null + value: false +low_cpu_fsdp: + desc: null + value: false +no_meta_device: + desc: null + value: false +data_path: + desc: null + value: null +split: + desc: null + value: 969, 30, 1 +train_data_path: + desc: null + value: + - '4013541' + - /work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document +valid_data_path: + desc: null + value: + - '4013541' + - /work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document +test_data_path: + desc: null + value: + - '4013541' + - /work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document +data_cache_path: + desc: null + value: null +vocab_size: + desc: null + value: null +vocab_file: + desc: null + value: null +merge_file: + desc: null + value: null +seq_length: + desc: null + value: 256 +num_workers: + desc: null + value: 2 +tokenizer_type: + desc: null + value: Llama2Tokenizer +tokenizer_model: + desc: null + value: /share/pretrained_lm/meta-llama/TinyLlama_v1.1/tokenizer.model +reset_position_ids: + desc: null + value: false +reset_attention_mask: + desc: null + value: false +eod_mask_loss: + desc: null + value: false +retro_return_doc_ids: + desc: null + value: false +short_seq_prob: + desc: null + value: 0.1 +vocab_extra_ids: + desc: null + value: 0 +seed: + desc: null + value: 1234 +use_mpi: + desc: null + value: false +wandb_entity: + desc: null + value: iwakawa-koichi-q5-tohoku-nlp6723 +wandb_name: + desc: null + value: tiny-llama-sample_train_2024-08-04-04:01:22 +wandb_project: + desc: null + value: llm_tutorial +quantization: + desc: null + value: false +use_freeze_layers: + desc: null + value: false +freeze_layers: + desc: null + value: null +bf16: + desc: null + value: true +fp16: + desc: null + value: false +mixed_precision: + desc: null + value: true +param_dtype: + desc: null + value: null +load: + desc: null + value: /work/llm_recipes/models/tiny-llama-sample +save: + desc: null + value: /work/llm_recipes/models/tiny-llama-sample +base_model: + desc: null + value: /share/pretrained_lm/meta-llama/TinyLlama_v1.1 +use_better_transformer: + desc: null + value: false +grad_clip_norm: + desc: null + value: 1.0 +eval_interval: + desc: null + value: 200 +save_interval: + desc: null + value: 200 +eval_iters: + desc: null + value: 10 +optimizer: + desc: null + value: adam +lr: + desc: null + value: 2.0e-05 +lr_decay_style: + desc: null + value: cosine +lr_decay_iters: + desc: null + value: 2000 +lr_warmup_iters: + desc: null + value: 500 +min_lr: + desc: null + value: 1.0e-06 +train_iters: + desc: null + value: 2000 +train_samples: + desc: null + value: null +global_batch_size: + desc: null + value: 320 +micro_batch_size: + desc: null + value: 8 +make_vocab_size_divisible_by: + desc: null + value: 128 +sliding_window_size: + desc: null + value: 2048 +skip_batch: + desc: null + value: null +no_save_optimizer_state: + desc: null + value: false +continual_pretraining: + desc: null + value: false +instruction_tuning: + desc: null + value: false +direct_preference_optimization: + desc: null + value: false +attention_dropout: + 
desc: null + value: 0.1 +hidden_dropout: + desc: null + value: 0.1 +weight_decay: + desc: null + value: 0.1 +adam_beta1: + desc: null + value: 0.9 +adam_beta2: + desc: null + value: 0.95 +adam_eps: + desc: null + value: 1.0e-06 +hf_transformer_model_dir: + desc: null + value: null +instruction_train_data_path: + desc: null + value: null +instruction_valid_data_path: + desc: null + value: null +epoch: + desc: null + value: null +instruction_dataset_size: + desc: null + value: null +save_sampler_state: + desc: null + value: false +label_smoothing: + desc: null + value: 0.0 +save_n_checkpoints: + desc: null + value: 10 +hf_repo_id: + desc: null + value: koichi12/tiny-llama-sample +create_public_hf_repo: + desc: null + value: false +upload_all_checkpoints_to_hf: + desc: null + value: false +hf_upload_retry_limit: + desc: null + value: 2 +exit_duration_in_mins: + desc: null + value: null +source_key: + desc: null + value: null +target_key: + desc: null + value: null +attn_implementation: + desc: null + value: flash_attention_2 +efficient_instruction_tuning: + desc: null + value: false +remove_padding_masking: + desc: null + value: false +save_start_iter: + desc: null + value: null +rank: + desc: null + value: 0 +world_size: + desc: null + value: 1 +padded_vocab_size: + desc: null + value: 32000 +gradient_accumulation_steps: + desc: null + value: 40 +_wandb: + desc: null + value: + python_version: 3.10.12 + cli_version: 0.16.3 + framework: huggingface + huggingface_version: 4.43.3 + is_jupyter_run: false + is_kaggle_kernel: false + start_time: 1722711693.280215 + t: + 1: + - 1 + - 11 + - 49 + - 55 + - 71 + 2: + - 1 + - 11 + - 49 + - 55 + - 71 + 3: + - 13 + - 16 + - 23 + 4: 3.10.12 + 5: 0.16.3 + 6: 4.43.3 + 8: + - 5 + 13: linux-x86_64 +activation_function: + desc: null + value: silu +hidden_size: + desc: null + value: 2048 +model_type: + desc: null + value: llama +max_position_embeddings: + desc: null + value: 2048 +num_attention_heads: + desc: null + value: 32 +num_hidden_layers: + desc: null + value: 22 +model_architecture: + desc: null + value: LlamaForCausalLM diff --git a/wandb/run-20240804_040133-gue8fknz/files/output.log b/wandb/run-20240804_040133-gue8fknz/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..f2efd2c17e537c8f9b32e93dcffbaf0174d6f013 --- /dev/null +++ b/wandb/run-20240804_040133-gue8fknz/files/output.log @@ -0,0 +1,130 @@ +Created Hugging Face repository with ID koichi12/tiny-llama-sample. +Clearing GPU cache for all ranks +--> Running with torch torch_distributed debug set to detail +File not found: /work/llm_recipes/models/tiny-llama-sample/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/tiny-llama-sample/latest_iteration.txt +File not found: /work/llm_recipes/models/tiny-llama-sample/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/tiny-llama-sample/latest_iteration.txt +File not found: /work/llm_recipes/models/tiny-llama-sample/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/tiny-llama-sample/latest_iteration.txt +No checkpoint found in /work/llm_recipes/models/tiny-llama-sample, skipping model loading +--> Model /share/pretrained_lm/meta-llama/TinyLlama_v1.1 +--> /share/pretrained_lm/meta-llama/TinyLlama_v1.1 has 1100.048384 Million params +You are attempting to use Flash Attention 2.0 without specifying a torch dtype. 
This might lead to unexpected behaviour +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +Flash Attention 2.0 only supports torch.float16 and torch.bfloat16 dtypes, but the current dype in LlamaForCausalLM is torch.float32. You should run training or inference using Automatic Mixed-Precision via the `with torch.autocast(device_type='torch_device'):` decorator, or load the model with the `torch_dtype` argument. Example: `model = AutoModel.from_pretrained("openai/whisper-tiny", attn_implementation="flash_attention_2", torch_dtype=torch.float16)` +Flash Attention 2.0 only supports torch.float16 and torch.bfloat16 dtypes, but the current dype in LlamaModel is torch.float32. You should run training or inference using Automatic Mixed-Precision via the `with torch.autocast(device_type='torch_device'):` decorator, or load the model with the `torch_dtype` argument. Example: `model = AutoModel.from_pretrained("openai/whisper-tiny", attn_implementation="flash_attention_2", torch_dtype=torch.float16)` +/usr/local/lib/python3.10/dist-packages/torch/distributed/fsdp/_init_utils.py:441: UserWarning: FSDP is switching to use `NO_SHARD` instead of ShardingStrategy.FULL_SHARD since the world size is 1. + warnings.warn( +BFloat16 enabled for mixed precision - using bfSixteen policy +--> applying fsdp activation checkpointing... + > datasets target sizes (minimum size): + train: 640000 + validation: 35200 + test: 3200 +> building train, validation, and test datasets for GPT ... +> finished creating GPT datasets ... +File not found: /work/llm_recipes/models/tiny-llama-sample/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/tiny-llama-sample/latest_iteration.txt +No checkpoint found in /work/llm_recipes/models/tiny-llama-sample, skipping optimizer loading +File not found: /work/llm_recipes/models/tiny-llama-sample/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/tiny-llama-sample/latest_iteration.txt +model info: FullyShardedDataParallel( + (_fsdp_wrapped_module): LlamaForCausalLM( + (model): LlamaModel( + (embed_tokens): Embedding(32000, 2048) + (layers): ModuleList( + (0-21): 22 x FullyShardedDataParallel( + (_fsdp_wrapped_module): CheckpointWrapper( + (_checkpoint_wrapped_module): LlamaDecoderLayer( + (self_attn): LlamaFlashAttention2( + (q_proj): Linear(in_features=2048, out_features=2048, bias=False) + (k_proj): Linear(in_features=2048, out_features=256, bias=False) + (v_proj): Linear(in_features=2048, out_features=256, bias=False) + (o_proj): Linear(in_features=2048, out_features=2048, bias=False) + (rotary_emb): LlamaRotaryEmbedding() + ) + (mlp): LlamaMLP( + (gate_proj): Linear(in_features=2048, out_features=5632, bias=False) + (up_proj): Linear(in_features=2048, out_features=5632, bias=False) + (down_proj): Linear(in_features=5632, out_features=2048, bias=False) + (act_fn): SiLU() + ) + (input_layernorm): LlamaRMSNorm() + (post_attention_layernorm): LlamaRMSNorm() + ) + ) + ) + ) + (norm): LlamaRMSNorm() + (rotary_emb): LlamaRotaryEmbedding() + ) + (lm_head): Linear(in_features=2048, out_features=32000, bias=False) + ) +) +model config: LlamaConfig { + "_name_or_path": "/share/pretrained_lm/meta-llama/TinyLlama_v1.1", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 2048, + 
"initializer_range": 0.02, + "intermediate_size": 5632, + "label_smoothing": 0.0, + "max_position_embeddings": 2048, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 22, + "num_key_value_heads": 4, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 10000.0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.43.3", + "use_cache": false, + "vocab_size": 32000 +} +Let split = None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +Traceback (most recent call last): + File "/project/examples/finetuning.py", line 13, in + main() + File "/project/src/llama_recipes/finetuning.py", line 281, in main + train( + File "/project/src/llama_recipes/utils/train_utils.py", line 110, in train + loss: torch.Tensor = model(**batch).loss + File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1520, in _call_impl + return forward_call(*args, **kwargs) + File "/usr/local/lib/python3.10/dist-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 849, in forward + output = self._fsdp_wrapped_module(*args, **kwargs) + File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1520, in _call_impl + return forward_call(*args, **kwargs) + File "/project/lib/transformers/src/transformers/models/llama/modeling_llama.py", line 1141, in forward + outputs = self.model( + File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl + return self._call_impl(*args, **kwargs) + File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1520, in _call_impl + return forward_call(*args, **kwargs) + File "/project/lib/transformers/src/transformers/models/llama/modeling_llama.py", line 908, in forward + cache_position = torch.arange( +RuntimeError: CUDA error: device-side assert triggered +CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect. +For debugging consider passing CUDA_LAUNCH_BLOCKING=1. +Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions. 
\ No newline at end of file diff --git a/wandb/run-20240804_040133-gue8fknz/files/requirements.txt b/wandb/run-20240804_040133-gue8fknz/files/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..7c90fd1c54d0a9881f6b4c6465b2a4fa88c9056c --- /dev/null +++ b/wandb/run-20240804_040133-gue8fknz/files/requirements.txt @@ -0,0 +1,271 @@ +absl-py==2.1.0 +accelerate==0.33.0 +aiohttp==3.9.1 +aiosignal==1.3.1 +annotated-types==0.6.0 +apex==0.1 +appdirs==1.4.4 +argon2-cffi-bindings==21.2.0 +argon2-cffi==23.1.0 +asttokens==2.4.1 +astunparse==1.6.3 +async-timeout==4.0.3 +attrs==23.2.0 +audioread==3.0.1 +beautifulsoup4==4.12.3 +bleach==6.1.0 +blis==0.7.11 +cachetools==5.3.2 +catalogue==2.0.10 +certifi==2024.2.2 +cffi==1.16.0 +charset-normalizer==3.3.2 +click==8.1.7 +cloudpathlib==0.16.0 +cloudpickle==3.0.0 +cmake==3.28.1 +colorama==0.4.6 +comm==0.2.1 +confection==0.1.4 +contourpy==1.2.0 +cubinlinker==0.3.0+2.g405ac64 +cuda-python==12.3.0rc4+9.gdb8c48a.dirty +cudf==23.12.0 +cugraph-dgl==23.12.0 +cugraph-service-client==23.12.0 +cugraph-service-server==23.12.0 +cugraph==23.12.0 +cuml==23.12.0 +cupy-cuda12x==12.3.0 +cycler==0.12.1 +cymem==2.0.8 +cython==3.0.8 +dask-cuda==23.12.0 +dask-cudf==23.12.0 +dask==2023.11.0 +debugpy==1.8.1 +decorator==5.1.1 +defusedxml==0.7.1 +distributed==2023.11.0 +dm-tree==0.1.8 +docker-pycreds==0.4.0 +einops==0.7.0 +exceptiongroup==1.2.0 +execnet==2.0.2 +executing==2.0.1 +expecttest==0.1.3 +fastjsonschema==2.19.1 +fastrlock==0.8.2 +filelock==3.13.1 +flash-attn==2.4.2 +fonttools==4.48.1 +frozenlist==1.4.1 +fsspec==2023.12.2 +gast==0.5.4 +gitdb==4.0.11 +gitpython==3.1.43 +google-auth-oauthlib==0.4.6 +google-auth==2.27.0 +graphsurgeon==0.4.6 +grpcio==1.60.1 +huggingface-hub==0.24.5 +hypothesis==5.35.1 +idna==3.6 +importlib-metadata==7.0.1 +iniconfig==2.0.0 +intel-openmp==2021.4.0 +ipadic==1.0.0 +ipykernel==6.29.2 +ipython-genutils==0.2.0 +ipython==8.21.0 +jedi==0.19.1 +jinja2==3.1.3 +joblib==1.3.2 +json5==0.9.14 +jsonnet==0.19.1 +jsonschema-specifications==2023.12.1 +jsonschema==4.21.1 +jupyter-client==8.6.0 +jupyter-core==5.7.1 +jupyter-tensorboard==0.2.0 +jupyterlab-pygments==0.3.0 +jupyterlab-server==1.2.0 +jupyterlab==2.3.2 +jupytext==1.16.1 +kiwisolver==1.4.5 +langcodes==3.3.0 +lazy-loader==0.3 +librosa==0.10.1 +llvmlite==0.40.1 +locket==1.0.0 +logzero==1.7.0 +lxml==5.2.2 +markdown-it-py==3.0.0 +markdown==3.5.2 +markupsafe==2.1.4 +matplotlib-inline==0.1.6 +matplotlib==3.8.2 +mdit-py-plugins==0.4.0 +mdurl==0.1.2 +mecab-python3==1.0.6 +mistune==3.0.2 +mkl-devel==2021.1.1 +mkl-include==2021.1.1 +mkl==2021.1.1 +mock==5.1.0 +more-itertools==9.1.0 +mpmath==1.3.0 +msgpack==1.0.7 +multidict==6.0.4 +murmurhash==1.0.10 +nbclient==0.9.0 +nbconvert==7.16.0 +nbformat==5.9.2 +nest-asyncio==1.6.0 +networkx==2.6.3 +ninja==1.11.1.1 +nltk==3.8.1 +notebook==6.4.10 +numba==0.57.1+1.g1ff679645 +numpy==1.24.4 +nvfuser==0.1.4a0+d0bb811 +nvidia-dali-cuda120==1.34.0 +nvidia-pyindex==1.0.9 +nvtx==0.2.5 +oauthlib==3.2.2 +onnx==1.15.0rc2 +opencv==4.7.0 +optree==0.10.0 +packaging==23.2 +pandas==1.5.3 +pandocfilters==1.5.1 +parso==0.8.3 +partd==1.4.1 +peft==0.11.1 +pexpect==4.9.0 +pillow==10.2.0 +pip==24.0 +platformdirs==4.2.0 +pluggy==1.4.0 +ply==3.11 +polygraphy==0.49.4 +pooch==1.8.0 +portalocker==2.10.1 +preshed==3.0.9 +prettytable==3.9.0 +prometheus-client==0.19.0 +prompt-toolkit==3.0.43 +protobuf==4.24.4 +psutil==5.9.4 +ptxcompiler==0.8.1+2.g0d406d6 +ptyprocess==0.7.0 +pure-eval==0.2.2 +pyarrow==14.0.1.dev0+gba5374836.d20240125 +pyasn1-modules==0.3.0 +pyasn1==0.5.1 
+pybind11-global==2.11.1 +pybind11==2.11.1 +pycocotools==2.0+nv0.8.0 +pycparser==2.21 +pydantic-core==2.16.2 +pydantic==2.6.1 +pygments==2.17.2 +pylibcugraph==23.12.0 +pylibcugraphops==23.12.0 +pylibraft==23.12.0 +pynvml==11.4.1 +pyparsing==3.1.1 +pytest-flakefinder==1.1.0 +pytest-rerunfailures==13.0 +pytest-shard==0.1.2 +pytest-xdist==3.5.0 +pytest==8.0.0 +python-dateutil==2.8.2 +python-dotenv==1.0.0 +python-hostlist==1.23.0 +pytorch-quantization==2.1.2 +pytz==2023.3.post1 +pyyaml==6.0.1 +pyzmq==25.1.2 +raft-dask==23.12.0 +rapids-dask-dependency==23.12.1 +referencing==0.33.0 +regex==2023.12.25 +requests-oauthlib==1.3.1 +requests==2.31.0 +rich==13.7.0 +rmm==23.12.0 +rpds-py==0.17.1 +rsa==4.9 +sacrebleu==2.4.0 +safetensors==0.4.3 +scikit-learn==1.2.0 +scipy==1.12.0 +send2trash==1.8.2 +sentencepiece==0.1.99 +sentry-sdk==2.12.0 +setproctitle==1.3.3 +setuptools==68.2.2 +six==1.16.0 +smart-open==6.4.0 +smmap==5.0.1 +sortedcontainers==2.4.0 +soundfile==0.12.1 +soupsieve==2.5 +soxr==0.3.7 +spacy-legacy==3.0.12 +spacy-loggers==1.0.5 +spacy==3.7.2 +sphinx-glpi-theme==0.6 +srsly==2.4.8 +stack-data==0.6.3 +sympy==1.12 +tabulate==0.9.0 +tbb==2021.11.0 +tblib==3.0.0 +tensorboard-data-server==0.6.1 +tensorboard-plugin-wit==1.8.1 +tensorboard==2.9.0 +tensorrt==8.6.3 +terminado==0.18.0 +termplotlib==0.3.9 +thinc==8.2.3 +threadpoolctl==3.2.0 +thriftpy2==0.4.17 +tinycss2==1.2.1 +tokenizers==0.19.1 +toml==0.10.2 +tomli==2.0.1 +toolz==0.12.1 +torch-tensorrt==2.3.0a0 +torch==2.3.0a0+ebedce2 +torchdata==0.7.1a0 +torchtext==0.17.0a0 +torchvision==0.18.0a0 +tornado==6.4 +tqdm==4.66.1 +traitlets==5.9.0 +transformer-engine==1.3.0+5b90b7f +transformers==4.43.3 +treelite-runtime==3.9.1 +treelite==3.9.1 +triton==2.2.0+e28a256 +typer==0.9.0 +types-dataclasses==0.6.6 +typing-extensions==4.9.0 +ucx-py==0.35.0 +uff==0.6.9 +ujson==5.8.0 +urllib3==1.26.18 +wandb==0.16.3 +wasabi==1.1.2 +wcwidth==0.2.13 +weasel==0.3.4 +webencodings==0.5.1 +werkzeug==3.0.1 +wheel==0.42.0 +xdoctest==1.0.2 +xgboost==1.7.6 +yarl==1.9.4 +zict==3.0.0 +zipp==3.17.0 \ No newline at end of file diff --git a/wandb/run-20240804_040133-gue8fknz/files/wandb-metadata.json b/wandb/run-20240804_040133-gue8fknz/files/wandb-metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..392165e486a117b6de816f230342dfc620898cf9 --- /dev/null +++ b/wandb/run-20240804_040133-gue8fknz/files/wandb-metadata.json @@ -0,0 +1,215 @@ +{ + "os": "Linux-5.15.0-91-generic-x86_64-with-glibc2.35", + "python": "3.10.12", + "heartbeatAt": "2024-08-03T19:01:33.862827", + "startedAt": "2024-08-03T19:01:33.267895", + "docker": null, + "cuda": null, + "args": [ + "--seq-length", + "256", + "--sliding-window-size", + "2048", + "--micro-batch-size", + "8", + "--global-batch-size", + "320", + "--train-iters", + "2000", + "--tokenizer-type", + "Llama2Tokenizer", + "--tokenizer-model", + "/share/pretrained_lm/meta-llama/TinyLlama_v1.1/tokenizer.model", + "--train-data-path", + "4013541", + "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document", + "--valid-data-path", + "4013541", + "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document", + "--test-data-path", + "4013541", + "/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document", + "--lr", + "2e-5", + "--min-lr", + "1e-6", + "--lr-decay-style", + "cosine", + 
"--lr-warmup-iters", + "500", + "--lr-decay-iters", + "2000", + "--weight-decay", + "0.1", + "--grad-clip-norm", + "1.0", + "--optimizer", + "adam", + "--adam-beta1", + "0.9", + "--adam-beta2", + "0.95", + "--adam-eps", + "1e-6", + "--save-interval", + "200", + "--eval-interval", + "200", + "--eval-iters", + "10", + "--bf16", + "--mixed-precision", + "--base-model", + "/share/pretrained_lm/meta-llama/TinyLlama_v1.1", + "--save", + "/work/llm_recipes/models/tiny-llama-sample", + "--load", + "/work/llm_recipes/models/tiny-llama-sample", + "--fsdp-activation-checkpointing", + "--sharding-strategy", + "FULL_SHARD", + "--checkpoint-type", + "LOCAL_STATE_DICT", + "--save-n-checkpoints", + "10", + "--hf-upload-retry-limit", + "2", + "--hf-repo-id", + "koichi12/tiny-llama-sample", + "--wandb-entity", + "iwakawa-koichi-q5-tohoku-nlp6723", + "--wandb-project", + "llm_tutorial", + "--wandb-name", + "tiny-llama-sample_train_2024-08-04-04:01:22" + ], + "state": "running", + "program": "/project/examples/finetuning.py", + "codePathLocal": "examples/finetuning.py", + "codePath": "examples/finetuning.py", + "git": { + "remote": "https://github.com/cl-tohoku/llm-recipes-failab-m1-yans.git", + "commit": "3be5353210a678dc7008f237fa16b99f2bdf36ea" + }, + "email": null, + "root": "/project", + "host": "gpu-koiwa-00", + "username": "koiwa", + "executable": "/usr/bin/python", + "cpu_count": 18, + "cpu_count_logical": 18, + "cpu_freq": { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + "cpu_freq_per_core": [ + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + }, + { + "current": 2400.034, + "min": 0.0, + "max": 0.0 + } + ], + "disk": { + "/": { + "total": 0.0625, + "used": 1.1444091796875e-05 + } + }, + "gpu": "NVIDIA A100-SXM4-40GB", + "gpu_count": 1, + "gpu_devices": [ + { + "name": "NVIDIA A100-SXM4-40GB", + "memory_total": 42949672960 + } + ], + "memory": { + "total": 56.48782730102539 + } +} diff --git a/wandb/run-20240804_040133-gue8fknz/files/wandb-summary.json b/wandb/run-20240804_040133-gue8fknz/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..4ac1ba993956be52f459e677e193218ff2cddc76 --- /dev/null +++ b/wandb/run-20240804_040133-gue8fknz/files/wandb-summary.json @@ -0,0 +1 @@ +{"_wandb": {"runtime": 3}} \ No newline at end of file diff --git a/wandb/run-20240804_040133-gue8fknz/logs/debug-internal.log b/wandb/run-20240804_040133-gue8fknz/logs/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..fefa4583a95f6a91a73e2e0e6127560e515a157e --- /dev/null +++ 
b/wandb/run-20240804_040133-gue8fknz/logs/debug-internal.log @@ -0,0 +1,186 @@ +2024-08-04 04:01:33,281 INFO StreamThr :13361 [internal.py:wandb_internal():86] W&B internal server running at pid: 13361, started at: 2024-08-04 04:01:33.280792 +2024-08-04 04:01:33,283 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: status +2024-08-04 04:01:33,285 INFO WriterThread:13361 [datastore.py:open_for_write():87] open: /project/wandb/run-20240804_040133-gue8fknz/run-gue8fknz.wandb +2024-08-04 04:01:33,286 DEBUG SenderThread:13361 [sender.py:send():382] send: header +2024-08-04 04:01:33,299 DEBUG SenderThread:13361 [sender.py:send():382] send: run +2024-08-04 04:01:33,750 INFO SenderThread:13361 [dir_watcher.py:__init__():211] watching files in: /project/wandb/run-20240804_040133-gue8fknz/files +2024-08-04 04:01:33,751 INFO SenderThread:13361 [sender.py:_start_run_threads():1136] run started: gue8fknz with start time 1722711693.280215 +2024-08-04 04:01:33,756 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: check_version +2024-08-04 04:01:33,756 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: check_version +2024-08-04 04:01:33,841 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: run_start +2024-08-04 04:01:33,848 DEBUG HandlerThread:13361 [system_info.py:__init__():27] System info init +2024-08-04 04:01:33,848 DEBUG HandlerThread:13361 [system_info.py:__init__():42] System info init done +2024-08-04 04:01:33,848 INFO HandlerThread:13361 [system_monitor.py:start():194] Starting system monitor +2024-08-04 04:01:33,848 INFO SystemMonitor:13361 [system_monitor.py:_start():158] Starting system asset monitoring threads +2024-08-04 04:01:33,848 INFO HandlerThread:13361 [system_monitor.py:probe():214] Collecting system info +2024-08-04 04:01:33,849 INFO SystemMonitor:13361 [interfaces.py:start():190] Started cpu monitoring +2024-08-04 04:01:33,849 INFO SystemMonitor:13361 [interfaces.py:start():190] Started disk monitoring +2024-08-04 04:01:33,850 INFO SystemMonitor:13361 [interfaces.py:start():190] Started gpu monitoring +2024-08-04 04:01:33,851 INFO SystemMonitor:13361 [interfaces.py:start():190] Started memory monitoring +2024-08-04 04:01:33,851 INFO SystemMonitor:13361 [interfaces.py:start():190] Started network monitoring +2024-08-04 04:01:33,862 DEBUG HandlerThread:13361 [system_info.py:probe():151] Probing system +2024-08-04 04:01:33,864 DEBUG HandlerThread:13361 [system_info.py:_probe_git():136] Probing git +2024-08-04 04:01:33,876 DEBUG HandlerThread:13361 [system_info.py:_probe_git():144] Probing git done +2024-08-04 04:01:33,877 DEBUG HandlerThread:13361 [system_info.py:probe():199] Probing system done +2024-08-04 04:01:33,877 DEBUG HandlerThread:13361 [system_monitor.py:probe():223] {'os': 'Linux-5.15.0-91-generic-x86_64-with-glibc2.35', 'python': '3.10.12', 'heartbeatAt': '2024-08-03T19:01:33.862827', 'startedAt': '2024-08-03T19:01:33.267895', 'docker': None, 'cuda': None, 'args': ('--seq-length', '256', '--sliding-window-size', '2048', '--micro-batch-size', '8', '--global-batch-size', '320', '--train-iters', '2000', '--tokenizer-type', 'Llama2Tokenizer', '--tokenizer-model', '/share/pretrained_lm/meta-llama/TinyLlama_v1.1/tokenizer.model', '--train-data-path', '4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document', '--valid-data-path', '4013541', 
'/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document', '--test-data-path', '4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document', '--lr', '2e-5', '--min-lr', '1e-6', '--lr-decay-style', 'cosine', '--lr-warmup-iters', '500', '--lr-decay-iters', '2000', '--weight-decay', '0.1', '--grad-clip-norm', '1.0', '--optimizer', 'adam', '--adam-beta1', '0.9', '--adam-beta2', '0.95', '--adam-eps', '1e-6', '--save-interval', '200', '--eval-interval', '200', '--eval-iters', '10', '--bf16', '--mixed-precision', '--base-model', '/share/pretrained_lm/meta-llama/TinyLlama_v1.1', '--save', '/work/llm_recipes/models/tiny-llama-sample', '--load', '/work/llm_recipes/models/tiny-llama-sample', '--fsdp-activation-checkpointing', '--sharding-strategy', 'FULL_SHARD', '--checkpoint-type', 'LOCAL_STATE_DICT', '--save-n-checkpoints', '10', '--hf-upload-retry-limit', '2', '--hf-repo-id', 'koichi12/tiny-llama-sample', '--wandb-entity', 'iwakawa-koichi-q5-tohoku-nlp6723', '--wandb-project', 'llm_tutorial', '--wandb-name', 'tiny-llama-sample_train_2024-08-04-04:01:22'), 'state': 'running', 'program': '/project/examples/finetuning.py', 'codePathLocal': 'examples/finetuning.py', 'codePath': 'examples/finetuning.py', 'git': {'remote': 'https://github.com/cl-tohoku/llm-recipes-failab-m1-yans.git', 'commit': '3be5353210a678dc7008f237fa16b99f2bdf36ea'}, 'email': None, 'root': '/project', 'host': 'gpu-koiwa-00', 'username': 'koiwa', 'executable': '/usr/bin/python', 'cpu_count': 18, 'cpu_count_logical': 18, 'cpu_freq': {'current': 2400.034, 'min': 0.0, 'max': 0.0}, 'cpu_freq_per_core': [{'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}, {'current': 2400.034, 'min': 0.0, 'max': 0.0}], 'disk': {'/': {'total': 0.0625, 'used': 1.1444091796875e-05}}, 'gpu': 'NVIDIA A100-SXM4-40GB', 'gpu_count': 1, 'gpu_devices': [{'name': 'NVIDIA A100-SXM4-40GB', 'memory_total': 42949672960}], 'memory': {'total': 56.48782730102539}} +2024-08-04 04:01:33,877 INFO HandlerThread:13361 [system_monitor.py:probe():224] Finished collecting system info +2024-08-04 04:01:33,877 INFO HandlerThread:13361 [system_monitor.py:probe():227] Publishing system info +2024-08-04 04:01:33,878 INFO HandlerThread:13361 [system_monitor.py:probe():229] Finished publishing system info +2024-08-04 04:01:33,884 DEBUG SenderThread:13361 [sender.py:send():382] send: files +2024-08-04 04:01:33,884 INFO SenderThread:13361 [sender.py:_save_file():1403] saving file wandb-metadata.json with policy now +2024-08-04 04:01:33,893 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: python_packages +2024-08-04 04:01:33,893 DEBUG HandlerThread:13361 
[handler.py:handle_request():146] handle_request: stop_status +2024-08-04 04:01:33,894 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: python_packages +2024-08-04 04:01:33,894 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-04 04:01:33,895 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: stop_status +2024-08-04 04:01:34,125 DEBUG SenderThread:13361 [sender.py:send():382] send: telemetry +2024-08-04 04:01:34,547 INFO wandb-upload_0:13361 [upload_job.py:push():131] Uploaded file /tmp/tmp1y3_1qmlwandb/004ik6u3-wandb-metadata.json +2024-08-04 04:01:34,753 INFO Thread-12 :13361 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240804_040133-gue8fknz/files/wandb-metadata.json +2024-08-04 04:01:34,753 INFO Thread-12 :13361 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240804_040133-gue8fknz/files/requirements.txt +2024-08-04 04:01:34,753 INFO Thread-12 :13361 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240804_040133-gue8fknz/files/output.log +2024-08-04 04:01:36,386 DEBUG SenderThread:13361 [sender.py:send():382] send: config +2024-08-04 04:01:36,386 DEBUG SenderThread:13361 [sender.py:send():382] send: config +2024-08-04 04:01:36,753 INFO Thread-12 :13361 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_040133-gue8fknz/files/output.log +2024-08-04 04:01:37,033 DEBUG SenderThread:13361 [sender.py:send():382] send: exit +2024-08-04 04:01:37,033 INFO SenderThread:13361 [sender.py:send_exit():589] handling exit code: 1 +2024-08-04 04:01:37,033 INFO SenderThread:13361 [sender.py:send_exit():591] handling runtime: 3 +2024-08-04 04:01:37,035 INFO SenderThread:13361 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-04 04:01:37,035 INFO SenderThread:13361 [sender.py:send_exit():597] send defer +2024-08-04 04:01:37,035 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: defer +2024-08-04 04:01:37,035 INFO HandlerThread:13361 [handler.py:handle_request_defer():172] handle defer: 0 +2024-08-04 04:01:37,035 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: defer +2024-08-04 04:01:37,035 INFO SenderThread:13361 [sender.py:send_request_defer():613] handle sender defer: 0 +2024-08-04 04:01:37,035 INFO SenderThread:13361 [sender.py:transition_state():617] send defer: 1 +2024-08-04 04:01:37,035 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: defer +2024-08-04 04:01:37,035 INFO HandlerThread:13361 [handler.py:handle_request_defer():172] handle defer: 1 +2024-08-04 04:01:37,036 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: defer +2024-08-04 04:01:37,036 INFO SenderThread:13361 [sender.py:send_request_defer():613] handle sender defer: 1 +2024-08-04 04:01:37,036 INFO SenderThread:13361 [sender.py:transition_state():617] send defer: 2 +2024-08-04 04:01:37,036 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: defer +2024-08-04 04:01:37,036 INFO HandlerThread:13361 [handler.py:handle_request_defer():172] handle defer: 2 +2024-08-04 04:01:37,036 INFO HandlerThread:13361 [system_monitor.py:finish():203] Stopping system monitor +2024-08-04 04:01:37,036 DEBUG SystemMonitor:13361 [system_monitor.py:_start():172] Starting system metrics aggregation loop +2024-08-04 04:01:37,036 INFO HandlerThread:13361 [interfaces.py:finish():202] Joined cpu 
monitor +2024-08-04 04:01:37,036 DEBUG SystemMonitor:13361 [system_monitor.py:_start():179] Finished system metrics aggregation loop +2024-08-04 04:01:37,037 INFO HandlerThread:13361 [interfaces.py:finish():202] Joined disk monitor +2024-08-04 04:01:37,037 DEBUG SystemMonitor:13361 [system_monitor.py:_start():183] Publishing last batch of metrics +2024-08-04 04:01:37,070 INFO HandlerThread:13361 [interfaces.py:finish():202] Joined gpu monitor +2024-08-04 04:01:37,071 INFO HandlerThread:13361 [interfaces.py:finish():202] Joined memory monitor +2024-08-04 04:01:37,071 INFO HandlerThread:13361 [interfaces.py:finish():202] Joined network monitor +2024-08-04 04:01:37,071 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: defer +2024-08-04 04:01:37,071 INFO SenderThread:13361 [sender.py:send_request_defer():613] handle sender defer: 2 +2024-08-04 04:01:37,071 INFO SenderThread:13361 [sender.py:transition_state():617] send defer: 3 +2024-08-04 04:01:37,071 DEBUG SenderThread:13361 [sender.py:send():382] send: stats +2024-08-04 04:01:37,071 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: defer +2024-08-04 04:01:37,072 INFO HandlerThread:13361 [handler.py:handle_request_defer():172] handle defer: 3 +2024-08-04 04:01:37,072 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: defer +2024-08-04 04:01:37,072 INFO SenderThread:13361 [sender.py:send_request_defer():613] handle sender defer: 3 +2024-08-04 04:01:37,072 INFO SenderThread:13361 [sender.py:transition_state():617] send defer: 4 +2024-08-04 04:01:37,072 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: defer +2024-08-04 04:01:37,072 INFO HandlerThread:13361 [handler.py:handle_request_defer():172] handle defer: 4 +2024-08-04 04:01:37,072 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: defer +2024-08-04 04:01:37,072 INFO SenderThread:13361 [sender.py:send_request_defer():613] handle sender defer: 4 +2024-08-04 04:01:37,072 INFO SenderThread:13361 [sender.py:transition_state():617] send defer: 5 +2024-08-04 04:01:37,072 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: defer +2024-08-04 04:01:37,072 INFO HandlerThread:13361 [handler.py:handle_request_defer():172] handle defer: 5 +2024-08-04 04:01:37,073 DEBUG SenderThread:13361 [sender.py:send():382] send: summary +2024-08-04 04:01:37,073 INFO SenderThread:13361 [sender.py:_save_file():1403] saving file wandb-summary.json with policy end +2024-08-04 04:01:37,074 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: defer +2024-08-04 04:01:37,074 INFO SenderThread:13361 [sender.py:send_request_defer():613] handle sender defer: 5 +2024-08-04 04:01:37,074 INFO SenderThread:13361 [sender.py:transition_state():617] send defer: 6 +2024-08-04 04:01:37,074 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: defer +2024-08-04 04:01:37,074 INFO HandlerThread:13361 [handler.py:handle_request_defer():172] handle defer: 6 +2024-08-04 04:01:37,074 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: defer +2024-08-04 04:01:37,074 INFO SenderThread:13361 [sender.py:send_request_defer():613] handle sender defer: 6 +2024-08-04 04:01:37,077 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: status_report +2024-08-04 04:01:37,285 INFO SenderThread:13361 [sender.py:transition_state():617] send defer: 7 +2024-08-04 04:01:37,285 DEBUG HandlerThread:13361 [handler.py:handle_request():146] 
handle_request: defer +2024-08-04 04:01:37,285 INFO HandlerThread:13361 [handler.py:handle_request_defer():172] handle defer: 7 +2024-08-04 04:01:37,286 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: defer +2024-08-04 04:01:37,286 INFO SenderThread:13361 [sender.py:send_request_defer():613] handle sender defer: 7 +2024-08-04 04:01:37,754 INFO Thread-12 :13361 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_040133-gue8fknz/files/config.yaml +2024-08-04 04:01:37,754 INFO Thread-12 :13361 [dir_watcher.py:_on_file_created():271] file/dir created: /project/wandb/run-20240804_040133-gue8fknz/files/wandb-summary.json +2024-08-04 04:01:38,033 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 04:01:38,589 INFO SenderThread:13361 [sender.py:transition_state():617] send defer: 8 +2024-08-04 04:01:38,589 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 04:01:38,589 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: defer +2024-08-04 04:01:38,589 INFO HandlerThread:13361 [handler.py:handle_request_defer():172] handle defer: 8 +2024-08-04 04:01:38,590 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: defer +2024-08-04 04:01:38,590 INFO SenderThread:13361 [sender.py:send_request_defer():613] handle sender defer: 8 +2024-08-04 04:01:38,590 INFO SenderThread:13361 [job_builder.py:build():296] Attempting to build job artifact +2024-08-04 04:01:38,590 INFO SenderThread:13361 [job_builder.py:_get_source_type():426] is repo sourced job +2024-08-04 04:01:38,604 INFO SenderThread:13361 [job_builder.py:build():402] adding wandb-job metadata file +2024-08-04 04:01:38,613 INFO SenderThread:13361 [sender.py:transition_state():617] send defer: 9 +2024-08-04 04:01:38,613 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: defer +2024-08-04 04:01:38,613 DEBUG SenderThread:13361 [sender.py:send():382] send: artifact +2024-08-04 04:01:38,613 INFO HandlerThread:13361 [handler.py:handle_request_defer():172] handle defer: 9 +2024-08-04 04:01:38,755 INFO Thread-12 :13361 [dir_watcher.py:_on_file_modified():288] file/dir modified: /project/wandb/run-20240804_040133-gue8fknz/files/output.log +2024-08-04 04:01:39,033 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 04:01:39,446 INFO SenderThread:13361 [sender.py:send_artifact():1494] sent artifact job-https___github.com_cl-tohoku_llm-recipes-failab-m1-yans.git_examples_finetuning.py - {'id': 'QXJ0aWZhY3Q6MTA5MTk2NTkzOA==', 'state': 'COMMITTED', 'artifactSequence': {'id': 'QXJ0aWZhY3RDb2xsZWN0aW9uOjM2MjY3MjMzNA==', 'latestArtifact': {'id': 'QXJ0aWZhY3Q6MTA5MzUzODM4NQ==', 'versionIndex': 3}}} +2024-08-04 04:01:39,446 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: defer +2024-08-04 04:01:39,446 INFO SenderThread:13361 [sender.py:send_request_defer():613] handle sender defer: 9 +2024-08-04 04:01:39,446 INFO SenderThread:13361 [dir_watcher.py:finish():358] shutting down directory watcher +2024-08-04 04:01:39,756 INFO SenderThread:13361 [dir_watcher.py:finish():388] scan: /project/wandb/run-20240804_040133-gue8fknz/files +2024-08-04 04:01:39,756 INFO SenderThread:13361 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_040133-gue8fknz/files/requirements.txt requirements.txt +2024-08-04 04:01:39,756 INFO SenderThread:13361 [dir_watcher.py:finish():402] scan save: 
/project/wandb/run-20240804_040133-gue8fknz/files/config.yaml config.yaml +2024-08-04 04:01:39,757 INFO SenderThread:13361 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_040133-gue8fknz/files/wandb-metadata.json wandb-metadata.json +2024-08-04 04:01:39,757 INFO SenderThread:13361 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_040133-gue8fknz/files/wandb-summary.json wandb-summary.json +2024-08-04 04:01:39,759 INFO SenderThread:13361 [dir_watcher.py:finish():402] scan save: /project/wandb/run-20240804_040133-gue8fknz/files/output.log output.log +2024-08-04 04:01:39,761 INFO SenderThread:13361 [sender.py:transition_state():617] send defer: 10 +2024-08-04 04:01:39,762 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 04:01:39,763 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: defer +2024-08-04 04:01:39,763 INFO HandlerThread:13361 [handler.py:handle_request_defer():172] handle defer: 10 +2024-08-04 04:01:39,763 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: defer +2024-08-04 04:01:39,763 INFO SenderThread:13361 [sender.py:send_request_defer():613] handle sender defer: 10 +2024-08-04 04:01:39,763 INFO SenderThread:13361 [file_pusher.py:finish():172] shutting down file pusher +2024-08-04 04:01:40,034 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 04:01:40,034 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 04:01:40,196 INFO wandb-upload_0:13361 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240804_040133-gue8fknz/files/requirements.txt +2024-08-04 04:01:40,273 INFO wandb-upload_1:13361 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240804_040133-gue8fknz/files/config.yaml +2024-08-04 04:01:40,327 INFO wandb-upload_2:13361 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240804_040133-gue8fknz/files/wandb-summary.json +2024-08-04 04:01:40,379 INFO wandb-upload_3:13361 [upload_job.py:push():131] Uploaded file /project/wandb/run-20240804_040133-gue8fknz/files/output.log +2024-08-04 04:01:40,579 INFO Thread-11 (_thread_body):13361 [sender.py:transition_state():617] send defer: 11 +2024-08-04 04:01:40,579 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: defer +2024-08-04 04:01:40,579 INFO HandlerThread:13361 [handler.py:handle_request_defer():172] handle defer: 11 +2024-08-04 04:01:40,580 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: defer +2024-08-04 04:01:40,580 INFO SenderThread:13361 [sender.py:send_request_defer():613] handle sender defer: 11 +2024-08-04 04:01:40,580 INFO SenderThread:13361 [file_pusher.py:join():178] waiting for file pusher +2024-08-04 04:01:40,580 INFO SenderThread:13361 [sender.py:transition_state():617] send defer: 12 +2024-08-04 04:01:40,580 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: defer +2024-08-04 04:01:40,580 INFO HandlerThread:13361 [handler.py:handle_request_defer():172] handle defer: 12 +2024-08-04 04:01:40,580 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: defer +2024-08-04 04:01:40,580 INFO SenderThread:13361 [sender.py:send_request_defer():613] handle sender defer: 12 +2024-08-04 04:01:40,580 INFO SenderThread:13361 [file_stream.py:finish():595] file stream finish called +2024-08-04 04:01:40,764 INFO SenderThread:13361 [file_stream.py:finish():599] file stream finish is done +2024-08-04 
04:01:40,765 INFO SenderThread:13361 [sender.py:transition_state():617] send defer: 13 +2024-08-04 04:01:40,765 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: defer +2024-08-04 04:01:40,765 INFO HandlerThread:13361 [handler.py:handle_request_defer():172] handle defer: 13 +2024-08-04 04:01:40,765 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: defer +2024-08-04 04:01:40,765 INFO SenderThread:13361 [sender.py:send_request_defer():613] handle sender defer: 13 +2024-08-04 04:01:40,765 INFO SenderThread:13361 [sender.py:transition_state():617] send defer: 14 +2024-08-04 04:01:40,765 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: defer +2024-08-04 04:01:40,766 DEBUG SenderThread:13361 [sender.py:send():382] send: final +2024-08-04 04:01:40,766 INFO HandlerThread:13361 [handler.py:handle_request_defer():172] handle defer: 14 +2024-08-04 04:01:40,766 DEBUG SenderThread:13361 [sender.py:send():382] send: footer +2024-08-04 04:01:40,766 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: defer +2024-08-04 04:01:40,766 INFO SenderThread:13361 [sender.py:send_request_defer():613] handle sender defer: 14 +2024-08-04 04:01:40,766 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 04:01:40,766 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 04:01:40,767 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: poll_exit +2024-08-04 04:01:40,767 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: poll_exit +2024-08-04 04:01:40,767 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: server_info +2024-08-04 04:01:40,767 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: server_info +2024-08-04 04:01:40,769 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: get_summary +2024-08-04 04:01:40,769 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: sampled_history +2024-08-04 04:01:40,769 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: internal_messages +2024-08-04 04:01:40,770 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: job_info +2024-08-04 04:01:40,929 DEBUG SenderThread:13361 [sender.py:send_request():409] send_request: job_info +2024-08-04 04:01:40,929 INFO MainThread:13361 [wandb_run.py:_footer_history_summary_info():3866] rendering history +2024-08-04 04:01:40,929 INFO MainThread:13361 [wandb_run.py:_footer_history_summary_info():3898] rendering summary +2024-08-04 04:01:40,929 INFO MainThread:13361 [wandb_run.py:_footer_sync_info():3825] logging synced files +2024-08-04 04:01:40,930 DEBUG HandlerThread:13361 [handler.py:handle_request():146] handle_request: shutdown +2024-08-04 04:01:40,930 INFO HandlerThread:13361 [handler.py:finish():869] shutting down handler +2024-08-04 04:01:41,770 INFO WriterThread:13361 [datastore.py:close():296] close: /project/wandb/run-20240804_040133-gue8fknz/run-gue8fknz.wandb +2024-08-04 04:01:41,929 INFO SenderThread:13361 [sender.py:finish():1572] shutting down sender +2024-08-04 04:01:41,929 INFO SenderThread:13361 [file_pusher.py:finish():172] shutting down file pusher +2024-08-04 04:01:41,929 INFO SenderThread:13361 [file_pusher.py:join():178] waiting for file pusher diff --git a/wandb/run-20240804_040133-gue8fknz/logs/debug.log b/wandb/run-20240804_040133-gue8fknz/logs/debug.log new file mode 100644 
index 0000000000000000000000000000000000000000..0dfd6212a23ccb8b847bb500eb64c51c27085e76 --- /dev/null +++ b/wandb/run-20240804_040133-gue8fknz/logs/debug.log @@ -0,0 +1,29 @@ +2024-08-04 04:01:33,273 INFO MainThread:13290 [wandb_setup.py:_flush():76] Current SDK version is 0.16.3 +2024-08-04 04:01:33,274 INFO MainThread:13290 [wandb_setup.py:_flush():76] Configure stats pid to 13290 +2024-08-04 04:01:33,274 INFO MainThread:13290 [wandb_setup.py:_flush():76] Loading settings from /singularity_home/.config/wandb/settings +2024-08-04 04:01:33,274 INFO MainThread:13290 [wandb_setup.py:_flush():76] Loading settings from /project/wandb/settings +2024-08-04 04:01:33,274 INFO MainThread:13290 [wandb_setup.py:_flush():76] Loading settings from environment variables: {'api_key': '***REDACTED***', 'run_notes': 'Train tuny llama sample'} +2024-08-04 04:01:33,274 INFO MainThread:13290 [wandb_setup.py:_flush():76] Applying setup settings: {'_disable_service': False} +2024-08-04 04:01:33,274 INFO MainThread:13290 [wandb_setup.py:_flush():76] Inferring run settings from compute environment: {'program_relpath': 'examples/finetuning.py', 'program_abspath': '/project/examples/finetuning.py', 'program': '/project/examples/finetuning.py'} +2024-08-04 04:01:33,274 INFO MainThread:13290 [wandb_init.py:_log_setup():526] Logging user logs to /project/wandb/run-20240804_040133-gue8fknz/logs/debug.log +2024-08-04 04:01:33,274 INFO MainThread:13290 [wandb_init.py:_log_setup():527] Logging internal logs to /project/wandb/run-20240804_040133-gue8fknz/logs/debug-internal.log +2024-08-04 04:01:33,274 INFO MainThread:13290 [wandb_init.py:init():566] calling init triggers +2024-08-04 04:01:33,274 INFO MainThread:13290 [wandb_init.py:init():573] wandb.init called with sweep_config: {} +config: {'sharding_strategy': 'FULL_SHARD', 'checkpoint_type': 'LOCAL_STATE_DICT', 'fsdp_activation_checkpointing': True, 'fsdp_cpu_offload': False, 'low_cpu_fsdp': False, 'no_meta_device': False, 'data_path': None, 'split': '969, 30, 1', 'train_data_path': ['4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document'], 'valid_data_path': ['4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document'], 'test_data_path': ['4013541', '/work/llm_recipes/datasets/bin/common_crawl_and_extended_common_crawl.doc_extracted.200.sorted.uniq.filtered.shuf.head/data_text_document'], 'data_cache_path': None, 'vocab_size': None, 'vocab_file': None, 'merge_file': None, 'seq_length': 256, 'num_workers': 2, 'tokenizer_type': 'Llama2Tokenizer', 'tokenizer_model': '/share/pretrained_lm/meta-llama/TinyLlama_v1.1/tokenizer.model', 'reset_position_ids': False, 'reset_attention_mask': False, 'eod_mask_loss': False, 'retro_return_doc_ids': False, 'short_seq_prob': 0.1, 'vocab_extra_ids': 0, 'seed': 1234, 'use_mpi': False, 'wandb_entity': 'iwakawa-koichi-q5-tohoku-nlp6723', 'wandb_name': 'tiny-llama-sample_train_2024-08-04-04:01:22', 'wandb_project': 'llm_tutorial', 'quantization': False, 'use_freeze_layers': False, 'freeze_layers': None, 'bf16': True, 'fp16': False, 'mixed_precision': True, 'param_dtype': None, 'load': '/work/llm_recipes/models/tiny-llama-sample', 'save': '/work/llm_recipes/models/tiny-llama-sample', 'base_model': '/share/pretrained_lm/meta-llama/TinyLlama_v1.1', 'use_better_transformer': False, 'grad_clip_norm': 1.0, 'eval_interval': 200, 'save_interval': 200, 
'eval_iters': 10, 'optimizer': 'adam', 'lr': 2e-05, 'lr_decay_style': 'cosine', 'lr_decay_iters': 2000, 'lr_warmup_iters': 500, 'min_lr': 1e-06, 'train_iters': 2000, 'train_samples': None, 'global_batch_size': 320, 'micro_batch_size': 8, 'make_vocab_size_divisible_by': 128, 'sliding_window_size': 2048, 'skip_batch': None, 'no_save_optimizer_state': False, 'continual_pretraining': False, 'instruction_tuning': False, 'direct_preference_optimization': False, 'attention_dropout': 0.1, 'hidden_dropout': 0.1, 'weight_decay': 0.1, 'adam_beta1': 0.9, 'adam_beta2': 0.95, 'adam_eps': 1e-06, 'hf_transformer_model_dir': None, 'instruction_train_data_path': None, 'instruction_valid_data_path': None, 'epoch': None, 'instruction_dataset_size': None, 'save_sampler_state': False, 'label_smoothing': 0.0, 'save_n_checkpoints': 10, 'hf_repo_id': 'koichi12/tiny-llama-sample', 'create_public_hf_repo': False, 'upload_all_checkpoints_to_hf': False, 'hf_upload_retry_limit': 2, 'exit_duration_in_mins': None, 'source_key': None, 'target_key': None, 'attn_implementation': 'flash_attention_2', 'efficient_instruction_tuning': False, 'remove_padding_masking': False, 'save_start_iter': None, 'rank': 0, 'world_size': 1, 'padded_vocab_size': 32000, 'gradient_accumulation_steps': 40} +2024-08-04 04:01:33,274 INFO MainThread:13290 [wandb_init.py:init():616] starting backend +2024-08-04 04:01:33,274 INFO MainThread:13290 [wandb_init.py:init():620] setting up manager +2024-08-04 04:01:33,279 INFO MainThread:13290 [backend.py:_multiprocessing_setup():105] multiprocessing start_methods=fork,spawn,forkserver, using: spawn +2024-08-04 04:01:33,280 INFO MainThread:13290 [wandb_init.py:init():628] backend started and connected +2024-08-04 04:01:33,285 INFO MainThread:13290 [wandb_init.py:init():720] updated telemetry +2024-08-04 04:01:33,295 INFO MainThread:13290 [wandb_init.py:init():753] communicating run to backend with 90.0 second timeout +2024-08-04 04:01:33,756 INFO MainThread:13290 [wandb_run.py:_on_init():2262] communicating current version +2024-08-04 04:01:33,834 INFO MainThread:13290 [wandb_run.py:_on_init():2271] got version response upgrade_message: "wandb version 0.17.5 is available! To upgrade, please run:\n $ pip install wandb --upgrade" + +2024-08-04 04:01:33,835 INFO MainThread:13290 [wandb_init.py:init():804] starting run threads in backend +2024-08-04 04:01:33,892 INFO MainThread:13290 [wandb_run.py:_console_start():2241] atexit reg +2024-08-04 04:01:33,892 INFO MainThread:13290 [wandb_run.py:_redirect():2096] redirect: wrap_raw +2024-08-04 04:01:33,893 INFO MainThread:13290 [wandb_run.py:_redirect():2161] Wrapping output streams. +2024-08-04 04:01:33,893 INFO MainThread:13290 [wandb_run.py:_redirect():2186] Redirects installed. 
+2024-08-04 04:01:33,894 INFO MainThread:13290 [wandb_init.py:init():847] run started, returning control to user process +2024-08-04 04:01:36,385 INFO MainThread:13290 [wandb_run.py:_config_callback():1343] config_cb None None {'activation_function': 'silu', 'hidden_size': 2048, 'model_type': 'llama', 'max_position_embeddings': 2048, 'num_attention_heads': 32, 'num_hidden_layers': 22, 'model_architecture': 'LlamaForCausalLM'} +2024-08-04 04:01:36,386 INFO MainThread:13290 [wandb_run.py:_config_callback():1343] config_cb None None {'world_size': 1} diff --git a/wandb/run-20240804_040133-gue8fknz/run-gue8fknz.wandb b/wandb/run-20240804_040133-gue8fknz/run-gue8fknz.wandb new file mode 100644 index 0000000000000000000000000000000000000000..bd5ed9ab00c95bfc6ac2dbf7b51884c8f552d4e5 Binary files /dev/null and b/wandb/run-20240804_040133-gue8fknz/run-gue8fknz.wandb differ diff --git a/wandb/run-20240812_072401-esew3nhv/files/output.log b/wandb/run-20240812_072401-esew3nhv/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..c6a33ae4e59195bcb6b69af096681f4914482720 --- /dev/null +++ b/wandb/run-20240812_072401-esew3nhv/files/output.log @@ -0,0 +1,102 @@ +Created Hugging Face repository with ID koichi12/yans-qwen2-0.5B. +Clearing GPU cache for all ranks +--> Running with torch torch_distributed debug set to detail +File not found: /work/llm_recipes/models/yans-qwen2-0.5B/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-qwen2-0.5B/latest_iteration.txt +File not found: /work/llm_recipes/models/yans-qwen2-0.5B/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-qwen2-0.5B/latest_iteration.txt +File not found: /work/llm_recipes/models/yans-qwen2-0.5B/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-qwen2-0.5B/latest_iteration.txt +No checkpoint found in /work/llm_recipes/models/yans-qwen2-0.5B, skipping model loading +--> Model /share/pretrained_lm/Qwen/Qwen2-0.5B +--> /share/pretrained_lm/Qwen/Qwen2-0.5B has 494.032768 Million params +BFloat16 enabled for mixed precision - using bfSixteen policy +--> applying fsdp activation checkpointing... + > datasets target sizes (minimum size): + train: 6400000 + validation: 12803200 + test: 3200 +> building train, validation, and test datasets for GPT ... +You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`. +/usr/local/lib/python3.10/dist-packages/torch/distributed/fsdp/_init_utils.py:441: UserWarning: FSDP is switching to use `NO_SHARD` instead of ShardingStrategy.FULL_SHARD since the world size is 1. + warnings.warn( +Let split = None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +> finished creating GPT datasets ... 
+File not found: /work/llm_recipes/models/yans-qwen2-0.5B/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-qwen2-0.5B/latest_iteration.txt +No checkpoint found in /work/llm_recipes/models/yans-qwen2-0.5B, skipping optimizer loading +File not found: /work/llm_recipes/models/yans-qwen2-0.5B/latest_iteration.txt +Unable to read latest iteration from /work/llm_recipes/models/yans-qwen2-0.5B/latest_iteration.txt +model info: FullyShardedDataParallel( + (_fsdp_wrapped_module): Qwen2ForCausalLM( + (model): Qwen2Model( + (embed_tokens): Embedding(151936, 896) + (layers): ModuleList( + (0-23): 24 x FullyShardedDataParallel( + (_fsdp_wrapped_module): CheckpointWrapper( + (_checkpoint_wrapped_module): Qwen2DecoderLayer( + (self_attn): Qwen2FlashAttention2( + (q_proj): Linear(in_features=896, out_features=896, bias=True) + (k_proj): Linear(in_features=896, out_features=128, bias=True) + (v_proj): Linear(in_features=896, out_features=128, bias=True) + (o_proj): Linear(in_features=896, out_features=896, bias=False) + (rotary_emb): Qwen2RotaryEmbedding() + ) + (mlp): Qwen2MLP( + (gate_proj): Linear(in_features=896, out_features=4864, bias=False) + (up_proj): Linear(in_features=896, out_features=4864, bias=False) + (down_proj): Linear(in_features=4864, out_features=896, bias=False) + (act_fn): SiLU() + ) + (input_layernorm): Qwen2RMSNorm() + (post_attention_layernorm): Qwen2RMSNorm() + ) + ) + ) + ) + (norm): Qwen2RMSNorm() + ) + (lm_head): Linear(in_features=896, out_features=151936, bias=False) + ) +) +model config: Qwen2Config { + "_name_or_path": "/share/pretrained_lm/Qwen/Qwen2-0.5B", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "label_smoothing": 0.0, + "max_position_embeddings": 4096, + "max_window_layers": 24, + "model_type": "qwen2", + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.43.3", + "use_cache": false, + "use_sliding_window": false, + "vocab_size": 151936 +} +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +Building a BlendedDataset for a single MegatronDataset +Unable to save the indexes because path_to_cache is None +------------------------------------------------------------------ +iteration: 1 , TFLOPS: 69.84552772973656, Tokens per sec: 17370.23184924799, Loss: 4.1814446449279785 +------------------------------------------------------------------ +------------------------------------------------------------------ +iteration: 2 , TFLOPS: 70.2701234853672, Tokens per sec: 17475.82668054555, Loss: 4.1914520263671875 +------------------------------------------------------------------ \ No newline at end of file diff --git a/wandb/run-20240812_072401-esew3nhv/files/wandb-summary.json b/wandb/run-20240812_072401-esew3nhv/files/wandb-summary.json new file mode 100644 index 0000000000000000000000000000000000000000..a8eab4fe7231ef4b43864064b5b3db8addf343b0 --- /dev/null +++ b/wandb/run-20240812_072401-esew3nhv/files/wandb-summary.json @@ -0,0 +1 @@ +{"training/loss": 4.1914520263671875, "training/perplexity": 66.11872742800412, "utils/batch_size": 1, "utils/global_batch_size": 320, "utils/seq_len": 4097, 
"utils/gradient_accumulation_steps": 320, "utils/iteration": 2, "optimizer/lr": 1.076e-06, "optimizer/variance_l2": 0.0028690248120644456, "optimizer/variance_sqrt_l2": 0.3125014781916943, "optimizer/momentum_l2": 0.18910939210451805, "optimizer/weight_l2": 825.0639369164065, "optimizer/variance_l1": 0.0977630615234375, "optimizer/variance_sqrt_l1": 1277.0, "optimizer/momentum_l1": 744.8125, "optimizer/weight_l1": 6886400.0, "optimizer/variance_abs_max": 0.002105712890625, "optimizer/variance_sqrt_abs_max": 0.0458984375, "optimizer/momentum_abs_max": 0.0279541015625, "optimizer/weight_abs_max": 175.0, "stats/1_iteration_time": 75.02019927100082, "stats/tokens_per_sec": 17475.82668054555, "stats/tokens_per_sec_per_gpu": 17475.82668054555, "stats/tflops": 70.2701234853672, "_timestamp": 1723415196.066783, "_runtime": 154.56286883354187, "_step": 2, "_wandb": {"runtime": 191}} \ No newline at end of file