""" |
Utility that checks all docstrings of public objects have an argument section matching their signature. |
Use from the root of the repo with: |
```bash |
python utils/check_docstrings.py |
``` |
for a check that will error in case of inconsistencies (used by `make repo-consistency`). |
To auto-fix issues run: |
```bash |
python utils/check_docstrings.py --fix_and_overwrite |
``` |
which is used by `make fix-copies` (note that this fills what it cans, you might have to manually fill information |
like argument descriptions). |
""" |
import argparse
import ast
import enum
import inspect
import operator as op
import re
from pathlib import Path
from typing import Any, Optional, Tuple, Union

from check_repo import ignore_undocumented

from transformers.utils import direct_transformers_import
# Directory holding the `transformers` sources, resolved against the current working directory (the script is meant
# to be launched from the root of the repo).
PATH_TO_TRANSFORMERS = Path("src").resolve() / "transformers"

# NOTE(review): presumably loads the package straight from the source tree above rather than from an installed
# copy -- confirm against `direct_transformers_import`.
transformers = direct_transformers_import(PATH_TO_TRANSFORMERS)

# Marker used in argument descriptions for arguments that have a default value.
OPTIONAL_KEYWORD = "*optional*"

# Matches the header line opening an argument section (`Args:`, `Arguments:`, `Attributes:`, `Params:`, ...).
_re_args = re.compile(r"^\s*(Args?|Arguments?|Attributes?|Params?|Parameters?):\s*$")
# Matches an argument line `<indent><name> (<description>)`, optionally followed by `:`; the three groups are the
# indent, the name and the description.
_re_parse_arg = re.compile(r"^(\s*)(\S+)\s+\((.+)\)(?:\:|$)")
# Matches the `*optional*, defaults to <value>` tail of a description; the group is everything after `defaults to `.
_re_parse_description = re.compile(r"\*optional\*, defaults to (.*)$")
"InputExample", |
"InputFeatures", |
"TFSequenceSummary", |
"TFBertTokenizer", |
"TFGPT2Tokenizer", |
"ASTFeatureExtractor", |
"AlbertModel", |
"AlbertTokenizerFast", |
"AlignTextModel", |
"AlignVisionConfig", |
"AudioClassificationPipeline", |
"AutoformerConfig", |
"AutomaticSpeechRecognitionPipeline", |
"AzureOpenAiAgent", |
"BarkCoarseConfig", |
"BarkConfig", |
"BarkFineConfig", |
"BarkSemanticConfig", |
"BartConfig", |
"BartTokenizerFast", |
"BarthezTokenizerFast", |
"BeitModel", |
"BertConfig", |
"BertJapaneseTokenizer", |
"BertModel", |
"BertTokenizerFast", |
"BigBirdConfig", |
"BigBirdForQuestionAnswering", |
"BigBirdModel", |
"BigBirdPegasusConfig", |
"BigBirdTokenizerFast", |
"BitImageProcessor", |
"BlenderbotConfig", |
"BlenderbotSmallConfig", |
"BlenderbotSmallTokenizerFast", |
"BlenderbotTokenizerFast", |
"Blip2QFormerConfig", |
"Blip2VisionConfig", |
"BlipTextConfig", |
"BlipVisionConfig", |
"BloomConfig", |
"BloomTokenizerFast", |
"BridgeTowerTextConfig", |
"BridgeTowerVisionConfig", |
"BrosModel", |
"CamembertConfig", |
"CamembertModel", |
"CamembertTokenizerFast", |
"CanineModel", |
"CanineTokenizer", |
"ChineseCLIPTextModel", |
"ClapTextConfig", |
"ConditionalDetrConfig", |
"ConditionalDetrImageProcessor", |
"ConvBertConfig", |
"ConvBertTokenizerFast", |
"ConvNextConfig", |
"ConvNextV2Config", |
"ConversationalPipeline", |
"CpmAntTokenizer", |
"CvtConfig", |
"CvtModel", |
"DeiTImageProcessor", |
"DPRReaderTokenizer", |
"DPRReaderTokenizerFast", |
"DPTModel", |
"Data2VecAudioConfig", |
"Data2VecTextConfig", |
"Data2VecTextModel", |
"Data2VecVisionModel", |
"DataCollatorForLanguageModeling", |
"DebertaConfig", |
"DebertaV2Config", |
"DebertaV2Tokenizer", |
"DebertaV2TokenizerFast", |
"DecisionTransformerConfig", |
"DeformableDetrConfig", |
"DeformableDetrImageProcessor", |
"DeiTModel", |
"DepthEstimationPipeline", |
"DetaConfig", |
"DetaImageProcessor", |
"DetrConfig", |
"DetrImageProcessor", |
"DinatModel", |
"DistilBertConfig", |
"DistilBertTokenizerFast", |
"DocumentQuestionAnsweringPipeline", |
"DonutSwinModel", |
"EarlyStoppingCallback", |
"EfficientFormerConfig", |
"EfficientFormerImageProcessor", |
"EfficientNetConfig", |
"ElectraConfig", |
"ElectraTokenizerFast", |
"EncoderDecoderModel", |
"EncoderRepetitionPenaltyLogitsProcessor", |
"ErnieMModel", |
"ErnieModel", |
"ErnieMTokenizer", |
"EsmConfig", |
"EsmModel", |
"FlaxAlbertForMaskedLM", |
"FlaxAlbertForMultipleChoice", |
"FlaxAlbertForPreTraining", |
"FlaxAlbertForQuestionAnswering", |
"FlaxAlbertForSequenceClassification", |
"FlaxAlbertForTokenClassification", |
"FlaxAlbertModel", |
"FlaxBartForCausalLM", |
"FlaxBartForConditionalGeneration", |
"FlaxBartForQuestionAnswering", |
"FlaxBartForSequenceClassification", |
"FlaxBartModel", |
"FlaxBeitForImageClassification", |
"FlaxBeitForMaskedImageModeling", |
"FlaxBeitModel", |
"FlaxBertForCausalLM", |
"FlaxBertForMaskedLM", |
"FlaxBertForMultipleChoice", |
"FlaxBertForNextSentencePrediction", |
"FlaxBertForPreTraining", |
"FlaxBertForQuestionAnswering", |
"FlaxBertForSequenceClassification", |
"FlaxBertForTokenClassification", |
"FlaxBertModel", |
"FlaxBigBirdForCausalLM", |
"FlaxBigBirdForMaskedLM", |
"FlaxBigBirdForMultipleChoice", |
"FlaxBigBirdForPreTraining", |
"FlaxBigBirdForQuestionAnswering", |
"FlaxBigBirdForSequenceClassification", |
"FlaxBigBirdForTokenClassification", |
"FlaxBigBirdModel", |
"FlaxBlenderbotForConditionalGeneration", |
"FlaxBlenderbotModel", |
"FlaxBlenderbotSmallForConditionalGeneration", |
"FlaxBlenderbotSmallModel", |
"FlaxBloomForCausalLM", |
"FlaxBloomModel", |
"FlaxCLIPModel", |
"FlaxDistilBertForMaskedLM", |
"FlaxDistilBertForMultipleChoice", |
"FlaxDistilBertForQuestionAnswering", |
"FlaxDistilBertForSequenceClassification", |
"FlaxDistilBertForTokenClassification", |
"FlaxDistilBertModel", |
"FlaxElectraForCausalLM", |
"FlaxElectraForMaskedLM", |
"FlaxElectraForMultipleChoice", |
"FlaxElectraForPreTraining", |
"FlaxElectraForQuestionAnswering", |
"FlaxElectraForSequenceClassification", |
"FlaxElectraForTokenClassification", |
"FlaxElectraModel", |
"FlaxEncoderDecoderModel", |
"FlaxGPT2LMHeadModel", |
"FlaxGPT2Model", |
"FlaxGPTJForCausalLM", |
"FlaxGPTJModel", |
"FlaxGPTNeoForCausalLM", |
"FlaxGPTNeoModel", |
"FlaxLlamaForCausalLM", |
"FlaxLlamaModel", |
"FlaxGemmaForCausalLM", |
"FlaxGemmaModel", |
"FlaxMBartForConditionalGeneration", |
"FlaxMBartForQuestionAnswering", |
"FlaxMBartForSequenceClassification", |
"FlaxMBartModel", |
"FlaxMarianMTModel", |
"FlaxMarianModel", |
"FlaxMistralForCausalLM", |
"FlaxMistralModel", |
"FlaxOPTForCausalLM", |
"FlaxPegasusForConditionalGeneration", |
"FlaxPegasusModel", |
"FlaxRegNetForImageClassification", |
"FlaxRegNetModel", |
"FlaxResNetForImageClassification", |
"FlaxResNetModel", |
"FlaxRoFormerForMaskedLM", |
"FlaxRoFormerForMultipleChoice", |
"FlaxRoFormerForQuestionAnswering", |
"FlaxRoFormerForSequenceClassification", |
"FlaxRoFormerForTokenClassification", |
"FlaxRoFormerModel", |
"FlaxRobertaForCausalLM", |
"FlaxRobertaForMaskedLM", |
"FlaxRobertaForMultipleChoice", |
"FlaxRobertaForQuestionAnswering", |
"FlaxRobertaForSequenceClassification", |
"FlaxRobertaForTokenClassification", |
"FlaxRobertaModel", |
"FlaxRobertaPreLayerNormForCausalLM", |
"FlaxRobertaPreLayerNormForMaskedLM", |
"FlaxRobertaPreLayerNormForMultipleChoice", |
"FlaxRobertaPreLayerNormForQuestionAnswering", |
"FlaxRobertaPreLayerNormForSequenceClassification", |
"FlaxRobertaPreLayerNormForTokenClassification", |
"FlaxRobertaPreLayerNormModel", |
"FlaxSpeechEncoderDecoderModel", |
"FlaxViTForImageClassification", |
"FlaxViTModel", |
"FlaxVisionEncoderDecoderModel", |
"FlaxVisionTextDualEncoderModel", |
"FlaxWav2Vec2ForCTC", |
"FlaxWav2Vec2ForPreTraining", |
"FlaxWav2Vec2Model", |
"FlaxWhisperForAudioClassification", |
"FlaxWhisperForConditionalGeneration", |
"FlaxWhisperModel", |
"FlaxWhisperTimeStampLogitsProcessor", |
"FlaxXGLMForCausalLM", |
"FlaxXGLMModel", |
"FlaxXLMRobertaForCausalLM", |
"FlaxXLMRobertaForMaskedLM", |
"FlaxXLMRobertaForMultipleChoice", |
"FlaxXLMRobertaForQuestionAnswering", |
"FlaxXLMRobertaForSequenceClassification", |
"FlaxXLMRobertaForTokenClassification", |
"FlaxXLMRobertaModel", |
"FNetConfig", |
"FNetModel", |
"FNetTokenizerFast", |
"FSMTConfig", |
"FeatureExtractionPipeline", |
"FillMaskPipeline", |
"FlaubertConfig", |
"FlavaConfig", |
"FlavaForPreTraining", |
"FlavaImageModel", |
"FlavaImageProcessor", |
"FlavaMultimodalModel", |
"FlavaTextConfig", |
"FlavaTextModel", |
"FocalNetModel", |
"FunnelTokenizerFast", |
"GPTBigCodeConfig", |
"GPTJConfig", |
"GPTNeoXConfig", |
"GPTNeoXJapaneseConfig", |
"GPTNeoXTokenizerFast", |
"GPTSanJapaneseConfig", |
"GitConfig", |
"GitVisionConfig", |
"GraphormerConfig", |
"GroupViTTextConfig", |
"GroupViTVisionConfig", |
"HerbertTokenizerFast", |
"HubertConfig", |
"HubertForCTC", |
"IBertConfig", |
"IBertModel", |
"IdeficsConfig", |
"IdeficsProcessor", |
"ImageClassificationPipeline", |
"ImageFeatureExtractionPipeline", |
"ImageGPTConfig", |
"ImageSegmentationPipeline", |
"ImageToImagePipeline", |
"ImageToTextPipeline", |
"InformerConfig", |
"InstructBlipQFormerConfig", |
"JukeboxPriorConfig", |
"JukeboxTokenizer", |
"LEDConfig", |
"LEDTokenizerFast", |
"LayoutLMForQuestionAnswering", |
"LayoutLMTokenizerFast", |
"LayoutLMv2Config", |
"LayoutLMv2ForQuestionAnswering", |
"LayoutLMv2TokenizerFast", |
"LayoutLMv3Config", |
"LayoutLMv3ImageProcessor", |
"LayoutLMv3TokenizerFast", |
"LayoutXLMTokenizerFast", |
"LevitConfig", |
"LiltConfig", |
"LiltModel", |
"LongT5Config", |
"LongformerConfig", |
"LongformerModel", |
"LongformerTokenizerFast", |
"LukeModel", |
"LukeTokenizer", |
"LxmertTokenizerFast", |
"M2M100Config", |
"M2M100Tokenizer", |
"MarkupLMProcessor", |
"MaskGenerationPipeline", |
"MBart50TokenizerFast", |
"MBartConfig", |
"MCTCTFeatureExtractor", |
"MPNetConfig", |
"MPNetModel", |
"MPNetTokenizerFast", |
"MT5Config", |
"MT5TokenizerFast", |
"MarianConfig", |
"MarianTokenizer", |
"MarkupLMConfig", |
"MarkupLMModel", |
"MarkupLMTokenizer", |
"MarkupLMTokenizerFast", |
"Mask2FormerConfig", |
"MaskFormerConfig", |
"MaxTimeCriteria", |
"MegaConfig", |
"MegaModel", |
"MegatronBertConfig", |
"MegatronBertForPreTraining", |
"MegatronBertModel", |
"MobileBertConfig", |
"MobileBertModel", |
"MobileBertTokenizerFast", |
"MobileNetV1ImageProcessor", |
"MobileNetV1Model", |
"MobileNetV2ImageProcessor", |
"MobileNetV2Model", |
"MobileViTModel", |
"MobileViTV2Model", |
"MLukeTokenizer", |
"MraConfig", |
"MusicgenDecoderConfig", |
"MusicgenForConditionalGeneration", |
"MusicgenMelodyForConditionalGeneration", |
"MvpConfig", |
"MvpTokenizerFast", |
"MT5Tokenizer", |
"NatModel", |
"NerPipeline", |
"NezhaConfig", |
"NezhaModel", |
"NllbMoeConfig", |
"NllbTokenizer", |
"NllbTokenizerFast", |
"NystromformerConfig", |
"OPTConfig", |
"ObjectDetectionPipeline", |
"OneFormerProcessor", |
"OpenAIGPTTokenizerFast", |
"OpenLlamaConfig", |
"PLBartConfig", |
"PegasusConfig", |
"PegasusTokenizer", |
"PegasusTokenizerFast", |
"PegasusXConfig", |
"PerceiverImageProcessor", |
"PerceiverModel", |
"PerceiverTokenizer", |
"PersimmonConfig", |
"Pipeline", |
"Pix2StructConfig", |
"Pix2StructTextConfig", |
"PLBartTokenizer", |
"Pop2PianoConfig", |
"PreTrainedTokenizer", |
"PreTrainedTokenizerBase", |
"PreTrainedTokenizerFast", |
"PrefixConstrainedLogitsProcessor", |
"ProphetNetConfig", |
"QDQBertConfig", |
"QDQBertModel", |
"QuestionAnsweringPipeline", |
"RagConfig", |
"RagModel", |
"RagRetriever", |
"RagSequenceForGeneration", |
"RagTokenForGeneration", |
"RealmConfig", |
"RealmForOpenQA", |
"RealmScorer", |
"RealmTokenizerFast", |
"ReformerConfig", |
"ReformerTokenizerFast", |
"RegNetConfig", |
"RemBertConfig", |
"RemBertModel", |
"RemBertTokenizer", |
"RemBertTokenizerFast", |
"RepetitionPenaltyLogitsProcessor", |
"RetriBertConfig", |
"RetriBertTokenizerFast", |
"RoCBertConfig", |
"RoCBertModel", |
"RoCBertTokenizer", |
"RoFormerConfig", |
"RobertaConfig", |
"RobertaModel", |
"RobertaPreLayerNormConfig", |
"RobertaPreLayerNormModel", |
"RobertaTokenizerFast", |
"SEWConfig", |
"SEWDConfig", |
"SEWForCTC", |
"SamConfig", |
"SamPromptEncoderConfig", |
"SeamlessM4TConfig", |
"SeamlessM4Tv2Config", |
"Seq2SeqTrainingArguments", |
"SpecialTokensMixin", |
"Speech2Text2Config", |
"Speech2Text2Tokenizer", |
"Speech2TextTokenizer", |
"SpeechEncoderDecoderModel", |
"SpeechT5Config", |
"SpeechT5Model", |
"SplinterConfig", |
"SplinterTokenizerFast", |
"SqueezeBertTokenizerFast", |
"SummarizationPipeline", |
"Swin2SRImageProcessor", |
"Swinv2Model", |
"SwitchTransformersConfig", |
"T5Config", |
"T5Tokenizer", |
"T5TokenizerFast", |
"TableQuestionAnsweringPipeline", |
"TableTransformerConfig", |
"TapasConfig", |
"TapasModel", |
"TapasTokenizer", |
"Text2TextGenerationPipeline", |
"TextClassificationPipeline", |
"TextGenerationPipeline", |
"TFAlbertForMaskedLM", |
"TFAlbertForMultipleChoice", |
"TFAlbertForPreTraining", |
"TFAlbertForQuestionAnswering", |
"TFAlbertForSequenceClassification", |
"TFAlbertForTokenClassification", |
"TFAlbertModel", |
"TFBartForConditionalGeneration", |
"TFBartForSequenceClassification", |
"TFBartModel", |
"TFBertForMaskedLM", |
"TFBertForMultipleChoice", |
"TFBertForNextSentencePrediction", |
"TFBertForPreTraining", |
"TFBertForQuestionAnswering", |
"TFBertForSequenceClassification", |
"TFBertForTokenClassification", |
"TFBertModel", |
"TFBlenderbotForConditionalGeneration", |
"TFBlenderbotModel", |
"TFBlenderbotSmallForConditionalGeneration", |
"TFBlenderbotSmallModel", |
"TFBlipForConditionalGeneration", |
"TFBlipForImageTextRetrieval", |
"TFBlipForQuestionAnswering", |
"TFCLIPModel", |
"TFCTRLForSequenceClassification", |
"TFCTRLLMHeadModel", |
"TFCTRLModel", |
"TFCamembertForCausalLM", |
"TFCamembertForMaskedLM", |
"TFCamembertForMultipleChoice", |
"TFCamembertForQuestionAnswering", |
"TFCamembertForSequenceClassification", |
"TFCamembertForTokenClassification", |
"TFCamembertModel", |
"TFConvBertForMaskedLM", |
"TFConvBertForMultipleChoice", |
"TFConvBertForQuestionAnswering", |
"TFConvBertForSequenceClassification", |
"TFConvBertForTokenClassification", |
"TFConvBertModel", |
"TFConvNextForImageClassification", |
"TFConvNextModel", |
"TFConvNextV2Model", |
"TFConvNextV2ForImageClassification", |
"TFCvtForImageClassification", |
"TFCvtModel", |
"TFDPRReader", |
"TFData2VecVisionForImageClassification", |
"TFData2VecVisionForSemanticSegmentation", |
"TFData2VecVisionModel", |
"TFDebertaForMaskedLM", |
"TFDebertaForQuestionAnswering", |
"TFDebertaForSequenceClassification", |
"TFDebertaForTokenClassification", |
"TFDebertaModel", |
"TFDebertaV2ForMaskedLM", |
"TFDebertaV2ForMultipleChoice", |
"TFDebertaV2ForQuestionAnswering", |
"TFDebertaV2ForSequenceClassification", |
"TFDebertaV2ForTokenClassification", |
"TFDebertaV2Model", |
"TFDeiTForImageClassification", |
"TFDeiTForImageClassificationWithTeacher", |
"TFDeiTForMaskedImageModeling", |
"TFDeiTModel", |
"TFDistilBertForMaskedLM", |
"TFDistilBertForMultipleChoice", |
"TFDistilBertForQuestionAnswering", |
"TFDistilBertForSequenceClassification", |
"TFDistilBertForTokenClassification", |
"TFDistilBertModel", |
"TFEfficientFormerForImageClassification", |
"TFEfficientFormerForImageClassificationWithTeacher", |
"TFEfficientFormerModel", |
"TFElectraForMaskedLM", |
"TFElectraForMultipleChoice", |
"TFElectraForPreTraining", |
"TFElectraForQuestionAnswering", |
"TFElectraForSequenceClassification", |
"TFElectraForTokenClassification", |
"TFElectraModel", |
"TFEncoderDecoderModel", |
"TFEsmForMaskedLM", |
"TFEsmForSequenceClassification", |
"TFEsmForTokenClassification", |
"TFEsmModel", |
"TFFlaubertForMultipleChoice", |
"TFFlaubertForQuestionAnsweringSimple", |
"TFFlaubertForSequenceClassification", |
"TFFlaubertForTokenClassification", |
"TFFlaubertModel", |
"TFFlaubertWithLMHeadModel", |
"TFFunnelBaseModel", |
"TFFunnelForMaskedLM", |
"TFFunnelForMultipleChoice", |
"TFFunnelForPreTraining", |
"TFFunnelForQuestionAnswering", |
"TFFunnelForSequenceClassification", |
"TFFunnelForTokenClassification", |
"TFFunnelModel", |
"TFGPT2DoubleHeadsModel", |
"TFGPT2ForSequenceClassification", |
"TFGPT2LMHeadModel", |
"TFGPT2Model", |
"TFGPTJForCausalLM", |
"TFGPTJForQuestionAnswering", |
"TFGPTJForSequenceClassification", |
"TFGPTJModel", |
"TFGroupViTModel", |
"TFHubertForCTC", |
"TFHubertModel", |
"TFLEDForConditionalGeneration", |
"TFLEDModel", |
"TFLayoutLMForMaskedLM", |
"TFLayoutLMForQuestionAnswering", |
"TFLayoutLMForSequenceClassification", |
"TFLayoutLMForTokenClassification", |
"TFLayoutLMModel", |
"TFLayoutLMv3ForQuestionAnswering", |
"TFLayoutLMv3ForSequenceClassification", |
"TFLayoutLMv3ForTokenClassification", |
"TFLayoutLMv3Model", |
"TFLongformerForMaskedLM", |
"TFLongformerForMultipleChoice", |
"TFLongformerForQuestionAnswering", |
"TFLongformerForSequenceClassification", |
"TFLongformerForTokenClassification", |
"TFLongformerModel", |
"TFLxmertForPreTraining", |
"TFLxmertModel", |
"TFMBartForConditionalGeneration", |
"TFMBartModel", |
"TFMPNetForMaskedLM", |
"TFMPNetForMultipleChoice", |
"TFMPNetForQuestionAnswering", |
"TFMPNetForSequenceClassification", |
"TFMPNetForTokenClassification", |
"TFMPNetModel", |
"TFMarianMTModel", |
"TFMarianModel", |
"TFMobileBertForMaskedLM", |
"TFMobileBertForMultipleChoice", |
"TFMobileBertForNextSentencePrediction", |
"TFMobileBertForPreTraining", |
"TFMobileBertForQuestionAnswering", |
"TFMobileBertForSequenceClassification", |
"TFMobileBertForTokenClassification", |
"TFMobileBertModel", |
"TFMobileViTForImageClassification", |
"TFMobileViTForSemanticSegmentation", |
"TFMobileViTModel", |
"TFOPTForCausalLM", |
"TFOPTModel", |
"TFOpenAIGPTDoubleHeadsModel", |
"TFOpenAIGPTForSequenceClassification", |
"TFOpenAIGPTLMHeadModel", |
"TFOpenAIGPTModel", |
"TFPegasusForConditionalGeneration", |
"TFPegasusModel", |
"TFRagModel", |
"TFRagSequenceForGeneration", |
"TFRagTokenForGeneration", |
"TFRegNetForImageClassification", |
"TFRegNetModel", |
"TFRemBertForCausalLM", |
"TFRemBertForMaskedLM", |
"TFRemBertForMultipleChoice", |
"TFRemBertForQuestionAnswering", |
"TFRemBertForSequenceClassification", |
"TFRemBertForTokenClassification", |
"TFRemBertModel", |
"TFRepetitionPenaltyLogitsProcessor", |
"TFResNetForImageClassification", |
"TFResNetModel", |
"TFRoFormerForCausalLM", |
"TFRoFormerForMaskedLM", |
"TFRoFormerForMultipleChoice", |
"TFRoFormerForQuestionAnswering", |
"TFRoFormerForSequenceClassification", |
"TFRoFormerForTokenClassification", |
"TFRoFormerModel", |
"TFRobertaForMaskedLM", |
"TFRobertaForMultipleChoice", |
"TFRobertaForQuestionAnswering", |
"TFRobertaForSequenceClassification", |
"TFRobertaForTokenClassification", |
"TFRobertaModel", |
"TFRobertaPreLayerNormForMaskedLM", |
"TFRobertaPreLayerNormForMultipleChoice", |
"TFRobertaPreLayerNormForQuestionAnswering", |
"TFRobertaPreLayerNormForSequenceClassification", |
"TFRobertaPreLayerNormForTokenClassification", |
"TFRobertaPreLayerNormModel", |
"TFSamModel", |
"TFSegformerForImageClassification", |
"TFSegformerForSemanticSegmentation", |
"TFSegformerModel", |
"TFSpeech2TextForConditionalGeneration", |
"TFSpeech2TextModel", |
"TFSwiftFormerForImageClassification", |
"TFSwiftFormerModel", |
"TFSwinForImageClassification", |
"TFSwinForMaskedImageModeling", |
"TFSwinModel", |
"TFT5EncoderModel", |
"TFT5ForConditionalGeneration", |
"TFT5Model", |
"TFTapasForMaskedLM", |
"TFTapasForQuestionAnswering", |
"TFTapasForSequenceClassification", |
"TFTapasModel", |
"TFTransfoXLForSequenceClassification", |
"TFTransfoXLLMHeadModel", |
"TFTransfoXLModel", |
"TFViTForImageClassification", |
"TFViTMAEForPreTraining", |
"TFViTMAEModel", |
"TFViTModel", |
"TFVisionEncoderDecoderModel", |
"TFVisionTextDualEncoderModel", |
"TFWav2Vec2ForCTC", |
"TFWav2Vec2Model", |
"TFWhisperForConditionalGeneration", |
"TFWhisperModel", |
"TFXGLMForCausalLM", |
"TFXGLMModel", |
"TFXLMForMultipleChoice", |
"TFXLMForQuestionAnsweringSimple", |
"TFXLMForSequenceClassification", |
"TFXLMForTokenClassification", |
"TFXLMModel", |
"TFXLMRobertaForCausalLM", |
"TFXLMRobertaForMaskedLM", |
"TFXLMRobertaForMultipleChoice", |
"TFXLMRobertaForQuestionAnswering", |
"TFXLMRobertaForSequenceClassification", |
"TFXLMRobertaForTokenClassification", |
"TFXLMRobertaModel", |
"TFXLMWithLMHeadModel", |
"TFXLNetForMultipleChoice", |
"TFXLNetForQuestionAnsweringSimple", |
"TFXLNetForSequenceClassification", |
"TFXLNetForTokenClassification", |
"TFXLNetLMHeadModel", |
"TFXLNetModel", |
"TimeSeriesTransformerConfig", |
"TokenClassificationPipeline", |
"TrOCRConfig", |
"TrainerState", |
"TrainingArguments", |
"TrajectoryTransformerConfig", |
"TranslationPipeline", |
"TvltImageProcessor", |
"UMT5Config", |
"UperNetConfig", |
"UperNetForSemanticSegmentation", |
"ViTHybridImageProcessor", |
"ViTHybridModel", |
"ViTMSNModel", |
"ViTModel", |
"VideoClassificationPipeline", |
"ViltConfig", |
"ViltForImagesAndTextClassification", |
"ViltModel", |
"VisionEncoderDecoderModel", |
"VisionTextDualEncoderModel", |
"VisualBertConfig", |
"VisualBertModel", |
"VisualQuestionAnsweringPipeline", |
"VitMatteForImageMatting", |
"VitsTokenizer", |
"VivitModel", |
"Wav2Vec2BertForCTC", |
"Wav2Vec2CTCTokenizer", |
"Wav2Vec2Config", |
"Wav2Vec2ConformerConfig", |
"Wav2Vec2ConformerForCTC", |
"Wav2Vec2FeatureExtractor", |
"Wav2Vec2PhonemeCTCTokenizer", |
"WavLMConfig", |
"WavLMForCTC", |
"WhisperConfig", |
"WhisperFeatureExtractor", |
"WhisperForAudioClassification", |
"XCLIPTextConfig", |
"XCLIPVisionConfig", |
"XGLMConfig", |
"XGLMModel", |
"XGLMTokenizerFast", |
"XLMConfig", |
"XLMProphetNetConfig", |
"XLMRobertaConfig", |
"XLMRobertaModel", |
"XLMRobertaTokenizerFast", |
"XLMRobertaXLConfig", |
"XLMRobertaXLModel", |
"XLNetConfig", |
"XLNetTokenizerFast", |
"XmodConfig", |
"XmodModel", |
"YolosImageProcessor", |
"YolosModel", |
"YosoConfig", |
"ZeroShotAudioClassificationPipeline", |
"ZeroShotClassificationPipeline", |
"ZeroShotImageClassificationPipeline", |
"ZeroShotObjectDetectionPipeline", |
] |
# Operator node types that `eval_node` is allowed to evaluate, mapped to the function implementing each of them.
# Note that `^` is bitwise xor here (as in Python), not exponentiation.
MATH_OPERATORS = {
    ast.Add: op.add,
    ast.Sub: op.sub,
    ast.Mult: op.mul,
    ast.Div: op.truediv,
    ast.Pow: op.pow,
    ast.BitXor: op.xor,
    ast.USub: op.neg,
}
def find_indent(line: str) -> int:
    """
    Returns the number of whitespace characters that start a line (its indent).

    Args:
        line (`str`): The line to inspect.

    Returns:
        `int`: The length of the leading run of whitespace, 0 when the line does not start with whitespace. A line
        made only of whitespace counts in full.
    """
    # `\s*` also matches the empty string, so this match can never fail.
    return re.match(r"\s*", line).end()
def stringify_default(default: Any) -> str:
    """
    Returns the string representation of a default value, as used in docstrings: numbers are left as is, all other
    objects are in backticks.

    Args:
        default (`Any`): The default value to process

    Returns:
        `str`: The string representation of that default.
    """
    # Order matters: `bool` is a subclass of `int`, and enums may subclass `int` as well, so both are tested before
    # the plain `int` case.
    if isinstance(default, bool):
        return f"`{default}`"
    if isinstance(default, enum.Enum):
        return f"`{str(default)}`"
    if isinstance(default, int):
        return str(default)
    if isinstance(default, float):
        result = str(default)
        # Long float representations are shortened to two decimals.
        return str(round(default, 2)) if len(result) > 6 else result
    if isinstance(default, str):
        # Numeric strings are written bare, any other string is quoted inside backticks.
        return str(default) if default.isnumeric() else f'`"{default}"`'
    if isinstance(default, type):
        return f"`{default.__name__}`"
    return f"`{default}`"
def eval_math_expression(expression: str) -> Optional[Union[float, int]]: |
""" |
Evaluate (safely) a mathematial expression and returns its value. |
Args: |
expression (`str`): The expression to evaluate. |
Returns: |
`Optional[Union[float, int]]`: Returns `None` if the evaluation fails in any way and the value computed |
otherwise. |
Example: |
```py |
>>> eval_expr('2^6') |
4 |
>>> eval_expr('2**6') |
64 |
>>> eval_expr('1 + 2*3**(4^5) / (6 + -7)') |
-5.0 |
``` |
""" |
try: |
return eval_node(ast.parse(expression, mode="eval").body) |
except TypeError: |
return |
def eval_node(node):
    """
    Recursively evaluates a node of the syntax tree of a mathematical expression.

    Args:
        node (`ast.AST`): The node to evaluate.

    Returns:
        The numeric value of the node.

    Raises:
        `TypeError`: If the node is not a number nor a unary/binary operation using one of the operators in
        `MATH_OPERATORS`.
    """
    if isinstance(node, ast.Constant):
        value = node.value
        # Only real numbers literals are accepted; `bool` is a subclass of `int` and has to be rejected explicitly.
        if isinstance(value, (int, float, complex)) and not isinstance(value, bool):
            return value
        raise TypeError(node)

    if isinstance(node, (ast.BinOp, ast.UnaryOp)):
        operator = MATH_OPERATORS.get(type(node.op))
        if operator is None:
            # Unsupported operator: reported like any other unsupported node.
            raise TypeError(node)
        if isinstance(node, ast.BinOp):
            return operator(eval_node(node.left), eval_node(node.right))
        return operator(eval_node(node.operand))

    raise TypeError(node)
def replace_default_in_arg_description(description: str, default: Any) -> str:
    """
    Catches the default value in the description of an argument inside a docstring and replaces it by the value passed.

    Args:
        description (`str`): The description of an argument in a docstring to process.
        default (`Any`): The default value that should be in the docstring of that argument.

    Returns:
        `str`: The description updated with the new default value.
    """
    # Normalize the other spellings of the optional marker found in docstrings.
    description = description.replace("`optional`", OPTIONAL_KEYWORD)
    description = description.replace("**optional**", OPTIONAL_KEYWORD)
    len_optional = len(OPTIONAL_KEYWORD)

    if default is inspect._empty:
        # No default: remove the optional marker and everything after it, plus the comma that preceded it.
        idx = description.find(OPTIONAL_KEYWORD)
        if idx != -1:
            description = description[:idx].rstrip()
            if description.endswith(","):
                description = description[:-1].rstrip()
        return description

    if default is None:
        # A `None` default is not written, only the optional marker is. A documented default that is not `None` is
        # left untouched.
        idx = description.find(OPTIONAL_KEYWORD)
        if idx == -1:
            description = f"{description}, {OPTIONAL_KEYWORD}"
        elif re.search(r"defaults to `?None`?", description) is not None:
            description = description[: idx + len_optional]
        return description

    str_default = None
    # For numbers, the documented default may be a math expression (like `1/255`) or an enum member name: keep what is
    # written when it agrees with the actual default instead of replacing it by the computed value.
    documented = None
    if isinstance(default, (int, float)):
        documented = re.search("defaults to `?(.*?)(?:`|$)", description)
    if documented is not None:
        current_default = documented.groups()[0]
        if default == eval_math_expression(current_default):
            try:
                # Converts directly to the type of the default: it is a plain value.
                str_default = str(type(default)(current_default))
            except Exception:
                # Otherwise it is an expression, which is kept verbatim in a code span.
                str_default = f"`{current_default}`"
        elif isinstance(default, enum.Enum) and default.name == current_default.split(".")[-1]:
            # NOTE(review): only reachable for enums that are also `int`/`float` instances, because of the
            # `isinstance` check guarding this branch -- confirm that is the intent.
            str_default = f"`{current_default}`"

    if str_default is None:
        str_default = stringify_default(default)

    if OPTIONAL_KEYWORD not in description:
        description = f"{description}, {OPTIONAL_KEYWORD}, defaults to {str_default}"
    elif _re_parse_description.search(description) is None:
        idx = description.find(OPTIONAL_KEYWORD)
        prefix = description[: idx + len_optional]
        description = f"{prefix}, defaults to {str_default}"
    else:
        # A callable replacement is used so that backslashes in the default are inserted literally instead of being
        # processed as escape sequences of a replacement template.
        description = _re_parse_description.sub(lambda _: f"*optional*, defaults to {str_default}", description)

    return description
def get_default_description(arg: inspect.Parameter) -> str:
    """
    Builds a default description for a parameter that was not documented.

    Args:
        arg (`inspect.Parameter`): The argument in the signature to generate a description for.

    Returns:
        `str`: The description.
    """
    annotation = arg.annotation
    if annotation is inspect._empty:
        # No annotation: leave a placeholder that has to be filled by hand.
        arg_type = "<fill_type>"
    elif hasattr(annotation, "__name__"):
        arg_type = annotation.__name__
    else:
        arg_type = str(annotation)

    if arg.default is inspect._empty:
        return f"`{arg_type}`"
    if arg.default is None:
        return f"`{arg_type}`, {OPTIONAL_KEYWORD}"
    str_default = stringify_default(arg.default)
    return f"`{arg_type}`, {OPTIONAL_KEYWORD}, defaults to {str_default}"
def find_source_file(obj: Any) -> Path:
    """
    Finds the source file of an object.

    Args:
        obj (`Any`): The object whose source file we are looking for.

    Returns:
        `Path`: The source file.
    """
    module = obj.__module__
    # The first component of the module name is skipped: `PATH_TO_TRANSFORMERS` already points inside the package.
    obj_file = PATH_TO_TRANSFORMERS
    for part in module.split(".")[1:]:
        obj_file = obj_file / part
    return obj_file.with_suffix(".py")
def match_docstring_with_signature(obj: Any) -> Optional[Tuple[str, str]]:
    """
    Matches the docstring of an object with its signature.

    A comment `# no-format` on the line right before the docstring disables the check for that object, and a comment
    `# ignore-order` keeps the documented arguments in the order they are written instead of the signature order.

    Args:
        obj (`Any`): The object to process.

    Returns:
        `Optional[Tuple[str, str]]`: Returns `None` if there is no docstring or no parameters documented in the
        docstring, otherwise returns a tuple of two strings: the current documentation of the arguments in the
        docstring and the one matched with the signature.
    """
    # The docstring may be missing, `None` or empty: there is nothing to match in any of those cases.
    if not getattr(obj, "__doc__", None):
        return None

    # Read the source code to see if there is a special command on the line before the docstring.
    try:
        source, _ = inspect.getsourcelines(obj)
    except OSError:
        source = []

    idx = 0
    while idx < len(source) and '"""' not in source[idx]:
        idx += 1

    ignore_order = False
    # `idx > 0` makes sure there is a line before the docstring (index -1 would be the last line of the source).
    if 0 < idx < len(source):
        line_before_docstring = source[idx - 1]
        if re.search(r"^\s*#\s*no-format\s*$", line_before_docstring):
            return None
        elif re.search(r"^\s*#\s*ignore-order\s*$", line_before_docstring):
            ignore_order = True

    signature = inspect.signature(obj).parameters

    # Get to the line that opens the argument section.
    obj_doc_lines = obj.__doc__.split("\n")
    idx = 0
    while idx < len(obj_doc_lines) and _re_args.search(obj_doc_lines[idx]) is None:
        idx += 1
    if idx == len(obj_doc_lines):
        # No parameters are documented.
        return None

    indent = find_indent(obj_doc_lines[idx])
    arguments = {}
    current_arg = None
    idx += 1
    start_idx = idx
    # The section runs until the first nonempty line that is not indented deeper than its header.
    while idx < len(obj_doc_lines) and (
        len(obj_doc_lines[idx].strip()) == 0 or find_indent(obj_doc_lines[idx]) > indent
    ):
        if find_indent(obj_doc_lines[idx]) == indent + 4:
            # Line at the indent of argument names: start a new argument if it parses as one.
            re_search_arg = _re_parse_arg.search(obj_doc_lines[idx])
            if re_search_arg is not None:
                arg_indent, name, description = re_search_arg.groups()
                current_arg = name
                if name in signature:
                    default = signature[name].default
                    if signature[name].kind is inspect._ParameterKind.VAR_KEYWORD:
                        default = None
                    new_description = replace_default_in_arg_description(description, default)
                else:
                    new_description = description
                # A callable replacement inserts the description literally, whatever backslashes it contains.
                init_doc = _re_parse_arg.sub(
                    lambda _: f"{arg_indent}{name} ({new_description}):", obj_doc_lines[idx]
                )
                arguments[current_arg] = [init_doc]
        elif current_arg is not None:
            # Continuation line of the description of the current argument.
            arguments[current_arg].append(obj_doc_lines[idx])

        idx += 1

    # Walk back over the blank lines that end the section; they are not part of the last argument.
    idx -= 1
    while len(obj_doc_lines[idx].strip()) == 0:
        if current_arg is not None:
            arguments[current_arg] = arguments[current_arg][:-1]
        idx -= 1
    idx += 1

    old_doc_arg = "\n".join(obj_doc_lines[start_idx:idx])

    old_arguments = list(arguments.keys())
    arguments = {name: "\n".join(doc) for name, doc in arguments.items()}
    # Add a template for every argument of the signature that is not documented. Private arguments, `*args` and
    # `**kwargs` get an empty entry, which is filtered out below.
    for name, arg in signature.items():
        if name in arguments:
            continue
        if name.startswith("_") or arg.kind in [
            inspect._ParameterKind.VAR_KEYWORD,
            inspect._ParameterKind.VAR_POSITIONAL,
        ]:
            arguments[name] = ""
        else:
            arg_desc = get_default_description(arg)
            arguments[name] = " " * (indent + 4) + f"{name} ({arg_desc}): <fill_docstring>"

    if ignore_order:
        # Documented arguments keep their order, then come the missing ones, in signature order so that the result
        # is the same from one run to the next.
        documented = set(old_arguments)
        new_param_docs = [arguments[name] for name in old_arguments if name in signature]
        missing = [name for name in signature if name not in documented]
        new_param_docs.extend([arguments[name] for name in missing if len(arguments[name]) > 0])
    else:
        new_param_docs = [arguments[name] for name in signature.keys() if len(arguments[name]) > 0]
    new_doc_arg = "\n".join(new_param_docs)

    return old_doc_arg, new_doc_arg
def fix_docstring(obj: Any, old_doc_args: str, new_doc_args: str):
    """
    Fixes the docstring of an object by replacing its arguments documentation by the one matched with the signature.

    Nothing is written when the source of `obj` has no argument section, or when that section is not exactly
    `old_doc_args`.

    Args:
        obj (`Any`):
            The object whose docstring we are fixing.
        old_doc_args (`str`):
            The current documentation of the parameters of `obj` in the docstring (as returned by
            `match_docstring_with_signature`).
        new_doc_args (`str`):
            The documentation of the parameters of `obj` matched with its signature (as returned by
            `match_docstring_with_signature`).
    """
    source, line_number = inspect.getsourcelines(obj)

    # Get to the line that opens the argument section.
    idx = 0
    while idx < len(source) and _re_args.search(source[idx]) is None:
        idx += 1

    if idx == len(source):
        # No argument section in the source of this object.
        return

    # Get to the line where the argument section stops.
    indent = find_indent(source[idx])
    idx += 1
    start_idx = idx
    while idx < len(source) and (len(source[idx].strip()) == 0 or find_indent(source[idx]) > indent):
        idx += 1

    # Walk back over the blank lines that end the section.
    idx -= 1
    while len(source[idx].strip()) == 0:
        idx -= 1
    idx += 1

    # Source lines keep their line break, hence the `[:-1]` to drop the last one before comparing.
    if "".join(source[start_idx:idx])[:-1] != old_doc_args:
        # The section in the source is not the one that was analyzed: do not touch the file.
        return

    obj_file = find_source_file(obj)
    with open(obj_file, "r", encoding="utf-8") as f:
        content = f.read()

    # `line_number` is 1-based while `start_idx`/`idx` are offsets inside `source`, hence the `- 1`.
    lines = content.split("\n")
    lines = lines[: line_number + start_idx - 1] + [new_doc_args] + lines[line_number + idx - 1 :]

    print(f"Fixing the docstring of {obj.__name__} in {obj_file}.")
    with open(obj_file, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
def check_docstrings(overwrite: bool = False):
    """
    Check docstrings of all public objects that are callables and are documented.

    Args:
        overwrite (`bool`, *optional*, defaults to `False`):
            Whether to fix inconsistencies or not.

    Raises:
        `ValueError`: If a docstring could not be processed, if a docstring does not match its signature (when
        `overwrite=False`) or if a docstring still contains a `<fill_type>`/`<fill_docstring>` template (when
        `overwrite=False`).
    """
    failures = []
    hard_failures = []
    to_clean = []
    for name in dir(transformers):
        # Skip private objects, undocumented ones and the ones explicitly ignored.
        if name.startswith("_") or ignore_undocumented(name) or name in OBJECTS_TO_IGNORE:
            continue

        obj = getattr(transformers, name)
        # Only classes that have a docstring are checked.
        if not callable(obj) or not isinstance(obj, type) or getattr(obj, "__doc__", None) is None:
            continue

        try:
            result = match_docstring_with_signature(obj)
            if result is not None:
                old_doc, new_doc = result
            else:
                old_doc, new_doc = None, None
        except Exception as e:
            # Any error while processing a docstring is reported as a hard failure for that object.
            print(e)
            hard_failures.append(name)
            continue

        if old_doc != new_doc:
            if overwrite:
                fix_docstring(obj, old_doc, new_doc)
            else:
                failures.append(name)
        elif not overwrite and new_doc is not None and ("<fill_type>" in new_doc or "<fill_docstring>" in new_doc):
            to_clean.append(name)

    # One section per kind of problem; sections are joined with a line break so that the last item of a section does
    # not run into the header of the next one.
    sections = []
    if len(hard_failures) > 0:
        header = (
            "The argument part of the docstrings of the following objects could not be processed, check they are "
            "properly formatted."
        )
        sections.append(header + "\n" + "\n".join([f"- {name}" for name in hard_failures]))
    if len(failures) > 0:
        header = (
            "The following objects docstrings do not match their signature. Run `make fix-copies` to fix this. "
            "In some cases, this error may be raised incorrectly by the docstring checker. If you think this is the "
            "case, you can manually check the docstrings and then add the object name to `OBJECTS_TO_IGNORE` in "
            "`utils/check_docstrings.py`."
        )
        sections.append(header + "\n" + "\n".join([f"- {name}" for name in failures]))
    if len(to_clean) > 0:
        header = (
            "The following objects docstrings contain templates you need to fix: search for `<fill_type>` or "
            "`<fill_docstring>`."
        )
        sections.append(header + "\n" + "\n".join([f"- {name}" for name in to_clean]))

    if len(sections) > 0:
        error_message = "There was at least one problem when checking docstrings of public objects.\n" + "\n".join(
            sections
        )
        raise ValueError(error_message)
if __name__ == "__main__":
    # Command line entry point: without flag, inconsistencies raise an error; with `--fix_and_overwrite`, they are
    # fixed in the source files.
    parser = argparse.ArgumentParser()
    parser.add_argument("--fix_and_overwrite", action="store_true", help="Whether to fix inconsistencies.")
    args = parser.parse_args()

    check_docstrings(overwrite=args.fix_and_overwrite)