Update tokenization_chatglm.py
Compatibility with new transformers
tokenization_chatglm.py  +1 -64  CHANGED
@@ -262,67 +262,4 @@ class ChatGLMTokenizer(PreTrainedTokenizer):
         token_ids_0 = prefix_tokens + token_ids_0
         if token_ids_1 is not None:
             token_ids_0 = token_ids_0 + token_ids_1 + [self.get_command("<eos>")]
-        return token_ids_0
-
-    def _pad(
-            self,
-            encoded_inputs: Union[Dict[str, EncodedInput], BatchEncoding],
-            max_length: Optional[int] = None,
-            padding_strategy: PaddingStrategy = PaddingStrategy.DO_NOT_PAD,
-            pad_to_multiple_of: Optional[int] = None,
-            return_attention_mask: Optional[bool] = None,
-    ) -> dict:
-        """
-        Pad encoded inputs (on left/right and up to predefined length or max length in the batch)
-
-        Args:
-            encoded_inputs:
-                Dictionary of tokenized inputs (`List[int]`) or batch of tokenized inputs (`List[List[int]]`).
-            max_length: maximum length of the returned list and optionally padding length (see below).
-                Will truncate by taking into account the special tokens.
-            padding_strategy: PaddingStrategy to use for padding.
-
-                - PaddingStrategy.LONGEST Pad to the longest sequence in the batch
-                - PaddingStrategy.MAX_LENGTH: Pad to the max length (default)
-                - PaddingStrategy.DO_NOT_PAD: Do not pad
-                The tokenizer padding sides are defined in self.padding_side:
-
-                    - 'left': pads on the left of the sequences
-                    - 'right': pads on the right of the sequences
-            pad_to_multiple_of: (optional) Integer if set will pad the sequence to a multiple of the provided value.
-                This is especially useful to enable the use of Tensor Core on NVIDIA hardware with compute capability
-                `>= 7.5` (Volta).
-            return_attention_mask:
-                (optional) Set to False to avoid returning attention mask (default: set to model specifics)
-        """
-        # Load from model defaults
-        assert self.padding_side == "left"
-
-        required_input = encoded_inputs[self.model_input_names[0]]
-        seq_length = len(required_input)
-
-        if padding_strategy == PaddingStrategy.LONGEST:
-            max_length = len(required_input)
-
-        if max_length is not None and pad_to_multiple_of is not None and (max_length % pad_to_multiple_of != 0):
-            max_length = ((max_length // pad_to_multiple_of) + 1) * pad_to_multiple_of
-
-        needs_to_be_padded = padding_strategy != PaddingStrategy.DO_NOT_PAD and len(required_input) != max_length
-
-        # Initialize attention mask if not present.
-        if "attention_mask" not in encoded_inputs:
-            encoded_inputs["attention_mask"] = [1] * seq_length
-
-        if "position_ids" not in encoded_inputs:
-            encoded_inputs["position_ids"] = list(range(seq_length))
-
-        if needs_to_be_padded:
-            difference = max_length - len(required_input)
-
-            if "attention_mask" in encoded_inputs:
-                encoded_inputs["attention_mask"] = [0] * difference + encoded_inputs["attention_mask"]
-            if "position_ids" in encoded_inputs:
-                encoded_inputs["position_ids"] = [0] * difference + encoded_inputs["position_ids"]
-            encoded_inputs[self.model_input_names[0]] = [self.pad_token_id] * difference + required_input
-
-        return encoded_inputs
+        return token_ids_0
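For context, a minimal usage sketch (not part of the commit) of how padding behaves once the override is gone: a call like tokenizer(..., padding=...) falls through to the stock _pad of whatever transformers release is installed, instead of the custom left-padding override removed above. The repository id below is a placeholder assumption; substitute the checkpoint this tokenizer actually ships with.

# Minimal sketch, assuming a ChatGLM-style checkpoint; "THUDM/chatglm3-6b" is
# a placeholder repo id, not confirmed by this commit.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "THUDM/chatglm3-6b",     # placeholder checkpoint id (assumption)
    trust_remote_code=True,  # loads this custom tokenizer code from the repo
)

# With _pad no longer overridden, this call is served by the base
# PreTrainedTokenizer._pad of the installed transformers version.
batch = tokenizer(
    ["Hello", "A somewhat longer input sentence"],
    padding="longest",
    return_tensors="pt",
)
print(batch["input_ids"].shape, batch["attention_mask"].shape)

One behavioral difference visible in the diff: the removed override asserted left padding and injected position_ids during padding, which the base implementation does not do, so callers that relied on the tokenizer returning padded position_ids may need to construct them model-side instead.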