from typing import List, Union

import numpy as np

# DEPRECATED: for dataset preprocessing use the torch version.
# Known datatype issue: padding float32 data here could return numpy.float64,
# which causes an error in the model training:
# Error: RuntimeError: Input type (torch.cuda.DoubleTensor) and weight type
# (torch.cuda.FloatTensor) should be the same!
# `pad_2D` used to build its padding block as float64; it now builds the
# padding in the input's own floating dtype so float32 stays float32.


def pad_1D(inputs: List[np.ndarray], pad_value: float = 0.0) -> np.ndarray:
    r"""Pad a list of 1D numpy arrays to the same length.

    Each array is right-padded with ``pad_value`` up to the length of the
    longest array, then the results are stacked along a new first axis.

    Args:
        inputs (List[np.ndarray]): List of 1D numpy arrays to pad.
        pad_value (float): Value to use for padding. Default is 0.0.

    Returns:
        np.ndarray: Padded 2D numpy array of shape (len(inputs), max_len),
        where max_len is the length of the longest input array.
    """

    def pad_data(x: np.ndarray, length: int) -> np.ndarray:
        r"""Right-pad a 1D numpy array with ``pad_value`` to ``length``.

        Args:
            x (np.ndarray): 1D numpy array to pad.
            length (int): Length to pad the array to.

        Returns:
            np.ndarray: Padded 1D numpy array of shape (length,).
        """
        return np.pad(
            x,
            (0, length - x.shape[0]),
            mode="constant",
            constant_values=pad_value,
        )

    max_len = max(len(x) for x in inputs)
    return np.stack([pad_data(x, max_len) for x in inputs])


def pad_2D(
    inputs: List[np.ndarray],
    maxlen: Union[int, None] = None,
    pad_value: float = 0.0,
) -> np.ndarray:
    r"""Pad a list of 2D numpy arrays to the same length along axis 1.

    Every array is right-padded with ``pad_value`` on its second axis and the
    padded arrays are stacked along a new first axis. Floating-point (and
    complex) inputs keep their dtype; other dtypes are padded with a float64
    block, so the result is promoted to float64.

    Args:
        inputs (List[np.ndarray]): List of 2D numpy arrays to pad. They must
            share the same first dimension for the final stack to succeed.
        maxlen (Union[int, None]): Length to pad the second axis to. If None
            (or 0, which is treated the same way), pad to the length of the
            longest array. Default is None.
        pad_value (float): Value to use for padding. Default is 0.0.

    Returns:
        np.ndarray: Padded 3D numpy array of shape
        (len(inputs), rows, max_len), where rows is the first dimension of the
        input arrays and max_len is the padded length of the second axis.

    Raises:
        ValueError: If an input array is longer than ``maxlen`` on axis 1.
    """

    def pad(x: np.ndarray, max_len: int) -> np.ndarray:
        r"""Right-pad a 2D numpy array with ``pad_value`` along axis 1.

        Args:
            x (np.ndarray): 2D numpy array to pad.
            max_len (int): Length to pad the second axis to.

        Returns:
            np.ndarray: Padded 2D numpy array of shape (x.shape[0], max_len).

        Raises:
            ValueError: If ``x`` is already longer than ``max_len``.
        """
        rows, width = x.shape[0], np.shape(x)[1]
        if width > max_len:
            raise ValueError("not max_len")

        # Build the padding in the input's own dtype for floating/complex data;
        # a float64 padding block would make np.concatenate upcast float32
        # inputs to float64. Integer/bool inputs keep the float64 padding so a
        # fractional pad_value is not truncated.
        if np.issubdtype(x.dtype, np.inexact):
            fill_dtype = x.dtype
        else:
            fill_dtype = np.float64
        padding = np.full((rows, max_len - width), pad_value, dtype=fill_dtype)
        return np.concatenate((x, padding), 1)

    # A falsy maxlen (None or 0) means "use the longest input".
    target_len = maxlen if maxlen else max(np.shape(x)[1] for x in inputs)
    return np.stack([pad(x, target_len) for x in inputs])


def pad_3D(inputs: Union[np.ndarray, List[np.ndarray]], B: int, T: int, L: int) -> np.ndarray:
    r"""Copy a batch of 2D arrays into a zero-filled array of shape (B, T, L).

    Args:
        inputs (Union[np.ndarray, List[np.ndarray]]): 3D numpy array, or a list
            of 2D arrays, to pad. Item ``i`` is written to slot ``i`` of the
            output, starting at index 0 on both remaining axes.
        B (int): Batch size to pad the array to.
        T (int): Time steps to pad the array to.
        L (int): Length to pad the array to.

    Returns:
        np.ndarray: Padded 3D numpy array of shape (B, T, L) and dtype
        float32; positions not covered by an input stay zero.
    """
    inputs_padded = np.zeros((B, T, L), dtype=np.float32)
    for i, input_ in enumerate(inputs):
        n_rows, n_cols = np.shape(input_)[0], np.shape(input_)[1]
        inputs_padded[i, :n_rows, :n_cols] = input_
    return inputs_padded