shethjenil committed: Upload folder using huggingface_hub
Changed files:
- musc/dtw/__init__.py +0 -0
- musc/dtw/__pycache__/__init__.cpython-310.pyc +0 -0
- musc/dtw/__pycache__/__init__.cpython-39.pyc +0 -0
- musc/dtw/__pycache__/anchor.cpython-310.pyc +0 -0
- musc/dtw/__pycache__/anchor.cpython-39.pyc +0 -0
- musc/dtw/__pycache__/core.__C_to_DE-6.py310.1.nbc +0 -0
- musc/dtw/__pycache__/core.__C_to_DE-6.py310.2.nbc +0 -0
- musc/dtw/__pycache__/core.__C_to_DE-6.py310.3.nbc +0 -0
- musc/dtw/__pycache__/core.__C_to_DE-6.py310.nbi +0 -0
- musc/dtw/__pycache__/core.__E_to_warping_path-82.py310.1.nbc +0 -0
- musc/dtw/__pycache__/core.__E_to_warping_path-82.py310.2.nbc +0 -0
- musc/dtw/__pycache__/core.__E_to_warping_path-82.py310.3.nbc +0 -0
- musc/dtw/__pycache__/core.__E_to_warping_path-82.py310.nbi +0 -0
- musc/dtw/__pycache__/core.cpython-310.pyc +0 -0
- musc/dtw/__pycache__/core.cpython-39.pyc +0 -0
- musc/dtw/__pycache__/cost.cpython-310.pyc +0 -0
- musc/dtw/__pycache__/cost.cpython-39.pyc +0 -0
- musc/dtw/__pycache__/mrmsdtw.cpython-310.pyc +0 -0
- musc/dtw/__pycache__/mrmsdtw.cpython-39.pyc +0 -0
- musc/dtw/__pycache__/utils.cpython-310.pyc +0 -0
- musc/dtw/__pycache__/utils.cpython-39.pyc +0 -0
- musc/dtw/__pycache__/visualization.cpython-310.pyc +0 -0
- musc/dtw/__pycache__/visualization.cpython-39.pyc +0 -0
- musc/dtw/anchor.py +147 -0
- musc/dtw/core.py +205 -0
- musc/dtw/cost.py +80 -0
- musc/dtw/mrmsdtw.py +616 -0
- musc/dtw/utils.py +426 -0
- musc/dtw/visualization.py +216 -0
- musc/model.py +275 -0
musc/dtw/__init__.py
ADDED
File without changes

musc/dtw/__pycache__/__init__.cpython-310.pyc  ADDED  Binary file (169 Bytes)
musc/dtw/__pycache__/__init__.cpython-39.pyc  ADDED  Binary file (181 Bytes)
musc/dtw/__pycache__/anchor.cpython-310.pyc  ADDED  Binary file (4.23 kB)
musc/dtw/__pycache__/anchor.cpython-39.pyc  ADDED  Binary file (4.21 kB)
musc/dtw/__pycache__/core.__C_to_DE-6.py310.1.nbc  ADDED  Binary file (113 kB)
musc/dtw/__pycache__/core.__C_to_DE-6.py310.2.nbc  ADDED  Binary file (111 kB)
musc/dtw/__pycache__/core.__C_to_DE-6.py310.3.nbc  ADDED  Binary file (111 kB)
musc/dtw/__pycache__/core.__C_to_DE-6.py310.nbi  ADDED  Binary file (3.27 kB)
musc/dtw/__pycache__/core.__E_to_warping_path-82.py310.1.nbc  ADDED  Binary file (68 kB)
musc/dtw/__pycache__/core.__E_to_warping_path-82.py310.2.nbc  ADDED  Binary file (68 kB)
musc/dtw/__pycache__/core.__E_to_warping_path-82.py310.3.nbc  ADDED  Binary file (68 kB)
musc/dtw/__pycache__/core.__E_to_warping_path-82.py310.nbi  ADDED  Binary file (3.35 kB)
musc/dtw/__pycache__/core.cpython-310.pyc  ADDED  Binary file (5.45 kB)
musc/dtw/__pycache__/core.cpython-39.pyc  ADDED  Binary file (5.41 kB)
musc/dtw/__pycache__/cost.cpython-310.pyc  ADDED  Binary file (2.94 kB)
musc/dtw/__pycache__/cost.cpython-39.pyc  ADDED  Binary file (2.92 kB)
musc/dtw/__pycache__/mrmsdtw.cpython-310.pyc  ADDED  Binary file (16.3 kB)
musc/dtw/__pycache__/mrmsdtw.cpython-39.pyc  ADDED  Binary file (16.1 kB)
musc/dtw/__pycache__/utils.cpython-310.pyc  ADDED  Binary file (12.3 kB)
musc/dtw/__pycache__/utils.cpython-39.pyc  ADDED  Binary file (12.3 kB)
musc/dtw/__pycache__/visualization.cpython-310.pyc  ADDED  Binary file (5.64 kB)
musc/dtw/__pycache__/visualization.cpython-39.pyc  ADDED  Binary file (5.58 kB)
musc/dtw/anchor.py
ADDED
@@ -0,0 +1,147 @@
from numba import jit
import numpy as np
from typing import Tuple


def project_alignment_on_a_new_feature_rate(alignment: np.ndarray,
                                            feature_rate_old: int,
                                            feature_rate_new: int,
                                            cost_matrix_size_old: tuple = (),
                                            cost_matrix_size_new: tuple = ()) -> np.ndarray:
    """Projects an alignment computed for a cost matrix on a certain
    feature resolution on a cost matrix having a different feature
    resolution.

    Parameters
    ----------
    alignment : np.ndarray [shape=(2, N)]
        Alignment matrix

    feature_rate_old : int
        Feature rate of the old cost matrix

    feature_rate_new : int
        Feature rate of the new cost matrix

    cost_matrix_size_old : tuple
        Size of the old cost matrix. Possibly needed to deal with border cases

    cost_matrix_size_new : tuple
        Size of the new cost matrix. Possibly needed to deal with border cases

    Returns
    -------
    np.ndarray [shape=(2, N)]
        Anchor sequence for the new cost matrix
    """
    # Project the alignment on the new feature rate
    fac = feature_rate_new / feature_rate_old
    anchors = np.round(alignment * fac) + 1

    # In case the sizes of the cost matrices are given explicitly and the
    # alignment specifies to align the first and last elements, handle this case
    # separately since this might cause problems in the general projection
    # procedure.
    if cost_matrix_size_old is not None and cost_matrix_size_new is not None:
        if np.array_equal(alignment[:, 0], np.array([0, 0])):
            anchors[:, 0] = np.array([1, 1])

        if np.array_equal(alignment[:, -1], np.array(cost_matrix_size_old) - 1):
            anchors[:, -1] = np.array(cost_matrix_size_new)

    return anchors - 1


def derive_anchors_from_projected_alignment(projected_alignment: np.ndarray,
                                            threshold: int) -> np.ndarray:
    """Derive anchors from a projected alignment such that the area of the rectangle
    defined by two subsequent anchors a1 and a2 is below a given threshold.

    Parameters
    ----------
    projected_alignment : np.ndarray [shape=(2, N)]
        Projected alignment array

    threshold : int
        Maximum area of the constraint rectangle

    Returns
    -------
    anchors_res : np.ndarray [shape=(2, M)]
        Resulting anchor sequence
    """
    L = projected_alignment.shape[1]

    a1 = np.array(projected_alignment[:, 0], copy=True).reshape(-1, 1)
    a2 = np.array(projected_alignment[:, -1], copy=True).reshape(-1, 1)

    if __compute_area(a1, a2) <= threshold:
        anchors_res = np.concatenate([a1, a2], axis=1)

    elif L > 2:
        center = int(np.floor(L/2 + 1))

        a1 = np.array(projected_alignment[:, 0], copy=True).reshape(-1, 1)
        a2 = np.array(projected_alignment[:, center - 1], copy=True).reshape(-1, 1)
        a3 = np.array(projected_alignment[:, -1], copy=True).reshape(-1, 1)

        if __compute_area(a1, a2) > threshold:
            anchors_1 = derive_anchors_from_projected_alignment(projected_alignment[:, 0:center], threshold)
        else:
            anchors_1 = np.concatenate([a1, a2], axis=1)

        if __compute_area(a2, a3) > threshold:
            anchors_2 = derive_anchors_from_projected_alignment(projected_alignment[:, center - 1:], threshold)
        else:
            anchors_2 = np.concatenate([a2, a3], axis=1)

        anchors_res = np.concatenate([anchors_1, anchors_2[:, 1:]], axis=1)

    else:
        if __compute_area(a1, a2) > threshold:
            print('Only two anchor points are given which do not fulfill the constraint.')
        anchors_res = np.concatenate([a1, a2], axis=1)

    return anchors_res


def derive_neighboring_anchors(warping_path: np.ndarray,
                               anchor_indices: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
    """Compute anchor points in the neighborhood of previous anchor points.

    Parameters
    ----------
    warping_path : np.ndarray [shape=(2, N)]
        Warping path

    anchor_indices : np.ndarray
        Indices corresponding to the anchor points in the ``warping_path``

    Returns
    -------
    neighboring_anchors : np.ndarray [shape=(2, N-1)]
        Sequence of neighboring anchors

    neighboring_anchor_indices : np.ndarray
        Indices into ``warping_path`` corresponding to ``neighboring_anchors``
    """
    L = anchor_indices.shape[0]
    neighboring_anchor_indices = np.zeros(L-1, dtype=int)
    neighboring_anchors = np.zeros((2, L-1), dtype=int)

    for k in range(1, L):
        i1 = anchor_indices[k-1]
        i2 = anchor_indices[k]

        neighboring_anchor_indices[k-1] = i1 + np.floor((i2 - i1) / 2)
        neighboring_anchors[:, k-1] = warping_path[:, neighboring_anchor_indices[k - 1]]

    return neighboring_anchors, neighboring_anchor_indices


@jit(nopython=True)
def __compute_area(a: tuple,
                   b: tuple):
    """Computes the area between two points, given as tuples"""
    return (b[0] - a[0] + 1) * (b[1] - a[1] + 1)
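
Example (illustrative, not part of the commit): a minimal sketch of how the two public helpers above might be combined, assuming the package is importable as musc.dtw and using made-up toy sizes. A coarse alignment is projected onto a finer feature rate and then split into anchors whose constraint rectangles stay below the area threshold.

# Hypothetical usage sketch; all numbers are toy values for illustration.
import numpy as np
from musc.dtw.anchor import (project_alignment_on_a_new_feature_rate,
                             derive_anchors_from_projected_alignment)

coarse_alignment = np.array([[0, 10, 20], [0, 12, 25]])   # alignment at 1 Hz
projected = project_alignment_on_a_new_feature_rate(alignment=coarse_alignment,
                                                    feature_rate_old=1,
                                                    feature_rate_new=50,
                                                    cost_matrix_size_old=(21, 26),
                                                    cost_matrix_size_new=(1050, 1300))
# Split the projected alignment into anchors so that no constraint rectangle
# exceeds 10000 cells.
anchors = derive_anchors_from_projected_alignment(projected, threshold=10000)
print(anchors.shape)  # (2, number_of_anchors)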
musc/dtw/core.py
ADDED
@@ -0,0 +1,205 @@
import librosa
from numba import jit
import numpy as np


@jit(nopython=True, cache=True)
def __C_to_DE(C: np.ndarray = None,
              dn: np.ndarray = np.array([1, 1, 0], np.int64),
              dm: np.ndarray = np.array([1, 0, 1], np.int64),
              dw: np.ndarray = np.array([1.0, 1.0, 1.0], np.float64),
              sub_sequence: bool = False) -> (np.ndarray, np.ndarray):
    """This function computes the accumulated cost matrix D and the step index
    matrix E.

    Parameters
    ----------
    C : np.ndarray (np.float32 / np.float64) [shape=(N, M)]
        Cost matrix

    dn : np.ndarray (np.int64) [shape=(1, S)]
        Integer array defining valid steps (N direction of C), default: [1, 1, 0]

    dm : np.ndarray (np.int64) [shape=(1, S)]
        Integer array defining valid steps (M direction of C), default: [1, 0, 1]

    dw : np.ndarray (np.float64) [shape=(1, S)]
        Double array defining the weight of each step, default: [1.0, 1.0, 1.0]

    sub_sequence : bool
        Set `True` for SubSequence DTW, default: False

    Returns
    -------
    D : np.ndarray (np.float64) [shape=(N, M)]
        Accumulated cost matrix of type double

    E : np.ndarray (np.int64) [shape=(N, M)]
        Step index matrix.
        E[n, m] holds the index of the step taken to determine the value of D[n, m].
        If E[n, m] is zero, no valid step was possible.
        NaNs in the cost matrix are preserved, invalid fields in the cost matrix are NaNs.
    """
    if C is None:
        raise ValueError('C must be a 2D numpy array.')

    N, M = C.shape
    S = dn.size

    if S != dm.size or S != dw.size:
        raise ValueError('The parameters dn, dm, and dw must be of equal length.')

    # calc bounding box size of steps
    sbbn = np.max(dn)
    sbbm = np.max(dm)

    # initialize E
    E = np.zeros((N, M), np.int64) - 1

    # initialize extended D matrix
    D = np.ones((sbbn + N, sbbm + M), np.float64) * np.inf

    if sub_sequence:
        for m in range(M):
            D[sbbn, sbbm + m] = C[0, m]
    else:
        D[sbbn, sbbm] = C[0, 0]

    # accumulate
    for m in range(sbbm, M + sbbm):
        for n in range(sbbn, N + sbbn):
            for s in range(S):
                cost = D[n - dn[s], m - dm[s]] + C[n - sbbn, m - sbbm] * dw[s]
                if cost < D[n, m]:
                    D[n, m] = cost
                    E[n - sbbn, m - sbbm] = s

    D = D[sbbn: N + sbbn, sbbm: M + sbbm]

    return D, E


@jit(nopython=True, cache=True)
def __E_to_warping_path(E: np.ndarray,
                        dn: np.ndarray = np.array([1, 1, 0], np.int64),
                        dm: np.ndarray = np.array([1, 0, 1], np.int64),
                        sub_sequence: bool = False,
                        end_index: int = -1) -> np.ndarray:
    """This function computes a warping path based on the provided matrix E
    and the allowed steps.

    Parameters
    ----------
    E : np.ndarray (np.int64) [shape=(N, M)]
        Step index matrix

    dn : np.ndarray (np.int64) [shape=(1, S)]
        Integer array defining valid steps (N direction of C), default: [1, 1, 0]

    dm : np.ndarray (np.int64) [shape=(1, S)]
        Integer array defining valid steps (M direction of C), default: [1, 0, 1]

    sub_sequence : bool
        Set `True` for SubSequence DTW, default: False

    end_index : int
        In case of SubSequence DTW

    Returns
    -------
    warping_path : np.ndarray (np.int64) [shape=(2, M)]
        Resulting optimal warping path
    """
    N, M = E.shape

    if not sub_sequence and end_index == -1:
        end_index = M - 1

    m = end_index
    n = N - 1

    warping_path = np.zeros((2, n + m + 1))

    index = 0

    def _loop(m, n, index):
        warping_path[:, index] = np.array([n, m])
        step_index = E[n, m]
        m -= dm[step_index]
        n -= dn[step_index]
        index += 1
        return m, n, index

    if sub_sequence:
        while n > 0:
            m, n, index = _loop(m, n, index)
    else:
        while m > 0 or n > 0:
            m, n, index = _loop(m, n, index)

    warping_path[:, index] = np.array([n, m])
    warping_path = warping_path[:, index::-1]

    return warping_path


def compute_warping_path(C: np.ndarray,
                         step_sizes: np.ndarray = np.array([[1, 0], [0, 1], [1, 1]], np.int64),
                         step_weights: np.ndarray = np.array([1.0, 1.0, 1.0], np.float64),
                         implementation: str = 'synctoolbox'):
    """Applies DTW on cost matrix C.

    Parameters
    ----------
    C : np.ndarray (np.float32 / np.float64) [shape=(N, M)]
        Cost matrix

    step_sizes : np.ndarray (np.int64) [shape=(2, S)]
        Array of step sizes

    step_weights : np.ndarray (np.float64) [shape=(2, S)]
        Array of step weights

    implementation: str
        Choose among ``synctoolbox`` and ``librosa``. (default: ``synctoolbox``)

    Returns
    -------
    D : np.ndarray (np.float64) [shape=(N, M)]
        Accumulated cost matrix

    E : np.ndarray (np.int64) [shape=(N, M)]
        Step index matrix

    wp : np.ndarray (np.int64) [shape=(2, M)]
        Warping path
    """
    if implementation == 'librosa':
        D, wp, E = librosa.sequence.dtw(C=C,
                                        step_sizes_sigma=step_sizes,
                                        weights_add=np.array([0, 0, 0]),
                                        weights_mul=step_weights,
                                        return_steps=True,
                                        subseq=False)
        wp = wp[::-1].T

    elif implementation == 'synctoolbox':
        dn = step_sizes[:, 0]
        dm = step_sizes[:, 1]

        D, E = __C_to_DE(C,
                         dn=dn,
                         dm=dm,
                         dw=step_weights,
                         sub_sequence=False)

        wp = __E_to_warping_path(E=E,
                                 dn=dn,
                                 dm=dm,
                                 sub_sequence=False)

    else:
        raise NotImplementedError(f'No implementation found called {implementation}')

    return D, E, wp
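
Example (illustrative, not part of the commit): a minimal sketch of compute_warping_path on a small random cost matrix with the default step sizes and weights; the matrix size is an arbitrary toy value.

# Hypothetical usage sketch for the DTW core above.
import numpy as np
from musc.dtw.core import compute_warping_path

rng = np.random.default_rng(0)
C = rng.random((40, 50))                         # toy cost matrix
D, E, wp = compute_warping_path(C, implementation='synctoolbox')
print(D[-1, -1])                                 # accumulated cost of the full path
print(wp[:, 0], wp[:, -1])                       # path runs from (0, 0) to (39, 49)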
musc/dtw/cost.py
ADDED
@@ -0,0 +1,80 @@
from numba import jit
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances

#@jit(nopython=True)
def cosine_distance(f1, f2, cos_meas_max=2.0, cos_meas_min=1.0):
    """For all pairs of vectors f1' and f2' in f1 and f2, computes 1 - (f1.f2),
    where '.' is the dot product, and rescales the results to lie in the
    range [cos_meas_min, cos_meas_max].
    Corresponds to regular cosine distance if f1' and f2' are normalized and
    cos_meas_min==0.0 and cos_meas_max==1.0."""
    return (1 - f1.T @ f2) * (cos_meas_max - cos_meas_min) + cos_meas_min


#@jit(nopython=True)
def euclidean_distance(f1, f2, l2_meas_max=1.0, l2_meas_min=0.0):
    """Computes euclidean distances between the vectors in f1 and f2, and
    rescales the results to lie in the range [l2_meas_min, l2_meas_max]."""

    #S1 = np.zeros((f1.shape[1], f2.shape[1]))
    #for n in range(f2.shape[1]):
    #    S1[:, n] = np.sqrt(np.sum((f1.T - f2[:, n]) ** 2, axis=1))
    S1 = euclidean_distances(f1.T, f2.T)

    return S1 * (l2_meas_max - l2_meas_min) + l2_meas_min


def compute_high_res_cost_matrix(f_chroma1: np.ndarray,
                                 f_chroma2: np.ndarray,
                                 f_onset1: np.ndarray,
                                 f_onset2: np.ndarray,
                                 weights: np.ndarray = np.array([1.0, 1.0]),
                                 cos_meas_min: float = 1.0,
                                 cos_meas_max: float = 2.0,
                                 l2_meas_min: float = 0.0,
                                 l2_meas_max: float = 1.0):
    """Computes cost matrix of two sequences using two feature matrices
    for each sequence. Cosine distance is used for the chroma sequences and
    euclidean distance is used for the DLNCO sequences.

    Parameters
    ----------
    f_chroma1 : np.ndarray [shape=(12, N)]
        Chroma feature matrix of the first sequence (assumed to be normalized).

    f_chroma2 : np.ndarray [shape=(12, M)]
        Chroma feature matrix of the second sequence (assumed to be normalized).

    f_onset1 : np.ndarray [shape=(12, N)]
        DLNCO feature matrix of the first sequence

    f_onset2 : np.ndarray [shape=(12, M)]
        DLNCO feature matrix of the second sequence

    weights : np.ndarray [shape=[2,]]
        Weights array for the high-resolution cost computation.
        weights[0] * cosine_distance + weights[1] * euclidean_distance

    cos_meas_min : float
        Cosine distances are shifted to be at least ``cos_meas_min``

    cos_meas_max : float
        Cosine distances are scaled to be at most ``cos_meas_max``

    l2_meas_min : float
        Euclidean distances are shifted to be at least ``l2_meas_min``

    l2_meas_max : float
        Euclidean distances are scaled to be at most ``l2_meas_max``

    Returns
    -------
    C: np.ndarray [shape=(N, M)]
        Cost matrix
    """
    cos_dis = cosine_distance(f_chroma1, f_chroma2, cos_meas_min=cos_meas_min, cos_meas_max=cos_meas_max)
    euc_dis = euclidean_distance(f_onset1, f_onset2, l2_meas_min=l2_meas_min, l2_meas_max=l2_meas_max)

    return weights[0] * cos_dis + weights[1] * euc_dis
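
Example (illustrative, not part of the commit): a sketch of compute_high_res_cost_matrix on random, column-normalized chroma and random DLNCO-like onset features; the sizes and the equal weighting are made-up toy values.

# Hypothetical usage sketch for the combined chroma + DLNCO cost matrix.
import numpy as np
from musc.dtw.cost import compute_high_res_cost_matrix

rng = np.random.default_rng(0)
f_chroma1 = rng.random((12, 100)); f_chroma1 /= np.linalg.norm(f_chroma1, axis=0)
f_chroma2 = rng.random((12, 120)); f_chroma2 /= np.linalg.norm(f_chroma2, axis=0)
f_onset1 = rng.random((12, 100))     # stand-in for DLNCO onset features
f_onset2 = rng.random((12, 120))

C = compute_high_res_cost_matrix(f_chroma1, f_chroma2, f_onset1, f_onset2,
                                 weights=np.array([0.5, 0.5]))
print(C.shape)  # (100, 120)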
musc/dtw/mrmsdtw.py
ADDED
@@ -0,0 +1,616 @@
from numba import jit
import numpy as np
import time
from typing import List, Tuple, Optional

from .anchor import derive_anchors_from_projected_alignment, derive_neighboring_anchors, \
    project_alignment_on_a_new_feature_rate
from .utils import build_path_from_warping_paths, compute_cost_matrices_between_anchors, smooth_downsample_feature, normalize_feature, compute_warping_paths_from_cost_matrices, find_anchor_indices_in_warping_path
from .visualization import sync_visualize_step1, sync_visualize_step2


def sync_via_mrmsdtw_with_anchors(f_chroma1: np.ndarray,
                                  f_chroma2: np.ndarray,
                                  f_onset1: np.ndarray = None,
                                  f_onset2: np.ndarray = None,
                                  input_feature_rate: float = 50,
                                  step_sizes: np.ndarray = np.array([[1, 0], [0, 1], [1, 1]], np.int32),
                                  step_weights: np.ndarray = np.array([1.0, 1.0, 1.0], np.float64),
                                  threshold_rec: int = 10000,
                                  win_len_smooth: np.ndarray = np.array([201, 101, 21, 1]),
                                  downsamp_smooth: np.ndarray = np.array([50, 25, 5, 1]),
                                  verbose: bool = False,
                                  dtw_implementation: str = 'synctoolbox',
                                  normalize_chroma: bool = True,
                                  chroma_norm_ord: int = 2,
                                  chroma_norm_threshold: float = 0.001,
                                  visualization_title: str = "MrMsDTW result",
                                  anchor_pairs: List[Tuple] = None,
                                  linear_inp_idx: List[int] = [],
                                  alpha=0.5) -> np.ndarray:
    """Compute memory-restricted multi-scale DTW (MrMsDTW) using chroma and (optionally) onset features.
    MrMsDTW is performed on multiple levels that get progressively finer, with rectangular constraint
    regions defined by the alignment found on the previous, coarser level.
    If onset features are provided, these are used on the finest level in addition to chroma
    to provide higher synchronization accuracy.

    Parameters
    ----------
    f_chroma1 : np.ndarray [shape=(12, N)]
        Chroma feature matrix of the first sequence

    f_chroma2 : np.ndarray [shape=(12, M)]
        Chroma feature matrix of the second sequence

    f_onset1 : np.ndarray [shape=(L, N)]
        Onset feature matrix of the first sequence (optional, default: None)

    f_onset2 : np.ndarray [shape=(L, M)]
        Onset feature matrix of the second sequence (optional, default: None)

    input_feature_rate: int
        Input feature rate of the chroma features (default: 50)

    step_sizes: np.ndarray
        DTW step sizes (default: np.array([[1, 0], [0, 1], [1, 1]]))

    step_weights: np.ndarray
        DTW step weights (np.array([1.0, 1.0, 1.0]))

    threshold_rec: int
        Defines the maximum area that is spanned by the rectangle of two
        consecutive elements in the alignment (default: 10000)

    win_len_smooth : np.ndarray
        Window lengths for chroma feature smoothing (default: np.array([201, 101, 21, 1]))

    downsamp_smooth : np.ndarray
        Downsampling factors (default: np.array([50, 25, 5, 1]))

    verbose : bool
        Set `True` for visualization (default: False)

    dtw_implementation : str
        DTW implementation, librosa or synctoolbox (default: synctoolbox)

    normalize_chroma : bool
        Set `True` to normalize input chroma features after each downsampling
        and smoothing operation.

    chroma_norm_ord: int
        Order of chroma normalization, relevant if ``normalize_chroma`` is True.
        (default: 2)

    chroma_norm_threshold: float
        If the norm falls below threshold for a feature vector, then the
        normalized feature vector is set to be the unit vector. Relevant, if
        ``normalize_chroma`` is True (default: 0.001)

    visualization_title : str
        Title for the visualization plots. Only relevant if 'verbose' is True
        (default: "MrMsDTW result")

    anchor_pairs: List[Tuple]
        Anchor pairs given in seconds. Note that
        * (0, 0) and (<audio-len1>, <audio-len2>) are not allowed.
        * Anchors must be monotonically increasing.

    linear_inp_idx: List[int]
        List of the indices of intervals created by anchor pairs, for which
        MrMsDTW shouldn't be run, e.g., if the interval only involves silence.

        0       ap1     ap2     ap3
        |        |       |       |
        |  idx0  |  idx1 |  idx2 |  idx3 OR idx-1
        |        |       |       |

        Note that index -1 corresponds to the last interval, which begins at
        the last anchor pair and extends to the end of the audio files.

    alpha: float
        Coefficient for the Chroma cost matrix in the finest scale of the MrMsDTW algorithm.
        C = alpha * C_Chroma + (1 - alpha) * C_act (default: 0.5)

    Returns
    -------
    wp : np.ndarray [shape=(2, T)]
        Resulting warping path which indicates synchronized indices.
    """
    if anchor_pairs is None:
        wp = sync_via_mrmsdtw(f_chroma1=f_chroma1,
                              f_chroma2=f_chroma2,
                              f_onset1=f_onset1,
                              f_onset2=f_onset2,
                              input_feature_rate=input_feature_rate,
                              step_sizes=step_sizes,
                              step_weights=step_weights,
                              threshold_rec=threshold_rec,
                              win_len_smooth=win_len_smooth,
                              downsamp_smooth=downsamp_smooth,
                              verbose=verbose,
                              dtw_implementation=dtw_implementation,
                              normalize_chroma=normalize_chroma,
                              chroma_norm_ord=chroma_norm_ord,
                              chroma_norm_threshold=chroma_norm_threshold,
                              visualization_title=visualization_title,
                              alpha=alpha)
    else:
        # constant_intervals = [((0, x1), (0, y1), False),
        #                       ((x1, x2), (y1, y2), True),
        #                       ((x2, -1), (y2, -1), False)]
        wp = None

        if verbose:
            print('Anchor points are given!')

        __check_anchor_pairs(anchor_pairs, f_chroma1.shape[1], f_chroma2.shape[1], input_feature_rate)

        # Add ending as the anchor point
        anchor_pairs.append((-1, -1))

        prev_a1 = 0
        prev_a2 = 0

        for idx, anchor_pair in enumerate(anchor_pairs):
            cur_a1, cur_a2 = anchor_pair

            # Split the features
            f_chroma1_split, f_onset1_split, f_chroma2_split, f_onset2_split = __split_features(f_chroma1,
                                                                                                f_onset1,
                                                                                                f_chroma2,
                                                                                                f_onset2,
                                                                                                cur_a1,
                                                                                                cur_a2,
                                                                                                prev_a1,
                                                                                                prev_a2,
                                                                                                input_feature_rate)

            if idx in linear_inp_idx or idx == len(anchor_pairs) - 1 and -1 in linear_inp_idx:
                # Generate a diagonal warping path, if the algorithm is not supposed to be executed.
                # A typical scenario is the silence breaks which are enclosed by two anchor points.
                if verbose:
                    print('A diagonal warping path is generated for the interval \n\t Feature sequence 1: %.2f - %.2f'
                          '\n\t Feature sequence 2: %.2f - %.2f\n' % (prev_a1, cur_a1, prev_a2, cur_a2))
                wp_cur = __diagonal_warping_path(f_chroma1_split, f_chroma2_split)

            else:
                if verbose:
                    if cur_a1 != -1 and cur_a2 != -1:
                        print('MrMsDTW is applied for the interval \n\t Feature sequence 1: %.2f - %.2f'
                              '\n\t Feature sequence 2: %.2f - %.2f\n' % (prev_a1, cur_a1, prev_a2, cur_a2))
                    else:
                        print('MrMsDTW is applied for the interval \n\t Feature sequence 1: %.2f - end'
                              '\n\t Feature sequence 2: %.2f - end\n' % (prev_a1, prev_a2))
                wp_cur = sync_via_mrmsdtw(f_chroma1=f_chroma1_split,
                                          f_chroma2=f_chroma2_split,
                                          f_onset1=f_onset1_split,
                                          f_onset2=f_onset2_split,
                                          input_feature_rate=input_feature_rate,
                                          step_sizes=step_sizes,
                                          step_weights=step_weights,
                                          threshold_rec=threshold_rec,
                                          win_len_smooth=win_len_smooth,
                                          downsamp_smooth=downsamp_smooth,
                                          verbose=verbose,
                                          dtw_implementation=dtw_implementation,
                                          normalize_chroma=normalize_chroma,
                                          chroma_norm_ord=chroma_norm_ord,
                                          chroma_norm_threshold=chroma_norm_threshold,
                                          alpha=alpha)

            if wp is None:
                wp = np.array(wp_cur, copy=True)

            # Concatenate warping paths
            else:
                wp = np.concatenate([wp, wp_cur + wp[:, -1].reshape(2, 1) + 1], axis=1)

            prev_a1 = cur_a1
            prev_a2 = cur_a2

        anchor_pairs.pop()

    return wp


def sync_via_mrmsdtw(f_chroma1: np.ndarray,
                     f_chroma2: np.ndarray,
                     f_onset1: np.ndarray = None,
                     f_onset2: np.ndarray = None,
                     input_feature_rate: float = 50,
                     step_sizes: np.ndarray = np.array([[1, 0], [0, 1], [1, 1]], np.int32),
                     step_weights: np.ndarray = np.array([1.0, 1.0, 1.0], np.float64),
                     threshold_rec: int = 10000,
                     win_len_smooth: np.ndarray = np.array([201, 101, 21, 1]),
                     downsamp_smooth: np.ndarray = np.array([50, 25, 5, 1]),
                     verbose: bool = False,
                     dtw_implementation: str = 'synctoolbox',
                     normalize_chroma: bool = True,
                     chroma_norm_ord: int = 2,
                     chroma_norm_threshold: float = 0.001,
                     visualization_title: str = "MrMsDTW result",
                     alpha=0.5) -> np.ndarray:
    """Compute memory-restricted multi-scale DTW (MrMsDTW) using chroma and (optionally) onset features.
    MrMsDTW is performed on multiple levels that get progressively finer, with rectangular constraint
    regions defined by the alignment found on the previous, coarser level.
    If onset features are provided, these are used on the finest level in addition to chroma
    to provide higher synchronization accuracy.

    Parameters
    ----------
    f_chroma1 : np.ndarray [shape=(12, N)]
        Chroma feature matrix of the first sequence

    f_chroma2 : np.ndarray [shape=(12, M)]
        Chroma feature matrix of the second sequence

    f_onset1 : np.ndarray [shape=(L, N)]
        Onset feature matrix of the first sequence (optional, default: None)

    f_onset2 : np.ndarray [shape=(L, M)]
        Onset feature matrix of the second sequence (optional, default: None)

    input_feature_rate: int
        Input feature rate of the chroma features (default: 50)

    step_sizes: np.ndarray
        DTW step sizes (default: np.array([[1, 0], [0, 1], [1, 1]]))

    step_weights: np.ndarray
        DTW step weights (np.array([1.0, 1.0, 1.0]))

    threshold_rec: int
        Defines the maximum area that is spanned by the rectangle of two
        consecutive elements in the alignment (default: 10000)

    win_len_smooth : np.ndarray
        Window lengths for chroma feature smoothing (default: np.array([201, 101, 21, 1]))

    downsamp_smooth : np.ndarray
        Downsampling factors (default: np.array([50, 25, 5, 1]))

    verbose : bool
        Set `True` for visualization (default: False)

    dtw_implementation : str
        DTW implementation, librosa or synctoolbox (default: synctoolbox)

    normalize_chroma : bool
        Set `True` to normalize input chroma features after each downsampling
        and smoothing operation.

    chroma_norm_ord: int
        Order of chroma normalization, relevant if ``normalize_chroma`` is True.
        (default: 2)

    chroma_norm_threshold: float
        If the norm falls below threshold for a feature vector, then the
        normalized feature vector is set to be the unit vector. Relevant, if
        ``normalize_chroma`` is True (default: 0.001)

    visualization_title : str
        Title for the visualization plots. Only relevant if 'verbose' is True
        (default: "MrMsDTW result")

    alpha: float
        Coefficient for the Chroma cost matrix in the finest scale of the MrMsDTW algorithm.
        C = alpha * C_Chroma + (1 - alpha) * C_act (default: 0.5)

    Returns
    -------
    alignment: np.ndarray [shape=(2, T)]
        Resulting warping path which indicates synchronized indices.
    """
    # If onset features are given as input, high resolution MrMsDTW is activated.
    high_res = False
    if f_onset1 is not None and f_onset2 is not None:
        high_res = True

    if high_res and (f_chroma1.shape[1] != f_onset1.shape[1] or f_chroma2.shape[1] != f_onset2.shape[1]):
        raise ValueError('Chroma and onset features must be of the same length.')

    if downsamp_smooth[-1] != 1 or win_len_smooth[-1] != 1:
        raise ValueError('The downsampling factor of the last iteration must be equal to 1, i.e. '
                         'at the last iteration, it is computed at the input feature rate!')

    num_iterations = win_len_smooth.shape[0]
    cost_matrix_size_old = tuple()
    feature_rate_old = input_feature_rate / downsamp_smooth[0]
    alignment = None
    total_computation_time = 0.0

    # If the area is less than the threshold_rec, don't apply the multiscale DTW.
    it = (num_iterations - 1) if __compute_area(f_chroma1, f_chroma2) < threshold_rec else 0

    while it < num_iterations:
        tic1 = time.perf_counter()

        # Smooth and downsample given raw features
        f_chroma1_cur, _ = smooth_downsample_feature(f_chroma1,
                                                     input_feature_rate=input_feature_rate,
                                                     win_len_smooth=win_len_smooth[it],
                                                     downsamp_smooth=downsamp_smooth[it])

        f_chroma2_cur, feature_rate_new = smooth_downsample_feature(f_chroma2,
                                                                    input_feature_rate=input_feature_rate,
                                                                    win_len_smooth=win_len_smooth[it],
                                                                    downsamp_smooth=downsamp_smooth[it])

        if normalize_chroma:
            f_chroma1_cur = normalize_feature(f_chroma1_cur,
                                              norm_ord=chroma_norm_ord,
                                              threshold=chroma_norm_threshold)

            f_chroma2_cur = normalize_feature(f_chroma2_cur,
                                              norm_ord=chroma_norm_ord,
                                              threshold=chroma_norm_threshold)

        # Project path onto new resolution
        cost_matrix_size_new = (f_chroma1_cur.shape[1], f_chroma2_cur.shape[1])

        if alignment is None:
            # Initialize the alignment with the start and end frames of the feature sequence
            anchors = np.array([[0, f_chroma1_cur.shape[1] - 1], [0, f_chroma2_cur.shape[1] - 1]])

        else:
            projected_alignment = project_alignment_on_a_new_feature_rate(alignment=alignment,
                                                                          feature_rate_old=feature_rate_old,
                                                                          feature_rate_new=feature_rate_new,
                                                                          cost_matrix_size_old=cost_matrix_size_old,
                                                                          cost_matrix_size_new=cost_matrix_size_new)

            anchors = derive_anchors_from_projected_alignment(projected_alignment=projected_alignment,
                                                              threshold=threshold_rec)

        # Cost matrix and warping path computation
        if high_res and it == num_iterations - 1:
            # Compute cost considering chroma and pitch onset features and alignment only in the last iteration,
            # where the features are at the finest level.
            cost_matrices_step1 = compute_cost_matrices_between_anchors(f_chroma1=f_chroma1_cur,
                                                                        f_chroma2=f_chroma2_cur,
                                                                        f_onset1=f_onset1,
                                                                        f_onset2=f_onset2,
                                                                        anchors=anchors,
                                                                        alpha=alpha)

        else:
            cost_matrices_step1 = compute_cost_matrices_between_anchors(f_chroma1=f_chroma1_cur,
                                                                        f_chroma2=f_chroma2_cur,
                                                                        anchors=anchors,
                                                                        alpha=alpha)

        wp_list = compute_warping_paths_from_cost_matrices(cost_matrices_step1,
                                                           step_sizes=step_sizes,
                                                           step_weights=step_weights,
                                                           implementation=dtw_implementation)

        # Concatenate warping paths
        wp = build_path_from_warping_paths(warping_paths=wp_list,
                                           anchors=anchors)

        anchors_step1 = None
        wp_step1 = None
        num_rows_step1 = 0
        num_cols_step1 = 0
        ax = None

        toc1 = time.perf_counter()
        if verbose and cost_matrices_step1 is not None:
            anchors_step1 = np.array(anchors, copy=True)
            wp_step1 = np.array(wp, copy=True)
            num_rows_step1, num_cols_step1 = np.sum(np.array([dtw_mat.shape for dtw_mat in cost_matrices_step1], int),
                                                    axis=0)
            fig, ax = sync_visualize_step1(cost_matrices_step1,
                                           num_rows_step1,
                                           num_cols_step1,
                                           anchors,
                                           wp)
        tic2 = time.perf_counter()

        # Compute neighboring anchors and refine alignment using local path between neighboring anchors
        anchor_indices_in_warping_path = find_anchor_indices_in_warping_path(wp, anchors=anchors)

        # Compute neighboring anchors for refinement
        neighboring_anchors, neighboring_anchor_indices = \
            derive_neighboring_anchors(wp, anchor_indices=anchor_indices_in_warping_path)

        if neighboring_anchor_indices.shape[0] > 1 \
                and it == num_iterations - 1 and high_res:
            cost_matrices_step2 = compute_cost_matrices_between_anchors(f_chroma1=f_chroma1_cur,
                                                                        f_chroma2=f_chroma2_cur,
                                                                        f_onset1=f_onset1,
                                                                        f_onset2=f_onset2,
                                                                        anchors=neighboring_anchors,
                                                                        alpha=alpha)

        else:
            cost_matrices_step2 = compute_cost_matrices_between_anchors(f_chroma1=f_chroma1_cur,
                                                                        f_chroma2=f_chroma2_cur,
                                                                        anchors=neighboring_anchors,
                                                                        alpha=alpha)

        wp_list_refine = compute_warping_paths_from_cost_matrices(cost_matrices=cost_matrices_step2,
                                                                  step_sizes=step_sizes,
                                                                  step_weights=step_weights,
                                                                  implementation=dtw_implementation)

        wp = __refine_wp(wp, anchors, wp_list_refine, neighboring_anchors, neighboring_anchor_indices)

        toc2 = time.perf_counter()
        computation_time_it = toc2 - tic2 + toc1 - tic1
        total_computation_time += computation_time_it

        alignment = wp
        feature_rate_old = feature_rate_new
        cost_matrix_size_old = cost_matrix_size_new

        if verbose and cost_matrices_step2 is not None:
            sync_visualize_step2(ax,
                                 cost_matrices_step2,
                                 wp,
                                 wp_step1,
                                 num_rows_step1,
                                 num_cols_step1,
                                 anchors_step1,
                                 neighboring_anchors,
                                 plot_title=f"{visualization_title} - Level {it + 1}")
            print('Level {} computation time: {:.2f} seconds'.format(it, computation_time_it))

        it += 1

    if verbose:
        print('Computation time of MrMsDTW: {:.2f} seconds'.format(total_computation_time))

    return alignment


def __diagonal_warping_path(f1: np.ndarray,
                            f2: np.ndarray) -> np.ndarray:
    """Generates a diagonal warping path given two feature sequences.

    Parameters
    ----------
    f1: np.ndarray [shape=(_, N)]
        First feature sequence

    f2: np.ndarray [shape=(_, M)]
        Second feature sequence

    Returns
    -------
    np.ndarray: Diagonal warping path [shape=(2, T)]
    """
    max_size = np.maximum(f1.shape[1], f2.shape[1])
    min_size = np.minimum(f1.shape[1], f2.shape[1])

    if min_size == 1:
        return np.array([max_size - 1, 0]).reshape(-1, 1)

    elif max_size == f1.shape[1]:
        return np.array([np.round(np.linspace(0, max_size - 1, min_size)), np.linspace(0, min_size - 1, min_size)])

    else:
        return np.array([np.linspace(0, min_size-1, min_size), np.round(np.linspace(0, max_size - 1, min_size))])


@jit(nopython=True)
def __compute_area(f1, f2):
    """Computes the area of the cost matrix given two feature sequences

    Parameters
    ----------
    f1: np.ndarray
        First feature sequence

    f2: np.ndarray
        Second feature sequence

    Returns
    -------
    int: Area of the cost matrix
    """
    return f1.shape[1] * f2.shape[1]


def __split_features(f_chroma1: np.ndarray,
                     f_onset1: np.ndarray,
                     f_chroma2: np.ndarray,
                     f_onset2: np.ndarray,
                     cur_a1: float,
                     cur_a2: float,
                     prev_a1: float,
                     prev_a2: float,
                     feature_rate: int) -> Tuple[np.ndarray, Optional[np.ndarray], np.ndarray, Optional[np.ndarray]]:

    if cur_a1 == -1:
        f_chroma1_split = f_chroma1[:, int(prev_a1 * feature_rate):]
        if f_onset1 is not None:
            f_onset1_split = f_onset1[:, int(prev_a1 * feature_rate):]
        else:
            f_onset1_split = None

    else:
        # Split the features
        f_chroma1_split = f_chroma1[:, int(prev_a1 * feature_rate):int(cur_a1 * feature_rate)]
        if f_onset1 is not None:
            f_onset1_split = f_onset1[:, int(prev_a1 * feature_rate):int(cur_a1 * feature_rate)]
        else:
            f_onset1_split = None

    if cur_a2 == -1:
        f_chroma2_split = f_chroma2[:, int(prev_a2 * feature_rate):]
        if f_onset2 is not None:
            f_onset2_split = f_onset2[:, int(prev_a2 * feature_rate):]
        else:
            f_onset2_split = None

    else:
        f_chroma2_split = f_chroma2[:, int(prev_a2 * feature_rate):int(cur_a2 * feature_rate)]
        if f_onset2 is not None:
            f_onset2_split = f_onset2[:, int(prev_a2 * feature_rate):int(cur_a2 * feature_rate)]
        else:
            f_onset2_split = None

    return f_chroma1_split, f_onset1_split, f_chroma2_split, f_onset2_split


def __refine_wp(wp: np.ndarray,
                anchors: np.ndarray,
                wp_list_refine: List,
                neighboring_anchors: np.ndarray,
                neighboring_anchor_indices: np.ndarray) -> np.ndarray:
    wp_length = wp[:, neighboring_anchor_indices[-1]:].shape[1]
    last_list = wp[:, neighboring_anchor_indices[-1]:] - np.tile(
        wp[:, neighboring_anchor_indices[-1]].reshape(-1, 1), wp_length)
    wp_list_tmp = [wp[:, :neighboring_anchor_indices[0] + 1]] + wp_list_refine + [last_list]
    A_tmp = np.concatenate([anchors[:, 0].reshape(-1, 1), neighboring_anchors, anchors[:, -1].reshape(-1, 1)],
                           axis=1)
    wp_res = build_path_from_warping_paths(warping_paths=wp_list_tmp,
                                           anchors=A_tmp)

    return wp_res


def __check_anchor_pairs(anchor_pairs: List,
                         f_len1: int,
                         f_len2: int,
                         feature_rate: int):
    """Ensures that the anchors satisfy the conditions

    Parameters
    ----------
    anchor_pairs: List[Tuple]
        List of anchor pairs

    f_len1: int
        Length of the first feature sequence

    f_len2: int
        Length of the second feature sequence

    feature_rate: int
        Input feature rate of the features
    """
    prev_a1 = 0
    prev_a2 = 0
    for anchor_pair in anchor_pairs:
        a1, a2 = anchor_pair

        if a1 <= 0 or a2 <= 0:
            raise ValueError('Starting point must be a positive number!')

        if a1 > f_len1 / feature_rate or a2 > f_len2 / feature_rate:
            raise ValueError('Anchor points cannot be greater than the length of the input audio files!')

        if a1 == f_len1 and a2 == f_len2:
            raise ValueError('Both anchor points cannot be equal to the length of the audio files.')

        if a1 == prev_a1 and a2 == prev_a2:
            raise ValueError('Duplicate anchor pairs are not allowed!')

        if a1 < prev_a1 or a2 < prev_a2:
            raise ValueError('Anchor points must be monotonically increasing.')

        prev_a1 = a1
        prev_a2 = a2
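
Example (illustrative, not part of the commit): a hedged end-to-end sketch of sync_via_mrmsdtw on synthetic chroma, assuming the smoothing and normalization helpers referenced from musc/dtw/utils.py behave like their synctoolbox counterparts. Anchor-constrained runs go through sync_via_mrmsdtw_with_anchors in the same way, with anchor_pairs given in seconds.

# Hypothetical usage sketch; feature sizes are toy values at the default 50 Hz rate.
import numpy as np
from musc.dtw.mrmsdtw import sync_via_mrmsdtw

rng = np.random.default_rng(0)
f_chroma1 = rng.random((12, 1000))   # ~20 s of chroma for recording 1
f_chroma2 = rng.random((12, 1200))   # ~24 s of chroma for recording 2

# Without onset features, only the chroma cost is used on every level.
wp = sync_via_mrmsdtw(f_chroma1=f_chroma1, f_chroma2=f_chroma2,
                      input_feature_rate=50, verbose=False)
print(wp.shape)  # (2, T): frame indices of sequence 1 aligned to sequence 2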
musc/dtw/utils.py
ADDED
@@ -0,0 +1,426 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import numpy as np
|
2 |
+
from typing import List
|
3 |
+
from numba import jit
|
4 |
+
import numpy as np
|
5 |
+
from scipy import signal
|
6 |
+
from typing import Tuple
|
7 |
+
|
8 |
+
|
9 |
+
from .core import compute_warping_path
|
10 |
+
from .cost import *
|
11 |
+
|
12 |
+
|
13 |
+
|
14 |
+
def compute_optimal_chroma_shift(f_chroma1: np.ndarray,
|
15 |
+
f_chroma2: np.ndarray,
|
16 |
+
chroma_transpositions: np.ndarray = np.arange(0, 12),
|
17 |
+
step_sizes: np.ndarray = np.array([[1, 0], [0, 1], [1, 1]], int),
|
18 |
+
step_weights: np.ndarray = np.array([1.0, 1.0, 1.0], np.float64)) -> int:
|
19 |
+
"""Computes the optimal chroma shift which minimizes the DTW cost.
|
20 |
+
|
21 |
+
Parameters
|
22 |
+
----------
|
23 |
+
f_chroma1 : np.ndarray [shape=(d_chroma, N_chroma)]
|
24 |
+
First chroma vector
|
25 |
+
|
26 |
+
f_chroma2 : np.ndarray [shape=(d_chroma, N_chroma)]
|
27 |
+
Second chroma vector
|
28 |
+
|
29 |
+
step_sizes : np.ndarray
|
30 |
+
DTW step sizes (default: np.array([[1, 0], [0, 1], [1, 1]]))
|
31 |
+
|
32 |
+
step_weights : np.ndarray
|
33 |
+
DTW step weights (default: np.array([1.0, 1.0, 1.0]))
|
34 |
+
|
35 |
+
chroma_transpositions : np.ndarray
|
36 |
+
Array of chroma shifts (default: np.arange(0, 11))
|
37 |
+
|
38 |
+
Returns
|
39 |
+
-------
|
40 |
+
opt_chroma_shift : int
|
41 |
+
Optimal chroma shift which minimizes the DTW cost.
|
42 |
+
"""
|
43 |
+
if f_chroma2.shape[1] >= 9000 or f_chroma1.shape[1] >= 9000:
|
44 |
+
print("Warning: You are attempting to find the optimal chroma shift on sequences of length >= 9000. "
|
45 |
+
"This involves full DTW computation. You'll probably want to smooth and downsample your sequences to a"
|
46 |
+
" lower feature resolution before doing this.")
|
47 |
+
opt_chroma_shift = 0
|
48 |
+
dtw_cost = np.inf
|
49 |
+
for chroma_shift in chroma_transpositions:
|
50 |
+
cost_matrix_tmp = cosine_distance(f_chroma1, shift_chroma_vectors(f_chroma2, chroma_shift))
|
51 |
+
D, _, _ = compute_warping_path(cost_matrix_tmp, step_sizes=step_sizes, step_weights=step_weights)
|
52 |
+
if D[-1, -1] < dtw_cost:
|
53 |
+
dtw_cost = D[-1, -1]
|
54 |
+
opt_chroma_shift = chroma_shift
|
55 |
+
|
56 |
+
return opt_chroma_shift
|
57 |
+
|
58 |
+
|
59 |
+
def compute_warping_paths_from_cost_matrices(cost_matrices: List,
|
60 |
+
step_sizes: np.array = np.array([[1, 0], [0, 1], [1, 1]], int),
|
61 |
+
step_weights: np.array = np.array([1.0, 1.0, 1.0], np.float64),
|
62 |
+
implementation: str = 'synctoolbox') -> List:
|
63 |
+
"""Computes a path via DTW on each matrix in cost_matrices
|
64 |
+
|
65 |
+
Parameters
|
66 |
+
----------
|
67 |
+
cost_matrices : list
|
68 |
+
List of cost matrices
|
69 |
+
|
70 |
+
step_sizes : np.ndarray
|
71 |
+
DTW step sizes (default: np.array([[1, 0], [0, 1], [1, 1]]))
|
72 |
+
|
73 |
+
step_weights : np.ndarray
|
74 |
+
DTW step weights (default: np.array([1.0, 1.0, 1.0]))
|
75 |
+
|
76 |
+
implementation : str
|
77 |
+
Choose among 'synctoolbox' and 'librosa' (default: 'synctoolbox')
|
78 |
+
|
79 |
+
Returns
|
80 |
+
-------
|
81 |
+
wp_list : list
|
82 |
+
List of warping paths
|
83 |
+
"""
|
84 |
+
return [compute_warping_path(C=C,
|
85 |
+
step_sizes=step_sizes,
|
86 |
+
step_weights=step_weights,
|
87 |
+
implementation=implementation)[2] for C in cost_matrices]
|
88 |
+
|
89 |
+
|
90 |
+
def compute_cost_matrices_between_anchors(f_chroma1: np.ndarray,
|
91 |
+
f_chroma2: np.ndarray,
|
92 |
+
anchors: np.ndarray,
|
93 |
+
f_onset1: np.ndarray = None,
|
94 |
+
f_onset2: np.ndarray = None,
|
95 |
+
alpha: float = 0.5) -> List:
|
96 |
+
"""Computes cost matrices for the given features between subsequent
|
97 |
+
pairs of anchors points.
|
98 |
+
|
99 |
+
Parameters
|
100 |
+
----------
|
101 |
+
f_chroma1 : np.ndarray [shape=(12, N)]
|
102 |
+
Chroma feature matrix of the first sequence
|
103 |
+
|
104 |
+
f_chroma2 : np.ndarray [shape=(12, M)]
|
105 |
+
Chroma feature matrix of the second sequence
|
106 |
+
|
107 |
+
anchors : np.ndarray [shape=(2, R)]
|
108 |
+
Anchor sequence
|
109 |
+
|
110 |
+
f_onset1 : np.ndarray [shape=(L, N)]
|
111 |
+
Onset feature matrix of the first sequence
|
112 |
+
|
113 |
+
f_onset2 : np.ndarray [shape=(L, M)]
|
114 |
+
Onset feature matrix of the second sequence
|
115 |
+
|
116 |
+
alpha: float
|
117 |
+
Alpha parameter to weight the cost functions.
|
118 |
+
|
119 |
+
Returns
|
120 |
+
-------
|
121 |
+
cost_matrices: list
|
122 |
+
List containing cost matrices
|
123 |
+
"""
|
124 |
+
high_res = False
|
125 |
+
if f_onset1 is not None and f_onset2 is not None:
|
126 |
+
high_res = True
|
127 |
+
|
128 |
+
cost_matrices = list()
|
129 |
+
for k in range(anchors.shape[1] - 1):
|
130 |
+
a1 = np.array(anchors[:, k].astype(int), copy=True)
|
131 |
+
a2 = np.array(anchors[:, k + 1].astype(int), copy=True)
|
132 |
+
|
133 |
+
if high_res:
|
134 |
+
cost_matrices.append(compute_high_res_cost_matrix(f_chroma1[:, a1[0]: a2[0] + 1],
|
135 |
+
f_chroma2[:, a1[1]: a2[1] + 1],
|
136 |
+
f_onset1[:, a1[0]: a2[0] + 1],
|
137 |
+
f_onset2[:, a1[1]: a2[1] + 1],
|
138 |
+
weights=np.array([alpha, 1-alpha])))
|
139 |
+
else:
|
140 |
+
cost_matrices.append(cosine_distance(f_chroma1[:, a1[0]: a2[0] + 1],
|
141 |
+
f_chroma2[:, a1[1]: a2[1] + 1]))
|
142 |
+
return cost_matrices
|
143 |
+
|
144 |
+
|
145 |
+
def build_path_from_warping_paths(warping_paths: List,
                                  anchors: np.ndarray = None) -> np.ndarray:
    """The function builds a path from a given list of warping paths
    and the anchors used to obtain these paths. The indices of the original
    warping paths are adapted such that they cross the anchors.

    Parameters
    ----------
    warping_paths : list
        List of warping paths

    anchors : np.ndarray [shape=(2, N)]
        Anchor sequence

    Returns
    -------
    path : np.ndarray [shape=(2, M)]
        Merged path
    """

    if anchors is None:
        # When no anchor points are given, we can construct them from the
        # subpaths in the wp_list

        # To do this, we assume that the first path's element is the starting
        # anchor
        anchors = warping_paths[0][:, 0].reshape(-1, 1)

        # Retrieve the last element of each path
        anchors_tmp = np.zeros((2, len(warping_paths)), np.float32)
        for idx, x in enumerate(warping_paths):
            anchors_tmp[:, idx] = x[:, -1]

        # Correct indices, such that the indices of the anchors are given on a
        # common path. Each anchor a_l = [Nnew_[l+1]; Mnew_[l+1]] with
        # Nnew_[l+1] = N_l + N_[l+1] - 1
        # Mnew_[l+1] = M_l + M_[l+1] - 1

        anchors_tmp = np.cumsum(anchors_tmp, axis=1)
        anchors_tmp[:, 1:] = anchors_tmp[:, 1:] - [np.arange(1, anchors_tmp.shape[1]),
                                                   np.arange(1, anchors_tmp.shape[1])]

        anchors = np.concatenate([anchors, anchors_tmp], axis=1)

    L = len(warping_paths) + 1
    path = None
    wp = None

    for anchor_idx in range(1, L):
        anchor1 = anchors[:, anchor_idx - 1]
        anchor2 = anchors[:, anchor_idx]

        wp = np.array(warping_paths[anchor_idx - 1], copy=True)

        # correct indices in warping path
        wp += np.repeat(anchor1.reshape(-1, 1), wp.shape[1], axis=1).astype(wp.dtype)

        # consistency checks
        assert np.array_equal(wp[:, 0], anchor1), 'First entry of warping path does not coincide with anchor point'
        assert np.array_equal(wp[:, -1], anchor2), 'Last entry of warping path does not coincide with anchor point'

        if path is None:
            path = np.array(wp[:, :-1], copy=True)
        else:
            path = np.concatenate([path, wp[:, :-1]], axis=1)

    # append last index of warping path
    path = np.concatenate([path, wp[:, -1].reshape(-1, 1)], axis=1)

    return path

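A self-contained toy example of merging subpaths with the function above; the subpaths and anchors are hand-crafted so that each subpath starts at the preceding anchor.

import numpy as np
from musc.dtw.utils import build_path_from_warping_paths

wp_a = np.array([[0, 1, 2], [0, 1, 2]])        # local subpath between anchors (0, 0) and (2, 2)
wp_b = np.array([[0, 1, 2, 3], [0, 1, 1, 2]])  # local subpath between anchors (2, 2) and (5, 4)
anchors = np.array([[0, 2, 5],
                    [0, 2, 4]])
path = build_path_from_warping_paths([wp_a, wp_b], anchors)
print(path)  # one path from (0, 0) to (5, 4) that passes through the anchor (2, 2)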
def find_anchor_indices_in_warping_path(warping_path: np.ndarray,
                                        anchors: np.ndarray) -> np.ndarray:
    """Compute the indices in the warping path that correspond
    to the elements in 'anchors'.

    Parameters
    ----------
    warping_path : np.ndarray [shape=(2, N)]
        Warping path

    anchors : np.ndarray [shape=(2, M)]
        Anchor sequence

    Returns
    -------
    indices : np.ndarray [shape=(M,)]
        Anchor indices in the ``warping_path``
    """
    indices = np.zeros(anchors.shape[1])

    for k in range(anchors.shape[1]):
        a = anchors[:, k]
        indices[k] = np.where((a[0] == warping_path[0, :]) & (a[1] == warping_path[1, :]))[0]

    return indices

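A quick, hypothetical lookup with the helper above; the path and anchors are toy values chosen so that every anchor lies on the path.

import numpy as np
from musc.dtw.utils import find_anchor_indices_in_warping_path

wp = np.array([[0, 1, 2, 3],
               [0, 1, 1, 3]])
anchor_points = np.array([[0, 3],
                          [0, 3]])
print(find_anchor_indices_in_warping_path(wp, anchor_points))  # [0. 3.]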
def make_path_strictly_monotonic(P: np.ndarray) -> np.ndarray:
    """Compute strict alignment path from a warping path

    Wrapper around "compute_strict_alignment_path_mask" from libfmp.

    Parameters
    ----------
    P : np.ndarray [shape=(2, N)]
        Warping path

    Returns
    -------
    P_mod : np.ndarray [shape=(2, M)]
        Strict alignment path, M <= N
    """
    P_mod = compute_strict_alignment_path_mask(P.T)

    return P_mod.T


def compute_strict_alignment_path_mask(P):
    """Compute strict alignment path from a warping path

    Notebook: C3/C3S3_MusicAppTempoCurve.ipynb

    Args:
        P (list or np.ndarray): Warping path

    Returns:
        P_mod (list or np.ndarray): Strict alignment path
    """
    P = np.array(P, copy=True)
    N, M = P[-1]
    # Get indices for strict monotonicity
    keep_mask = (P[1:, 0] > P[:-1, 0]) & (P[1:, 1] > P[:-1, 1])
    # Add first index to enforce start boundary condition
    keep_mask = np.concatenate(([True], keep_mask))
    # Remove all indices of the last row or column
    keep_mask[(P[:, 0] == N) | (P[:, 1] == M)] = False
    # Add last index to enforce end boundary condition
    keep_mask[-1] = True
    P_mod = P[keep_mask, :]

    return P_mod

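A tiny illustration of the strictness fix; the warping path below is hypothetical and contains a repeated first-sequence index that gets dropped.

import numpy as np
from musc.dtw.utils import make_path_strictly_monotonic

P = np.array([[0, 1, 1, 2, 3],
              [0, 1, 2, 3, 4]])       # rows: indices into sequence 1 / sequence 2
P_strict = make_path_strictly_monotonic(P)
print(P_strict)                        # [[0 1 2 3], [0 1 3 4]]; the column (1, 2) is removed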
def evaluate_synchronized_positions(ground_truth_positions: np.ndarray,
                                    synchronized_positions: np.ndarray,
                                    tolerances: List = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 150, 250]):
    """Compute standard evaluation measures for evaluating the quality of synchronized (musical) positions.

    When synchronizing two versions of a piece of music, one can evaluate the quality of the resulting alignment
    by comparing errors at musical positions (e.g. beats or measures) that appear in both versions.
    This function implements two measures: mean absolute error at positions and the percentage of correctly
    transferred measures given a threshold.

    Parameters
    ----------
    ground_truth_positions : np.ndarray [shape=(N,)]
        Positions (e.g. beat or measure positions) annotated in the target version of a piece of music,
        in milliseconds.

    synchronized_positions : np.ndarray [shape=(N,)]
        The same musical positions as in 'ground_truth_positions' obtained by transfer using music synchronization,
        in milliseconds.

    tolerances : list of integers
        Tolerances (in milliseconds) used for comparing annotated and synchronized positions.

    Returns
    -------
    mean_absolute_error : float
        Mean absolute error for synchronized positions, in milliseconds.

    accuracy_at_tolerances : list of floats
        Percentages of correctly transferred measures, for each entry in 'tolerances'.
    """
    absolute_errors_at_positions = np.abs(synchronized_positions - ground_truth_positions)

    print('Measure transfer from recording 1 to 2 yielded:')
    mean_absolute_error = np.mean(absolute_errors_at_positions)
    print('\nMean absolute error (MAE): %.2fms (standard deviation: %.2fms)' % (mean_absolute_error,
                                                                                np.std(absolute_errors_at_positions)))
    print('\nAccuracy of transferred positions at different tolerances:')
    print('\t\t\tAccuracy')
    print('################################')
    accuracy_at_tolerances = []
    for tolerance in tolerances:
        accuracy = np.mean(absolute_errors_at_positions < tolerance) * 100.0
        accuracy_at_tolerances.append(accuracy)
        print('Tolerance: {} ms \t{:.2f} %'.format(tolerance, accuracy))

    return mean_absolute_error, accuracy_at_tolerances

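A made-up sanity check of the evaluation helper; all positions are in milliseconds.

import numpy as np
from musc.dtw.utils import evaluate_synchronized_positions

gt_positions = np.array([1000.0, 2000.0, 3000.0, 4000.0])
sync_positions = np.array([1015.0, 1990.0, 3040.0, 4002.0])
mae, acc = evaluate_synchronized_positions(gt_positions, sync_positions, tolerances=[20, 50])
# mae is 16.75 ms; acc is [75.0, 100.0] (3 of 4 errors are below 20 ms, all are below 50 ms)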
def smooth_downsample_feature(f_feature: np.ndarray,
                              input_feature_rate: float,
                              win_len_smooth: int = 0,
                              downsamp_smooth: int = 1) -> Tuple[np.ndarray, float]:
    """Temporal smoothing and downsampling of a feature sequence

    Parameters
    ----------
    f_feature : np.ndarray
        Input feature sequence, size d x N

    input_feature_rate : float
        Input feature rate in Hz

    win_len_smooth : int
        Smoothing window length. For 0, no smoothing is applied.

    downsamp_smooth : int
        Downsampling factor. For 1, no downsampling is applied.

    Returns
    -------
    f_feature_stat : np.ndarray
        Downsampled & smoothed feature.

    new_feature_rate : float
        New feature rate after downsampling
    """
    if win_len_smooth != 0 or downsamp_smooth != 1:
        # hack to get the same results as on MATLAB
        stat_window = np.hanning(win_len_smooth + 2)[1:-1]
        stat_window /= np.sum(stat_window)

        # upfirdn filters and downsamples each column of f_stat_help
        f_feature_stat = signal.upfirdn(h=stat_window, x=f_feature, up=1, down=downsamp_smooth)
        seg_num = f_feature.shape[1]
        stat_num = int(np.ceil(seg_num / downsamp_smooth))
        cut = int(np.floor((win_len_smooth - 1) / (2 * downsamp_smooth)))
        f_feature_stat = f_feature_stat[:, cut: stat_num + cut]
    else:
        f_feature_stat = f_feature

    new_feature_rate = input_feature_rate / downsamp_smooth

    return f_feature_stat, new_feature_rate

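A brief sketch of smoothing and downsampling a feature sequence with the helper above; the feature rate and window length are made-up values.

import numpy as np
from musc.dtw.utils import smooth_downsample_feature

chroma = np.random.rand(12, 500)                       # 50 Hz chroma features
chroma_smooth, rate = smooth_downsample_feature(chroma, input_feature_rate=50,
                                                win_len_smooth=41, downsamp_smooth=10)
print(chroma_smooth.shape, rate)                       # (12, 50) 5.0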
@jit(nopython=True)
def normalize_feature(feature: np.ndarray,
                      norm_ord: int,
                      threshold: float) -> np.ndarray:
    """Normalizes a feature sequence according to the l^norm_ord norm.

    Parameters
    ----------
    feature : np.ndarray
        Input feature sequence of size d x N
        d: dimensionality of feature vectors
        N: number of feature vectors (time in frames)

    norm_ord : int
        Norm degree

    threshold : float
        If the norm falls below threshold for a feature vector, then the
        normalized feature vector is set to be the normalized unit vector.

    Returns
    -------
    f_normalized : np.ndarray
        Normalized feature sequence
    """
    # TODO rewrite in vectorized fashion
    d, N = feature.shape
    f_normalized = np.zeros((d, N))

    # normalize the vectors according to the l^norm_ord norm
    unit_vec = np.ones(d)
    unit_vec = unit_vec / np.linalg.norm(unit_vec, norm_ord)

    for k in range(N):
        cur_norm = np.linalg.norm(feature[:, k], norm_ord)

        if cur_norm < threshold:
            f_normalized[:, k] = unit_vec
        else:
            f_normalized[:, k] = feature[:, k] / cur_norm

    return f_normalized
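A small example of the normalization above (illustrative values): columns whose norm falls below the threshold are replaced by the normalized all-ones vector.

import numpy as np
from musc.dtw.utils import normalize_feature

chroma = np.random.rand(12, 100)
chroma[:, 0] = 0.0                                     # a silent frame
chroma_norm = normalize_feature(chroma, norm_ord=2, threshold=0.001)
print(np.linalg.norm(chroma_norm[:, 5]))               # ~1.0
print(chroma_norm[0, 0])                               # 1/sqrt(12) ~ 0.2887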
musc/dtw/visualization.py
ADDED
@@ -0,0 +1,216 @@
import matplotlib
import matplotlib.cm
import matplotlib.patches
import matplotlib.pyplot as plt
import numpy as np
from typing import Tuple, List


def sync_visualize_step1(cost_matrices: List,
                         num_rows: int,
                         num_cols: int,
                         anchors: np.ndarray,
                         wp: np.ndarray) -> Tuple[plt.Figure, plt.Axes]:

    fig, ax = plt.subplots(1, 1, dpi=72)
    ax = __visualize_cost_matrices(ax, cost_matrices)
    __visualize_constraint_rectangles(anchors[[1, 0], :],
                                      edgecolor='firebrick')

    __visualize_path_in_matrix(ax=ax,
                               wp=wp,
                               axisX=np.arange(0, num_rows),
                               axisY=np.arange(0, num_cols),
                               path_color='firebrick')

    return fig, ax


def sync_visualize_step2(ax: plt.Axes,
                         cost_matrices: list,
                         wp_step2: np.ndarray,
                         wp_step1: np.ndarray,
                         num_rows_step1: int,
                         num_cols_step1: int,
                         anchors_step1: np.ndarray,
                         neighboring_anchors: np.ndarray,
                         plot_title: str = ""):

    offset_x = neighboring_anchors[0, 0] - 1
    offset_y = neighboring_anchors[1, 0] - 1
    ax = __visualize_cost_matrices(ax=ax,
                                   cost_matrices=cost_matrices,
                                   offset_x=offset_x,
                                   offset_y=offset_y)

    __visualize_constraint_rectangles(anchors_step1[[1, 0], :],
                                      edgecolor='firebrick')

    __visualize_path_in_matrix(ax=ax,
                               wp=wp_step1,
                               axisX=np.arange(0, num_rows_step1),
                               axisY=np.arange(0, num_cols_step1),
                               path_color='firebrick')

    __visualize_constraint_rectangles(neighboring_anchors[[1, 0], :] - 1,
                                      edgecolor='orangered',
                                      linestyle='--')

    __visualize_path_in_matrix(ax=ax,
                               wp=wp_step2,
                               axisX=np.arange(0, num_rows_step1),
                               axisY=np.arange(0, num_cols_step1),
                               path_color='orangered')

    ax.set_title(plot_title)
    ax.set_ylabel("Version 1 (frames)")
    ax.set_xlabel("Version 2 (frames)")

    ax = plt.gca()  # get the current axes
    pcm = None
    for pcm in ax.get_children():
        if isinstance(pcm, matplotlib.cm.ScalarMappable):
            break
    plt.colorbar(pcm, ax=ax)
    plt.tight_layout()
    plt.show()


def __size_dtw_matrices(dtw_matrices: List) -> Tuple[List[np.ndarray], List[np.ndarray]]:
    """Gives information about the dimensionality of a DTW matrix
    given in the form of a list of submatrices.

    Parameters
    ----------
    dtw_matrices : list
        The DTW matrix (cost matrix or accumulated cost matrix) given in the form of a list.

    Returns
    -------
    axis_x_list : list
        A list containing a horizontal axis for each of the sub matrices
        which specifies the horizontal position of the respective submatrix
        in the overall cost matrix.

    axis_y_list : list
        A list containing a vertical axis for each of the
        sub matrices which specifies the vertical position of the
        respective submatrix in the overall cost matrix.
    """
    num_matrices = len(dtw_matrices)
    size_list = [dtw_mat.shape for dtw_mat in dtw_matrices]

    axis_x_list = list()
    axis_y_list = list()

    x_acc = 0
    y_acc = 0

    for i in range(num_matrices):
        curr_size_list = size_list[i]
        axis_x_list.append(np.arange(x_acc, x_acc + curr_size_list[0]))
        axis_y_list.append(np.arange(y_acc, y_acc + curr_size_list[1]))
        x_acc += curr_size_list[0] - 1
        y_acc += curr_size_list[1] - 1

    return axis_x_list, axis_y_list


def __visualize_cost_matrices(ax: plt.Axes,
                              cost_matrices: list = None,
                              offset_x: float = 0.0,
                              offset_y: float = 0.0) -> plt.Axes:
    """Visualizes cost matrices

    Parameters
    ----------
    ax : axes
        The Axes instance to plot on

    cost_matrices : list
        List of DTW cost matrices.

    offset_x : float
        Offset on the x axis.

    offset_y : float
        Offset on the y axis.

    Returns
    -------
    ax : axes
        The Axes instance to plot on
    """
    x_ax, y_ax = __size_dtw_matrices(dtw_matrices=cost_matrices)

    for i, cur_cost in enumerate(cost_matrices[::-1]):
        curr_x_ax = x_ax[i] + offset_x
        curr_y_ax = y_ax[i] + offset_y
        cur_cost = cost_matrices[i]
        ax.imshow(cur_cost, cmap='gray_r', aspect='auto', origin='lower',
                  extent=[curr_y_ax[0], curr_y_ax[-1], curr_x_ax[0], curr_x_ax[-1]])

    return ax


def __visualize_path_in_matrix(ax,
                               wp: np.ndarray = None,
                               axisX: np.ndarray = None,
                               axisY: np.ndarray = None,
                               path_color: str = 'r'):
    """Plots a warping path on top of a given matrix. The matrix is
    usually an accumulated cost matrix.

    Parameters
    ----------
    ax : axes
        The Axes instance to plot on

    wp : np.ndarray
        Warping path

    axisX : np.ndarray
        Array of X axis

    axisY : np.ndarray
        Array of Y axis

    path_color : str
        Color of the warping path to be plotted. (default: r)
    """
    assert axisX is not None and isinstance(axisX, np.ndarray), 'axisX must be a numpy array!'
    assert axisY is not None and isinstance(axisY, np.ndarray), 'axisY must be a numpy array!'

    wp = wp.astype(int)

    ax.plot(axisY[wp[1, :]], axisX[wp[0, :]], '-k', linewidth=5)
    ax.plot(axisY[wp[1, :]], axisX[wp[0, :]], color=path_color, linewidth=3)


def __visualize_constraint_rectangles(anchors: np.ndarray,
                                      linestyle: str = '-',
                                      edgecolor: str = 'royalblue',
                                      linewidth: float = 1.0):

    for k in range(anchors.shape[1] - 1):
        a1 = anchors[:, k]
        a2 = anchors[:, k + 1]

        # a rectangle is defined by [x y width height]
        x = a1[0]
        y = a1[1]
        w = a2[0] - a1[0] + np.finfo(float).eps
        h = a2[1] - a1[1] + np.finfo(float).eps

        rect = matplotlib.patches.Rectangle((x, y), w, h,
                                            linewidth=linewidth,
                                            edgecolor=edgecolor,
                                            linestyle=linestyle,
                                            facecolor='none')

        plt.gca().add_patch(rect)
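A hedged sketch of calling the public plotting entry point above; the submatrices, anchors, and warping path are placeholders (in practice they come from the MrMsDTW routines in musc/dtw).

import numpy as np
from musc.dtw.visualization import sync_visualize_step1

cost_matrices = [np.random.rand(50, 60), np.random.rand(40, 30)]  # toy anchor-delimited submatrices
anchors = np.array([[0, 49, 88],
                    [0, 59, 88]])                                  # toy anchor frames (rows / columns)
wp = np.vstack([np.arange(89), np.arange(89)])                     # toy diagonal warping path
fig, ax = sync_visualize_step1(cost_matrices, num_rows=89, num_cols=89, anchors=anchors, wp=wp)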
musc/model.py
ADDED
@@ -0,0 +1,275 @@
from musc.pathway import TinyPathway
from musc.synchronizer import Synchronizer
from musc.representations import PerformanceLabel
from torchaudio.models.conformer import ConformerLayer
import torch
from torch import nn
import numpy as np
import os
import json
import gdown


class FourHeads(Synchronizer):
    def __init__(
            self,
            pathway_multiscale: int = 32,
            num_pathway_layers: int = 2,
            chunk_size: int = 256,
            hop_length: int = 256,
            encoder_dim: int = 256,
            sr: int = 44100,
            num_heads: int = 4,
            ffn_dim: int = 128,
            num_separator_layers: int = 16,
            num_representation_layers: int = 4,
            depthwise_conv_kernel_size: int = 31,
            dropout: float = 0.25,
            use_group_norm: bool = False,
            convolution_first: bool = False,
            labeling=PerformanceLabel(),
            wiring='tiktok'
    ):
        super().__init__(labeling, sr=sr, hop_length=hop_length)
        self.main = TinyPathway(dilation=1, hop=hop_length, localize=True,
                                n_layers=num_pathway_layers, chunk_size=chunk_size)
        self.attendant = TinyPathway(dilation=pathway_multiscale, hop=hop_length, localize=False,
                                     n_layers=num_pathway_layers, chunk_size=chunk_size)
        assert self.main.hop == self.attendant.hop  # they should output with the same sample rate
        print('hop in samples:', self.main.hop)
        self.input_window = self.attendant.input_window

        self.encoder_dim = encoder_dim
        self.dropout = nn.Dropout(dropout)

        # merge two streams into a conformer input
        self.stream_merger = nn.Sequential(self.dropout,
                                           nn.Linear(self.main.out_dim + self.attendant.out_dim, self.encoder_dim))

        print('main stream window:', self.main.input_window,
              ', attendant stream window:', self.attendant.input_window,
              ', conformer input dim:', self.encoder_dim)

        center = ((chunk_size - 1) * self.main.hop)  # region labeled with pitch track
        main_overlap = self.main.input_window - center
        main_overlap = [int(np.floor(main_overlap / 2)), int(np.ceil(main_overlap / 2))]
        attendant_overlap = self.attendant.input_window - center
        attendant_overlap = [int(np.floor(attendant_overlap / 2)), int(np.ceil(attendant_overlap / 2))]
        print('main frame overlap:', main_overlap, ', attendant frame overlap:', attendant_overlap)
        main_crop_relative = [attendant_overlap[0] - main_overlap[0], main_overlap[1] - attendant_overlap[1]]
        print('crop for main pathway', main_crop_relative)
        print("Total sequence duration is", self.attendant.input_window, 'samples')
        print('Main stream receptive field for one frame is', (self.main.input_window - center), 'samples')
        print('Attendant stream receptive field for one frame is', (self.attendant.input_window - center), 'samples')
        self.frame_overlap = attendant_overlap

        self.main_stream_crop = main_crop_relative
        self.max_window_size = self.attendant.input_window
        self.chunk_size = chunk_size

        self.separator_stream = nn.ModuleList(  # source-separation, reinvented
            [
                ConformerLayer(
                    input_dim=self.encoder_dim,
                    ffn_dim=ffn_dim,
                    num_attention_heads=num_heads,
                    depthwise_conv_kernel_size=depthwise_conv_kernel_size,
                    dropout=dropout,
                    use_group_norm=use_group_norm,
                    convolution_first=convolution_first,
                )
                for _ in range(num_separator_layers)
            ]
        )

        self.f0_stream = nn.ModuleList(
            [
                ConformerLayer(
                    input_dim=self.encoder_dim,
                    ffn_dim=ffn_dim,
                    num_attention_heads=num_heads,
                    depthwise_conv_kernel_size=depthwise_conv_kernel_size,
                    dropout=dropout,
                    use_group_norm=use_group_norm,
                    convolution_first=convolution_first,
                )
                for _ in range(num_representation_layers)
            ]
        )
        self.f0_head = nn.Linear(self.encoder_dim, len(self.labeling.f0_centers_c))

        self.note_stream = nn.ModuleList(
            [
                ConformerLayer(
                    input_dim=self.encoder_dim,
                    ffn_dim=ffn_dim,
                    num_attention_heads=num_heads,
                    depthwise_conv_kernel_size=depthwise_conv_kernel_size,
                    dropout=dropout,
                    use_group_norm=use_group_norm,
                    convolution_first=convolution_first,
                )
                for _ in range(num_representation_layers)
            ]
        )
        self.note_head = nn.Linear(self.encoder_dim, len(self.labeling.midi_centers))

        self.onset_stream = nn.ModuleList(
            [
                ConformerLayer(
                    input_dim=self.encoder_dim,
                    ffn_dim=ffn_dim,
                    num_attention_heads=num_heads,
                    depthwise_conv_kernel_size=depthwise_conv_kernel_size,
                    dropout=dropout,
                    use_group_norm=use_group_norm,
                    convolution_first=convolution_first,
                )
                for _ in range(num_representation_layers)
            ]
        )
        self.onset_head = nn.Linear(self.encoder_dim, len(self.labeling.midi_centers))

        self.offset_stream = torch.nn.ModuleList(
            [
                ConformerLayer(
                    input_dim=self.encoder_dim,
                    ffn_dim=ffn_dim,
                    num_attention_heads=num_heads,
                    depthwise_conv_kernel_size=depthwise_conv_kernel_size,
                    dropout=dropout,
                    use_group_norm=use_group_norm,
                    convolution_first=convolution_first,
                )
                for _ in range(num_representation_layers)
            ]
        )
        self.offset_head = nn.Linear(self.encoder_dim, len(self.labeling.midi_centers))

        self.labeling = labeling
        self.double_merger = nn.Sequential(self.dropout, nn.Linear(2 * self.encoder_dim, self.encoder_dim))
        self.triple_merger = nn.Sequential(self.dropout, nn.Linear(3 * self.encoder_dim, self.encoder_dim))
        self.wiring = wiring

        print('Total parameter count: ', self.count_parameters())

    def count_parameters(self) -> int:
        """ Count parameters of encoder """
        return sum([p.numel() for p in self.parameters()])

    def stream(self, x, representation, key_padding_mask=None):
        for i, layer in enumerate(self.__getattr__('{}_stream'.format(representation))):
            x = layer(x, key_padding_mask)
        return x

    def head(self, x, representation):
        return self.__getattr__('{}_head'.format(representation))(x)

    def forward(self, x, key_padding_mask=None):

        # two auditory streams followed by the separator stream to ensure timbre-awareness
        x_attendant = self.attendant(x)
        x_main = self.main(x[:, self.main_stream_crop[0]:self.main_stream_crop[1]])
        x = self.stream_merger(torch.cat((x_attendant, x_main), -1).squeeze(1))
        x = self.stream(x, 'separator', key_padding_mask)

        f0 = self.stream(x, 'f0', key_padding_mask)  # they say this is a low-level feature :)

        if self.wiring == 'parallel':
            note = self.stream(x, 'note', key_padding_mask)
            onset = self.stream(x, 'onset', key_padding_mask)
            offset = self.stream(x, 'offset', key_padding_mask)

        elif self.wiring == 'tiktok':
            onset = self.stream(x, 'onset', key_padding_mask)
            offset = self.stream(x, 'offset', key_padding_mask)
            # f0 is disconnected, note relies on separator, onset, and offset
            note = self.stream(self.triple_merger(torch.cat((x, onset, offset), -1)), 'note', key_padding_mask)

        elif self.wiring == 'tiktok2':
            onset = self.stream(x, 'onset', key_padding_mask)
            offset = self.stream(x, 'offset', key_padding_mask)
            # note is connected to f0, onset, and offset
            note = self.stream(self.triple_merger(torch.cat((f0, onset, offset), -1)), 'note', key_padding_mask)

        elif self.wiring == 'spotify':
            # note is connected to f0 only
            note = self.stream(f0, 'note', key_padding_mask)
            # here onset and offset are higher-level features informed by the separator and note
            onset = self.stream(self.double_merger(torch.cat((x, note), -1)), 'onset', key_padding_mask)
            offset = self.stream(self.double_merger(torch.cat((x, note), -1)), 'offset', key_padding_mask)

        else:
            # onset and offset are connected to f0 and separator streams
            onset = self.stream(self.double_merger(torch.cat((x, f0), -1)), 'onset', key_padding_mask)
            offset = self.stream(self.double_merger(torch.cat((x, f0), -1)), 'offset', key_padding_mask)
            # note is connected to f0, onset, and offset streams
            note = self.stream(self.triple_merger(torch.cat((f0, onset, offset), -1)), 'note', key_padding_mask)

        return {'f0': self.head(f0, 'f0'),
                'note': self.head(note, 'note'),
                'onset': self.head(onset, 'onset'),
                'offset': self.head(offset, 'offset')}

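A hedged smoke test of the four-headed model above; it assumes musc and its dependencies are installed and that the model expects one window of raw audio samples of length max_window_size per batch item.

import torch
from musc.model import FourHeads

model = FourHeads(wiring='tiktok')
model.eval()
dummy_audio = torch.randn(1, model.max_window_size)    # assumed input shape: (batch, samples)
with torch.no_grad():
    out = model(dummy_audio)
print({k: v.shape for k, v in out.items()})             # per-frame f0 / note / onset / offset logits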
class PretrainedModel(FourHeads):
    def __init__(self, instrument='violin'):
        assert instrument in ['violin', 'Violin', 'vln', 'vl'], 'As of now, the only supported instrument is the violin'
        instrument = 'violin'
        package_dir = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(package_dir, instrument + ".json"), "r") as f:
            args = json.load(f)
        labeling = PerformanceLabel(note_min=args['note_low'], note_max=args['note_high'],
                                    f0_bins_per_semitone=args['f0_bins_per_semitone'],
                                    f0_tolerance_c=200,
                                    f0_smooth_std_c=args['f0_smooth_std_c'], onset_smooth_std=args['onset_smooth_std'])

        super().__init__(pathway_multiscale=args['pathway_multiscale'],
                         num_pathway_layers=args['num_pathway_layers'], wiring=args['wiring'],
                         hop_length=args['hop_length'], chunk_size=args['chunk_size'],
                         labeling=labeling, sr=args['sampling_rate'])
        self.model_url = args['model_file']
        self.load_weight(instrument)
        self.eval()

    def load_weight(self, instrument):
        self.download_weights(instrument)
        package_dir = os.path.dirname(os.path.realpath(__file__))
        filename = "{}_model.pt".format(instrument)
        self.load_state_dict(torch.load(os.path.join(package_dir, filename)))

    def download_weights(self, instrument):
        weight_file = "{}_model.pt".format(instrument)
        package_dir = os.path.dirname(os.path.realpath(__file__))
        weight_path = os.path.join(package_dir, weight_file)
        if not os.path.exists(weight_path):
            gdown.download(f"https://drive.google.com/uc?export=download&confirm=pbef&id={self.model_url}", weight_path)

    @staticmethod
    def download_youtube(url, audio_codec='wav'):
        from yt_dlp import YoutubeDL
        ydl_opts = {'no-playlist': True, 'quiet': True, 'format': 'bestaudio/best',
                    'outtmpl': '%(id)s.%(ext)s',
                    'postprocessors': [{
                        'key': 'FFmpegExtractAudio',
                        'preferredcodec': audio_codec,
                        'preferredquality': '192', }], }
        with YoutubeDL(ydl_opts) as ydl:
            info_dict = ydl.extract_info(url, download=False)
            video_id = info_dict.get('id', None)
            title = info_dict.get('title', None)
            ydl.download([url])
        return video_id + '.' + audio_codec, video_id, title

    def transcribe_youtube(self, url, audio_codec='wav', batch_size=64,
                           postprocessing='spotify', include_pitch_bends=True):
        file_path, video_id, title = self.download_youtube(url, audio_codec=audio_codec)
        midi = self.transcribe(file_path, batch_size=batch_size,
                               postprocessing=postprocessing, include_pitch_bends=include_pitch_bends)
        return midi, video_id, title
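A short usage sketch for the pretrained wrapper above; the audio path is a placeholder, transcribe is inherited from the Synchronizer base class, and the assumption that its return value has a write() method (e.g. a pretty_midi-style object) is not verified here.

from musc.model import PretrainedModel

model = PretrainedModel(instrument='violin')           # downloads the weights on first use
midi = model.transcribe('my_recording.wav')            # placeholder path
midi.write('my_recording.mid')                         # assumes a pretty_midi-like return value

# Or transcribe straight from YouTube (needs yt_dlp and ffmpeg):
# midi, video_id, title = model.transcribe_youtube('https://www.youtube.com/watch?v=...')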