File size: 4,106 Bytes
a03c9b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
"""preprocess_mir1k.py"""
import os
import glob
import re
import json
from typing import Dict, List, Tuple
import numpy as np
from utils.audio import get_audio_file_info, load_audio_file
from utils.midi import midi2note, note_event2midi
from utils.note2event import note2note_event, sort_notes, validate_notes, trim_overlapping_notes
from utils.event2note import event2note_event
from utils.note_event_dataclasses import Note, NoteEvent
from utils.utils import note_event2token2note_event_sanity_check

# def create_spleeter_audio_stem(vocal_audio_file, accomp_audio_file, mir_st500_id) -> Dict:
#     program = MIR_ST500_PROGRAM
#     is_drum = [0, 0]

#     audio_tracks = []  # multi-channel audio array (C, T)
#     vocal_audio = load_audio_file(vocal_audio_file, dtype=np.int16) / 2**15  # returns bytes
#     audio_tracks.append(vocal_audio.astype(np.float16))
#     accomp_audio = load_audio_file(accomp_audio_file, dtype=np.int16) / 2**15  # returns bytes
#     audio_tracks.append(accomp_audio.astype(np.float16))
#     max_length = max(len(vocal_audio), len(accomp_audio))

#     # collate all the audio tracks into a single array
#     n_tracks = 2
#     audio_array = np.zeros((n_tracks, max_length), dtype=np.float16)
#     for j, audio in enumerate(audio_tracks):
#         audio_array[j, :len(audio)] = audio

#     stem_content = {
#         'mir_st500_id': mir_st500_id,
#         'program': np.array(program, dtype=np.int64),
#         'is_drum': np.array(is_drum, dtype=np.int64),
#         'n_frames': max_length,  # int
#         'audio_array': audio_array  # (n_tracks, n_frames)
#     }
#     return stem_content

# def create_note_note_event_midi_from_mir1k_annotation(ann, midi_file, mir_st500_id):
#     """
#     Args:
#         ann: List[List[float, float, float]] # [onset, offset, pitch]
#         mir_st500_id: str
#     Returns:
#         notes: List[Note]
#         note_events: List[NoteEvent]
#         midi: List[List[int]]
#     """
#     notes = []
#     for onset, offset, pitch in ann:
#         notes.append(
#             Note(
#                 is_drum=False,
#                 program=100,
#                 onset=float(onset),
#                 offset=float(offset),
#                 pitch=int(pitch),
#                 velocity=1))
#     notes = sort_notes(notes)
#     notes = validate_notes(notes)
#     notes = trim_overlapping_notes(notes)
#     note_events = note2note_event(notes)

#     # Write midi file
#     note_event2midi(note_events, midi_file)
#     print(f"Created {midi_file}")

#     return {  # notes
#         'mir_st500_id': mir_st500_id,
#         'program': MIR_ST500_PROGRAM,
#         'is_drum': [0, 0],
#         'duration_sec': note_events[-1].time,
#         'notes': notes,
#     }, {  # note_events
#         'mir_st500_id': mir_st500_id,
#         'program': MIR_ST500_PROGRAM,
#         'is_drum': [0, 0],
#         'duration_sec': note_events[-1].time,
#         'note_events': note_events,
#     }


def preprocess_mir1k_16k(data_home: os.PathLike,
                         dataset_name: str = 'mir1k',
                         sanity_check: bool = False) -> None:
    """Prepare the output index directory for the 16 kHz MIR-1K dataset.

    Current behavior (everything the body does today):
        - Builds the dataset directory path
          `{data_home}/{dataset_name}_yourmt3_16k` (computed, not read yet).
        - Creates `{data_home}/yourmt3_indexes` if it does not already exist.

    Args:
        data_home: Root directory that holds the dataset folder and receives
            the `yourmt3_indexes` folder. Required: the previous signature
            used the `os.PathLike` *type* as a default value, which made a
            call without `data_home` fail inside `os.path.join`.
        dataset_name: Prefix of the dataset folder name.
        sanity_check: Accepted for interface compatibility; not used by the
            current body.

    Returns:
        None.

    NOTE(review): the description below is the intended output format kept
    from the original docstring. Nothing in this function writes these files
    yet, and the split sizes and the `mir_st500_id` key look carried over
    from the MIR-ST500 preprocessing -- confirm before relying on them.

    Splits:
        - train: index 1 to 400, 346 files (54 files missing)
        - test: index 401 to 500, 94 files (6 files missing)
        - all: 440 files (60 files missing)

    Writes:
        - {dataset_name}_{split}_file_list.json: a dictionary with the following keys:
        {
            index:
            {
                'mir_st500_id': mir_st500_id,
                'n_frames': (int),
                'mix_audio_file': 'path/to/mix.wav',
                'notes_file': 'path/to/notes.npy',
                'note_events_file': 'path/to/note_events.npy',
                'midi_file': 'path/to/midi.mid',
                'program': List[int], # [100, 129], 100 for singing voice, and 129 for unannotated
                'is_drum': List[int], # [0] or [1]
            }
        }
    """

    # Directory and file paths
    # Dataset folder path; kept for the not-yet-implemented processing steps.
    base_dir = os.path.join(data_home, dataset_name + '_yourmt3_16k')
    output_index_dir = os.path.join(data_home, 'yourmt3_indexes')
    # exist_ok=True makes repeated runs safe when the directory is already there.
    os.makedirs(output_index_dir, exist_ok=True)