|
from scipy.misc import imsave |
|
import dlib |
|
import os |
|
import glob |
|
import numpy as np |
|
import cv2 |
|
from multiprocessing import Pool |
|
import pdb |
|
from torch.utils.data import DataLoader, Dataset |
|
import time |
|
|
|
|
|
class MyDataset(Dataset):
    """Dataset whose item access extracts frames and audio from one video.

    The input paths are read from ``GRID_files.txt`` (one path per line;
    blank lines and entries whose extension is exactly ``.XML`` are
    skipped).  Indexing the dataset is done for its side effects: it runs
    ``ffmpeg`` twice on the selected file, writing 25 fps JPEG frames into a
    directory under ``self.OUT`` and a mono, 16 kHz, 16-bit PCM WAV file
    under ``self.wav``.
    """

    def __init__(self):
        # Output paths are derived from an input path by replacing the
        # ``IN`` fragment with ``OUT`` (frames) or ``wav`` (audio).
        self.IN = 'GRID/'
        self.OUT = 'GRID_imgs/'
        self.wav = 'GRID_wavs/'

        with open('GRID_files.txt', 'r') as f:
            files = [line.strip() for line in f]

        self.files = []
        for file in files:
            if not file:
                # A blank (e.g. trailing) line is not a path.
                continue
            _, ext = os.path.splitext(file)
            if ext == '.XML':
                continue
            self.files.append(file)
            print(file)

            # Create the WAV destination directory up front; __getitem__
            # relies on it existing when ffmpeg writes the audio file.
            wav_dir = os.path.dirname(self._audio_path(file))
            if wav_dir:
                # exist_ok avoids failing when the directory already exists.
                os.makedirs(wav_dir, exist_ok=True)

    def _frames_dir(self, file):
        """Return the directory that receives the JPEG frames of *file*."""
        # Strip only the trailing extension; str.replace(ext, '') would also
        # rewrite the same text occurring earlier in the path.
        stem, _ = os.path.splitext(file)
        return stem.replace(self.IN, self.OUT)

    def _audio_path(self, file):
        """Return the path of the WAV file extracted from *file*."""
        stem, _ = os.path.splitext(file)
        return stem.replace(self.IN, self.wav) + '.wav'

    @staticmethod
    def _run_ffmpeg(args):
        """Run ``ffmpeg`` with *args*; report a failure instead of raising.

        Returns True when ffmpeg exited with status 0, False otherwise.
        """
        import subprocess  # local: the module header does not import it

        # An argument list (no shell) keeps file names containing quotes or
        # other shell metacharacters intact.
        cmd = ['ffmpeg'] + list(args)
        try:
            status = subprocess.call(cmd)
        except OSError as err:
            print('could not run ffmpeg: {}'.format(err))
            return False
        if status != 0:
            print('ffmpeg exited with status {}: {}'.format(
                status, ' '.join(cmd)))
            return False
        return True

    def __len__(self):
        """Return the number of files that will be processed."""
        return len(self.files)

    def __getitem__(self, idx):
        """Extract frames and audio for the file at *idx*.

        Returns the frame directory path.  ffmpeg failures are reported on
        stdout but do not raise, so one bad file does not stop the run.
        """
        file = self.files[idx]
        dst = self._frames_dir(file)
        os.makedirs(dst, exist_ok=True)

        # Video -> numbered JPEG frames at 25 fps, high quality (qscale 2).
        self._run_ffmpeg([
            '-i', file, '-qscale:v', '2', '-r', '25',
            '{}/%d.jpg'.format(dst),
        ])

        # Video -> mono 16 kHz 16-bit PCM WAV; -y overwrites existing output.
        self._run_ffmpeg([
            '-y', '-i', file, '-async', '1', '-ac', '1', '-vn',
            '-acodec', 'pcm_s16le', '-ar', '16000',
            self._audio_path(file),
        ])

        return dst
|
|
|
|
|
if __name__ == '__main__':
    # Iterating the loader is what performs the work: fetching an item makes
    # MyDataset.__getitem__ run ffmpeg, and num_workers spreads those calls
    # over several worker processes.  The batch contents are not used.
    dataset = MyDataset()
    loader = DataLoader(
        dataset, num_workers=32, batch_size=128,
        shuffle=False, drop_last=False
    )

    total_batches = len(loader)
    tic = time.time()
    for (i, batch) in enumerate(loader):
        done = i + 1
        elapsed = time.time() - tic
        # Average time per finished batch times the batches still to come;
        # reaches 0 once the last batch has been processed.
        eta = elapsed / done * (total_batches - done)
        # eta is in seconds; report it in hours.
        print('eta:{}'.format(eta/3600.0))
|
|