from __future__ import unicode_literals import argparse from subprocess import call import cv2 import numpy as np import os import shutil import pandas as pd from tqdm import tqdm import time parser = argparse.ArgumentParser() parser.add_argument('-base_path', '--base_path', help='base folder path of dataset') parser.add_argument('-speaker', '--speaker', help='download videos of a specific speaker {oliver, jon, conan, rock, chemistry, ellen, almaram, angelica, seth, shelly}') args = parser.parse_args() BASE_PATH = args.base_path df = pd.read_csv(os.path.join(BASE_PATH, "videos_links.csv")) if args.speaker: df = df[df['speaker'] == args.speaker] temp_output_path = './tmp2/temp_video.mp4' for _, row in tqdm(df.iterrows(), total=df.shape[0]): i, name, link = row if 'youtube' in link: try: output_path = os.path.join(BASE_PATH, row["speaker"], "videos", row["video_fn"]) if not (os.path.exists(os.path.dirname(output_path))): os.makedirs(os.path.dirname(output_path)) command = 'yt-dlp -o {temp_path} -f mp4 {link}'.format(link=link, temp_path=temp_output_path) res1 = call(command, shell=True) cam = cv2.VideoCapture(temp_output_path) if np.isclose(cam.get(cv2.CAP_PROP_FPS), 29.97, atol=0.03): cam.release() shutil.move(temp_output_path, output_path) else: res2 = call('ffmpeg -i "%s" -r 30000/1001 -strict -2 "%s" -y' % (temp_output_path, output_path), shell=True) except Exception as e: print (e) finally: if os.path.exists(temp_output_path): os.remove(temp_output_path) print("Out of a total of %s videos for %s: "%(len(df), args.speaker)) print("Successfully downloaded:") my_cmd = 'ls ' + os.path.join(BASE_PATH, row["speaker"], "videos") + ' | wc -l' os.system(my_cmd)