File size: 1,964 Bytes
3bbb319
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from __future__ import unicode_literals

import argparse
from subprocess import call

import cv2
import numpy as np
import os
import shutil
import pandas as pd
from tqdm import tqdm
import time

# Command-line interface: the dataset root is mandatory, the speaker filter
# is optional.
parser = argparse.ArgumentParser()
# --base_path is required: without it BASE_PATH would be None and the
# os.path.join() below would fail with an opaque TypeError instead of a
# readable usage message.
parser.add_argument('-base_path', '--base_path', required=True,
                    help='base folder path of dataset')
parser.add_argument('-speaker', '--speaker',
                    help='download videos of a specific speaker {oliver, jon, conan, rock, chemistry, ellen, almaram, angelica, seth, shelly}')
args = parser.parse_args()

BASE_PATH = args.base_path
# The list of videos is read from videos_links.csv in the dataset root; the
# rest of the script reads its "speaker" and "video_fn" columns by name.
df = pd.read_csv(os.path.join(BASE_PATH, "videos_links.csv"))

# Restrict the download to a single speaker when one was requested.
if args.speaker:
    df = df[df['speaker'] == args.speaker]

# Scratch file each download is written to before it is moved or re-encoded
# into its final location under BASE_PATH.
temp_output_path = './tmp2/temp_video.mp4'

# Download every YouTube video listed in df and store it at
# <BASE_PATH>/<speaker>/videos/<video_fn>, re-encoding to 30000/1001 fps
# (~29.97) whenever the downloaded file has a different frame rate.
for _, row in tqdm(df.iterrows(), total=df.shape[0]):

    # The link is the third CSV column (the position the script has always
    # read it from); look it up directly instead of unpacking the whole row,
    # which raised ValueError for any column count other than three.
    link = row.iloc[2]
    # Skip empty cells (pandas yields NaN, a float) and non-YouTube links.
    if not isinstance(link, str) or 'youtube' not in link:
        continue

    try:
        output_path = os.path.join(BASE_PATH, row["speaker"], "videos", row["video_fn"])
        output_dir = os.path.dirname(output_path)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # Pass arguments as a list (no shell): URLs routinely contain '&',
        # which a shell would interpret as "run in background" and truncate
        # the link, and a CSV-provided string must never reach a shell.
        download_status = call(['yt-dlp', '-o', temp_output_path, '-f', 'mp4', link])
        if not os.path.exists(temp_output_path):
            # Nothing was downloaded; probing or re-encoding a missing file
            # would only produce confusing follow-up errors.
            print("Download failed for %s (yt-dlp exit code %s)" % (link, download_status))
            continue

        # Read the frame rate, then always release the capture handle so the
        # temp file can be moved or deleted afterwards.
        cam = cv2.VideoCapture(temp_output_path)
        try:
            fps = cam.get(cv2.CAP_PROP_FPS)
        finally:
            cam.release()

        if np.isclose(fps, 29.97, atol=0.03):
            # Already at the target frame rate: keep the file as downloaded.
            shutil.move(temp_output_path, output_path)
        else:
            # Re-encode to 30000/1001 fps; -y overwrites an existing output.
            call(['ffmpeg', '-y', '-i', temp_output_path, '-r', '30000/1001',
                  '-strict', '-2', output_path])
    except Exception as e:
        # Best effort: report the problem and carry on with the next video.
        print(e)
    finally:
        # Never leave a stale temp file behind for the next iteration.
        if os.path.exists(temp_output_path):
            os.remove(temp_output_path)
# Final report: how many videos were requested and how many files are now
# present in the "videos" folders of the speakers that were processed.
speaker_label = args.speaker if args.speaker else "all speakers"
print("Out of a total of %s videos for %s: " % (len(df), speaker_label))
print("Successfully downloaded:")

# Count in Python instead of shelling out to `ls | wc -l`: that pipeline
# depended on the loop variable `row` (undefined when df is empty, and only
# the last speaker otherwise), broke on paths with spaces, and is not
# available on every platform.
downloaded_count = 0
for speaker_name in df['speaker'].dropna().unique():
    videos_dir = os.path.join(BASE_PATH, speaker_name, "videos")
    if os.path.isdir(videos_dir):
        # Ignore dot-files so the count matches what a plain `ls` lists.
        downloaded_count += sum(
            1 for entry in os.listdir(videos_dir) if not entry.startswith('.')
        )
print(downloaded_count)