Shadhil committed on
Commit 1a79cb6 · verified · 1 Parent(s): cd4aeba

Upload 425 files

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .gitattributes +1 -0
  2. Dockerfile +28 -0
  3. app.py +58 -0
  4. inference.py +345 -0
  5. predict.py +552 -0
  6. requirements.txt +15 -0
  7. temp/1.mp4_coeffs.npy +3 -0
  8. temp/1.mp4_landmarks.txt +0 -0
  9. temp/1.mp4_stablized.npy +3 -0
  10. temp/1.mp4x12_landmarks.txt +0 -0
  11. temp/dropbox5.mp4_coeffs.npy +3 -0
  12. temp/dropbox5.mp4_landmarks.txt +0 -0
  13. temp/dropbox5.mp4_stablized.npy +3 -0
  14. temp/dropbox5.mp4x12_landmarks.txt +0 -0
  15. temp/face.mp4_coeffs.npy +3 -0
  16. temp/face.mp4_landmarks.txt +0 -0
  17. temp/face.mp4_stablized.npy +3 -0
  18. temp/face.mp4x12_landmarks.txt +0 -0
  19. temp/temp/result.mp4 +0 -0
  20. temp/temp/temp.wav +3 -0
  21. third_part/GFPGAN/LICENSE +351 -0
  22. third_part/GFPGAN/gfpgan/__init__.py +8 -0
  23. third_part/GFPGAN/gfpgan/__pycache__/__init__.cpython-37.pyc +0 -0
  24. third_part/GFPGAN/gfpgan/__pycache__/__init__.cpython-38.pyc +0 -0
  25. third_part/GFPGAN/gfpgan/__pycache__/__init__.cpython-39.pyc +0 -0
  26. third_part/GFPGAN/gfpgan/__pycache__/utils.cpython-37.pyc +0 -0
  27. third_part/GFPGAN/gfpgan/__pycache__/utils.cpython-38.pyc +0 -0
  28. third_part/GFPGAN/gfpgan/__pycache__/utils.cpython-39.pyc +0 -0
  29. third_part/GFPGAN/gfpgan/archs/__init__.py +10 -0
  30. third_part/GFPGAN/gfpgan/archs/__pycache__/__init__.cpython-37.pyc +0 -0
  31. third_part/GFPGAN/gfpgan/archs/__pycache__/__init__.cpython-38.pyc +0 -0
  32. third_part/GFPGAN/gfpgan/archs/__pycache__/__init__.cpython-39.pyc +0 -0
  33. third_part/GFPGAN/gfpgan/archs/__pycache__/arcface_arch.cpython-37.pyc +0 -0
  34. third_part/GFPGAN/gfpgan/archs/__pycache__/arcface_arch.cpython-38.pyc +0 -0
  35. third_part/GFPGAN/gfpgan/archs/__pycache__/arcface_arch.cpython-39.pyc +0 -0
  36. third_part/GFPGAN/gfpgan/archs/__pycache__/gfpgan_bilinear_arch.cpython-37.pyc +0 -0
  37. third_part/GFPGAN/gfpgan/archs/__pycache__/gfpgan_bilinear_arch.cpython-38.pyc +0 -0
  38. third_part/GFPGAN/gfpgan/archs/__pycache__/gfpgan_bilinear_arch.cpython-39.pyc +0 -0
  39. third_part/GFPGAN/gfpgan/archs/__pycache__/gfpganv1_arch.cpython-37.pyc +0 -0
  40. third_part/GFPGAN/gfpgan/archs/__pycache__/gfpganv1_arch.cpython-38.pyc +0 -0
  41. third_part/GFPGAN/gfpgan/archs/__pycache__/gfpganv1_arch.cpython-39.pyc +0 -0
  42. third_part/GFPGAN/gfpgan/archs/__pycache__/gfpganv1_clean_arch.cpython-37.pyc +0 -0
  43. third_part/GFPGAN/gfpgan/archs/__pycache__/gfpganv1_clean_arch.cpython-38.pyc +0 -0
  44. third_part/GFPGAN/gfpgan/archs/__pycache__/gfpganv1_clean_arch.cpython-39.pyc +0 -0
  45. third_part/GFPGAN/gfpgan/archs/__pycache__/stylegan2_bilinear_arch.cpython-37.pyc +0 -0
  46. third_part/GFPGAN/gfpgan/archs/__pycache__/stylegan2_bilinear_arch.cpython-38.pyc +0 -0
  47. third_part/GFPGAN/gfpgan/archs/__pycache__/stylegan2_bilinear_arch.cpython-39.pyc +0 -0
  48. third_part/GFPGAN/gfpgan/archs/__pycache__/stylegan2_clean_arch.cpython-37.pyc +0 -0
  49. third_part/GFPGAN/gfpgan/archs/__pycache__/stylegan2_clean_arch.cpython-38.pyc +0 -0
  50. third_part/GFPGAN/gfpgan/archs/__pycache__/stylegan2_clean_arch.cpython-39.pyc +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ temp/temp/temp.wav filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,28 @@
+ # Use Python 3.9 base image
+ FROM python:3.9
+
+ # Set the working directory
+ WORKDIR /code
+
+ # Copy requirements file and install dependencies
+ COPY ./requirements.txt /code/requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ # Copy the rest of the project files
+ COPY ./models /code/models
+ COPY ./checkpoints /code/checkpoints
+ COPY ./results /code/results
+ COPY ./temp /code/temp
+ COPY ./third_part /code/third_part
+ COPY ./utils /code/utils
+
+ # Copy additional Python files
+ COPY ./inference.py /code/inference.py
+ COPY ./predict.py /code/predict.py
+ COPY ./app.py /code/app.py
+
+ # Expose port for FastAPI server
+ EXPOSE 7860
+
+ # Define the command to run the FastAPI application with uvicorn
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
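Note: with this image, the FastAPI service defined in app.py below is served by building the container and mapping the exposed port to the host, e.g. docker build -t video-retalking . followed by docker run -p 7860:7860 video-retalking (the image tag is only an example). The COPY steps assume models/, checkpoints/, results/, temp/, third_part/ and utils/ all exist in the build context before building.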
app.py ADDED
@@ -0,0 +1,58 @@
+ import os
+ import shutil
+ import subprocess
+ import tempfile
+ import urllib.request
+ from typing import Tuple, Optional
+
+ import asyncio
+
+ from fastapi import FastAPI, File, UploadFile, HTTPException
+ from fastapi.responses import FileResponse
+ from pydantic import BaseModel, HttpUrl
+
+ app = FastAPI()
+
+ class VideoAudioRequest(BaseModel):
+     face_url: HttpUrl
+     audio_url: HttpUrl
+
+ def download_file(url: str, destination: str) -> None:
+     try:
+         with urllib.request.urlopen(str(url)) as response, open(destination, 'wb') as out_file:
+             shutil.copyfileobj(response, out_file)
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Failed to download file from {url}: {e}")
+
+ async def process_video_and_audio(face_url: str, audio_url: str) -> Tuple[str, Optional[str]]:
+     print("process started")
+     temp_dir = tempfile.mkdtemp(prefix="fastapi_processing_")
+     face_path = os.path.join(temp_dir, "face.mp4")
+     audio_path = os.path.join(temp_dir, "audio.mp3")
+
+     try:
+         download_file(face_url, face_path)
+         download_file(audio_url, audio_path)
+         print(face_path, audio_path)
+     except HTTPException as e:
+         return "", str(e)
+
+     outfile_path = os.path.join(temp_dir, "result.mp4")
+     command = f"python3 inference.py --face {face_path} --audio {audio_path} --outfile {outfile_path}"
+     try:
+         process = await asyncio.create_subprocess_shell(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+         _, error = await process.communicate()
+         if process.returncode == 0:
+             return outfile_path, None
+         else:
+             return "", f"Error occurred during processing: {error.decode()}"
+     except Exception as e:
+         return "", f"Error occurred during processing: {e}"
+
+ @app.post("/process_video_audio")
+ async def process_video_audio(request_data: VideoAudioRequest):
+     result, error = await process_video_and_audio(request_data.face_url, request_data.audio_url)
+     if result:
+         return FileResponse(result, media_type="video/mp4")
+     else:
+         raise HTTPException(status_code=500, detail=error)
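For reference, a minimal client sketch for the /process_video_audio endpoint defined above. It assumes the server is reachable on port 7860 (the port exposed in the Dockerfile); the two input URLs are placeholders, not files from this repository.

import requests

# Placeholder URLs pointing at a publicly reachable talking-head video and an audio track.
payload = {
    "face_url": "https://example.com/face.mp4",
    "audio_url": "https://example.com/audio.mp3",
}

# On success the endpoint streams back the synthesized result.mp4;
# on failure it returns HTTP 500 carrying the stderr of the inference subprocess.
resp = requests.post("http://localhost:7860/process_video_audio", json=payload, timeout=600)
resp.raise_for_status()
with open("result.mp4", "wb") as f:
    f.write(resp.content)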
inference.py ADDED
@@ -0,0 +1,345 @@
1
+ import numpy as np
2
+ import cv2, os, sys, subprocess, platform, torch
3
+ from tqdm import tqdm
4
+ from PIL import Image
5
+ from scipy.io import loadmat
6
+
7
+ sys.path.insert(0, 'third_part')
8
+ sys.path.insert(0, 'third_part/GPEN')
9
+ sys.path.insert(0, 'third_part/GFPGAN')
10
+
11
+ # 3dmm extraction
12
+ from third_part.face3d.util.preprocess import align_img
13
+ from third_part.face3d.util.load_mats import load_lm3d
14
+ from third_part.face3d.extract_kp_videos import KeypointExtractor
15
+ # face enhancement
16
+ from third_part.GPEN.gpen_face_enhancer import FaceEnhancement
17
+ from third_part.GFPGAN.gfpgan import GFPGANer
18
+ # expression control
19
+ from third_part.ganimation_replicate.model.ganimation import GANimationModel
20
+
21
+ from utils import audio
22
+ from utils.ffhq_preprocess import Croper
23
+ from utils.alignment_stit import crop_faces, calc_alignment_coefficients, paste_image
24
+ from utils.inference_utils import Laplacian_Pyramid_Blending_with_mask, face_detect, load_model, options, split_coeff, \
25
+ trans_image, transform_semantic, find_crop_norm_ratio, load_face3d_net, exp_aus_dict
26
+ import warnings
27
+ warnings.filterwarnings("ignore")
28
+
29
+ args = options()
30
+
31
+ def main():
32
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
33
+ print('[Info] Using {} for inference.'.format(device))
34
+ os.makedirs(os.path.join('temp', args.tmp_dir), exist_ok=True)
35
+
36
+ enhancer = FaceEnhancement(base_dir='checkpoints', size=512, model='GPEN-BFR-512', use_sr=False, \
37
+ sr_model='rrdb_realesrnet_psnr', channel_multiplier=2, narrow=1, device=device)
38
+ restorer = GFPGANer(model_path='checkpoints/GFPGANv1.3.pth', upscale=1, arch='clean', \
39
+ channel_multiplier=2, bg_upsampler=None)
40
+
41
+ base_name = args.face.split('/')[-1]
42
+ if os.path.isfile(args.face) and args.face.split('.')[1] in ['jpg', 'png', 'jpeg']:
43
+ args.static = True
44
+ if not os.path.isfile(args.face):
45
+ raise ValueError('--face argument must be a valid path to video/image file')
46
+ elif args.face.split('.')[1] in ['jpg', 'png', 'jpeg']:
47
+ full_frames = [cv2.imread(args.face)]
48
+ fps = args.fps
49
+ else:
50
+ video_stream = cv2.VideoCapture(args.face)
51
+ fps = video_stream.get(cv2.CAP_PROP_FPS)
52
+
53
+ full_frames = []
54
+ while True:
55
+ still_reading, frame = video_stream.read()
56
+ if not still_reading:
57
+ video_stream.release()
58
+ break
59
+ y1, y2, x1, x2 = args.crop
60
+ if x2 == -1: x2 = frame.shape[1]
61
+ if y2 == -1: y2 = frame.shape[0]
62
+ frame = frame[y1:y2, x1:x2]
63
+ full_frames.append(frame)
64
+
65
+ print ("[Step 0] Number of frames available for inference: "+str(len(full_frames)))
66
+ # face detection & cropping, cropping the first frame as the style of FFHQ
67
+ croper = Croper('checkpoints/shape_predictor_68_face_landmarks.dat')
68
+ full_frames_RGB = [cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for frame in full_frames]
69
+ full_frames_RGB, crop, quad = croper.crop(full_frames_RGB, xsize=512)
70
+
71
+ clx, cly, crx, cry = crop
72
+ lx, ly, rx, ry = quad
73
+ lx, ly, rx, ry = int(lx), int(ly), int(rx), int(ry)
74
+ oy1, oy2, ox1, ox2 = cly+ly, min(cly+ry, full_frames[0].shape[0]), clx+lx, min(clx+rx, full_frames[0].shape[1])
75
+ # original_size = (ox2 - ox1, oy2 - oy1)
76
+ frames_pil = [Image.fromarray(cv2.resize(frame,(256,256))) for frame in full_frames_RGB]
77
+
78
+ # get the landmark according to the detected face.
79
+ if not os.path.isfile('temp/'+base_name+'_landmarks.txt') or args.re_preprocess:
80
+ print('[Step 1] Landmarks Extraction in Video.')
81
+ kp_extractor = KeypointExtractor()
82
+ lm = kp_extractor.extract_keypoint(frames_pil, './temp/'+base_name+'_landmarks.txt')
83
+ else:
84
+ print('[Step 1] Using saved landmarks.')
85
+ lm = np.loadtxt('temp/'+base_name+'_landmarks.txt').astype(np.float32)
86
+ lm = lm.reshape([len(full_frames), -1, 2])
87
+
88
+ if not os.path.isfile('temp/'+base_name+'_coeffs.npy') or args.exp_img is not None or args.re_preprocess:
89
+ net_recon = load_face3d_net(args.face3d_net_path, device)
90
+ lm3d_std = load_lm3d('checkpoints/BFM')
91
+
92
+ video_coeffs = []
93
+ for idx in tqdm(range(len(frames_pil)), desc="[Step 2] 3DMM Extraction In Video:"):
94
+ frame = frames_pil[idx]
95
+ W, H = frame.size
96
+ lm_idx = lm[idx].reshape([-1, 2])
97
+ if np.mean(lm_idx) == -1:
98
+ lm_idx = (lm3d_std[:, :2]+1) / 2.
99
+ lm_idx = np.concatenate([lm_idx[:, :1] * W, lm_idx[:, 1:2] * H], 1)
100
+ else:
101
+ lm_idx[:, -1] = H - 1 - lm_idx[:, -1]
102
+
103
+ trans_params, im_idx, lm_idx, _ = align_img(frame, lm_idx, lm3d_std)
104
+ trans_params = np.array([float(item) for item in np.hsplit(trans_params, 5)]).astype(np.float32)
105
+ im_idx_tensor = torch.tensor(np.array(im_idx)/255., dtype=torch.float32).permute(2, 0, 1).to(device).unsqueeze(0)
106
+ with torch.no_grad():
107
+ coeffs = split_coeff(net_recon(im_idx_tensor))
108
+
109
+ pred_coeff = {key:coeffs[key].cpu().numpy() for key in coeffs}
110
+ pred_coeff = np.concatenate([pred_coeff['id'], pred_coeff['exp'], pred_coeff['tex'], pred_coeff['angle'],\
111
+ pred_coeff['gamma'], pred_coeff['trans'], trans_params[None]], 1)
112
+ video_coeffs.append(pred_coeff)
113
+ semantic_npy = np.array(video_coeffs)[:,0]
114
+ np.save('temp/'+base_name+'_coeffs.npy', semantic_npy)
115
+ else:
116
+ print('[Step 2] Using saved coeffs.')
117
+ semantic_npy = np.load('temp/'+base_name+'_coeffs.npy').astype(np.float32)
118
+
119
+ # generate the 3dmm coeff from a single image
120
+ if args.exp_img is not None and ('.png' in args.exp_img or '.jpg' in args.exp_img):
121
+ print('extract the exp from',args.exp_img)
122
+ exp_pil = Image.open(args.exp_img).convert('RGB')
123
+ lm3d_std = load_lm3d('third_part/face3d/BFM')
124
+
125
+ W, H = exp_pil.size
126
+ kp_extractor = KeypointExtractor()
127
+ lm_exp = kp_extractor.extract_keypoint([exp_pil], 'temp/'+base_name+'_temp.txt')[0]
128
+ if np.mean(lm_exp) == -1:
129
+ lm_exp = (lm3d_std[:, :2] + 1) / 2.
130
+ lm_exp = np.concatenate(
131
+ [lm_exp[:, :1] * W, lm_exp[:, 1:2] * H], 1)
132
+ else:
133
+ lm_exp[:, -1] = H - 1 - lm_exp[:, -1]
134
+
135
+ trans_params, im_exp, lm_exp, _ = align_img(exp_pil, lm_exp, lm3d_std)
136
+ trans_params = np.array([float(item) for item in np.hsplit(trans_params, 5)]).astype(np.float32)
137
+ im_exp_tensor = torch.tensor(np.array(im_exp)/255., dtype=torch.float32).permute(2, 0, 1).to(device).unsqueeze(0)
138
+ with torch.no_grad():
139
+ expression = split_coeff(net_recon(im_exp_tensor))['exp'][0]
140
+ del net_recon
141
+ elif args.exp_img == 'smile':
142
+ expression = torch.tensor(loadmat('checkpoints/expression.mat')['expression_mouth'])[0]
143
+ else:
144
+ print('using expression center')
145
+ expression = torch.tensor(loadmat('checkpoints/expression.mat')['expression_center'])[0]
146
+
147
+ # load DNet, model(LNet and ENet)
148
+ D_Net, model = load_model(args, device)
149
+
150
+ if not os.path.isfile('temp/'+base_name+'_stablized.npy') or args.re_preprocess:
151
+ imgs = []
152
+ for idx in tqdm(range(len(frames_pil)), desc="[Step 3] Stabilize the expression In Video:"):
153
+ if args.one_shot:
154
+ source_img = trans_image(frames_pil[0]).unsqueeze(0).to(device)
155
+ semantic_source_numpy = semantic_npy[0:1]
156
+ else:
157
+ source_img = trans_image(frames_pil[idx]).unsqueeze(0).to(device)
158
+ semantic_source_numpy = semantic_npy[idx:idx+1]
159
+ ratio = find_crop_norm_ratio(semantic_source_numpy, semantic_npy)
160
+ coeff = transform_semantic(semantic_npy, idx, ratio).unsqueeze(0).to(device)
161
+
162
+ # hacking the new expression
163
+ coeff[:, :64, :] = expression[None, :64, None].to(device)
164
+ with torch.no_grad():
165
+ output = D_Net(source_img, coeff)
166
+ img_stablized = np.uint8((output['fake_image'].squeeze(0).permute(1,2,0).cpu().clamp_(-1, 1).numpy() + 1 )/2. * 255)
167
+ imgs.append(cv2.cvtColor(img_stablized,cv2.COLOR_RGB2BGR))
168
+ np.save('temp/'+base_name+'_stablized.npy',imgs)
169
+ del D_Net
170
+ else:
171
+ print('[Step 3] Using saved stabilized video.')
172
+ imgs = np.load('temp/'+base_name+'_stablized.npy')
173
+ torch.cuda.empty_cache()
174
+
175
+ if not args.audio.endswith('.wav'):
176
+ command = 'ffmpeg -loglevel error -y -i {} -strict -2 {}'.format(args.audio, 'temp/{}/temp.wav'.format(args.tmp_dir))
177
+ subprocess.call(command, shell=True)
178
+ args.audio = 'temp/{}/temp.wav'.format(args.tmp_dir)
179
+ wav = audio.load_wav(args.audio, 16000)
180
+ mel = audio.melspectrogram(wav)
181
+ if np.isnan(mel.reshape(-1)).sum() > 0:
182
+ raise ValueError('Mel contains nan! Using a TTS voice? Add a small epsilon noise to the wav file and try again')
183
+
184
+ mel_step_size, mel_idx_multiplier, i, mel_chunks = 16, 80./fps, 0, []
185
+ while True:
186
+ start_idx = int(i * mel_idx_multiplier)
187
+ if start_idx + mel_step_size > len(mel[0]):
188
+ mel_chunks.append(mel[:, len(mel[0]) - mel_step_size:])
189
+ break
190
+ mel_chunks.append(mel[:, start_idx : start_idx + mel_step_size])
191
+ i += 1
192
+
193
+ print("[Step 4] Load audio; Length of mel chunks: {}".format(len(mel_chunks)))
194
+ imgs = imgs[:len(mel_chunks)]
195
+ full_frames = full_frames[:len(mel_chunks)]
196
+ lm = lm[:len(mel_chunks)]
197
+
198
+ imgs_enhanced = []
199
+ for idx in tqdm(range(len(imgs)), desc='[Step 5] Reference Enhancement'):
200
+ img = imgs[idx]
201
+ pred, _, _ = enhancer.process(img, img, face_enhance=True, possion_blending=False)
202
+ imgs_enhanced.append(pred)
203
+ gen = datagen(imgs_enhanced.copy(), mel_chunks, full_frames, None, (oy1,oy2,ox1,ox2))
204
+
205
+ frame_h, frame_w = full_frames[0].shape[:-1]
206
+ out = cv2.VideoWriter('temp/{}/result.mp4'.format(args.tmp_dir), cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_w, frame_h))
207
+
208
+ if args.up_face != 'original':
209
+ instance = GANimationModel()
210
+ instance.initialize()
211
+ instance.setup()
212
+
213
+ kp_extractor = KeypointExtractor()
214
+ for i, (img_batch, mel_batch, frames, coords, img_original, f_frames) in enumerate(tqdm(gen, desc='[Step 6] Lip Synthesis:', total=int(np.ceil(float(len(mel_chunks)) / args.LNet_batch_size)))):
215
+ img_batch = torch.FloatTensor(np.transpose(img_batch, (0, 3, 1, 2))).to(device)
216
+ mel_batch = torch.FloatTensor(np.transpose(mel_batch, (0, 3, 1, 2))).to(device)
217
+ img_original = torch.FloatTensor(np.transpose(img_original, (0, 3, 1, 2))).to(device)/255. # BGR -> RGB
218
+
219
+ with torch.no_grad():
220
+ incomplete, reference = torch.split(img_batch, 3, dim=1)
221
+ pred, low_res = model(mel_batch, img_batch, reference)
222
+ pred = torch.clamp(pred, 0, 1)
223
+
224
+ if args.up_face in ['sad', 'angry', 'surprise']:
225
+ tar_aus = exp_aus_dict[args.up_face]
226
+ else:
227
+ pass
228
+
229
+ if args.up_face == 'original':
230
+ cur_gen_faces = img_original
231
+ else:
232
+ test_batch = {'src_img': torch.nn.functional.interpolate((img_original * 2 - 1), size=(128, 128), mode='bilinear'),
233
+ 'tar_aus': tar_aus.repeat(len(incomplete), 1)}
234
+ instance.feed_batch(test_batch)
235
+ instance.forward()
236
+ cur_gen_faces = torch.nn.functional.interpolate(instance.fake_img / 2. + 0.5, size=(384, 384), mode='bilinear')
237
+
238
+ if args.without_rl1 is not False:
239
+ incomplete, reference = torch.split(img_batch, 3, dim=1)
240
+ mask = torch.where(incomplete==0, torch.ones_like(incomplete), torch.zeros_like(incomplete))
241
+ pred = pred * mask + cur_gen_faces * (1 - mask)
242
+
243
+ pred = pred.cpu().numpy().transpose(0, 2, 3, 1) * 255.
244
+
245
+ torch.cuda.empty_cache()
246
+ for p, f, xf, c in zip(pred, frames, f_frames, coords):
247
+ y1, y2, x1, x2 = c
248
+ p = cv2.resize(p.astype(np.uint8), (x2 - x1, y2 - y1))
249
+
250
+ ff = xf.copy()
251
+ ff[y1:y2, x1:x2] = p
252
+
253
+ # mouth region enhancement by GFPGAN
254
+ cropped_faces, restored_faces, restored_img = restorer.enhance(
255
+ ff, has_aligned=False, only_center_face=True, paste_back=True)
256
+ # 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
257
+ mm = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 0, 0, 0, 0, 0, 0]
258
+ mouse_mask = np.zeros_like(restored_img)
259
+ tmp_mask = enhancer.faceparser.process(restored_img[y1:y2, x1:x2], mm)[0]
260
+ mouse_mask[y1:y2, x1:x2]= cv2.resize(tmp_mask, (x2 - x1, y2 - y1))[:, :, np.newaxis] / 255.
261
+
262
+ height, width = ff.shape[:2]
263
+ restored_img, ff, full_mask = [cv2.resize(x, (512, 512)) for x in (restored_img, ff, np.float32(mouse_mask))]
264
+ img = Laplacian_Pyramid_Blending_with_mask(restored_img, ff, full_mask[:, :, 0], 10)
265
+ pp = np.uint8(cv2.resize(np.clip(img, 0 ,255), (width, height)))
266
+
267
+ pp, orig_faces, enhanced_faces = enhancer.process(pp, xf, bbox=c, face_enhance=False, possion_blending=True)
268
+ out.write(pp)
269
+ out.release()
270
+
271
+ if not os.path.isdir(os.path.dirname(args.outfile)):
272
+ os.makedirs(os.path.dirname(args.outfile), exist_ok=True)
273
+ command = 'ffmpeg -loglevel error -y -i {} -i {} -strict -2 -q:v 1 {}'.format(args.audio, 'temp/{}/result.mp4'.format(args.tmp_dir), args.outfile)
274
+ subprocess.call(command, shell=platform.system() != 'Windows')
275
+ print('outfile:', args.outfile)
276
+
277
+
278
+ # frames:256x256, full_frames: original size
279
+ def datagen(frames, mels, full_frames, frames_pil, cox):
280
+ img_batch, mel_batch, frame_batch, coords_batch, ref_batch, full_frame_batch = [], [], [], [], [], []
281
+ base_name = args.face.split('/')[-1]
282
+ refs = []
283
+ image_size = 256
284
+
285
+ # original frames
286
+ kp_extractor = KeypointExtractor()
287
+ fr_pil = [Image.fromarray(frame) for frame in frames]
288
+ lms = kp_extractor.extract_keypoint(fr_pil, 'temp/'+base_name+'x12_landmarks.txt')
289
+ frames_pil = [(lm, frame) for frame, lm in zip(fr_pil, lms)]  # frames are the cropped version of the modified face
290
+ crops, orig_images, quads = crop_faces(image_size, frames_pil, scale=1.0, use_fa=True)
291
+ inverse_transforms = [calc_alignment_coefficients(quad + 0.5, [[0, 0], [0, image_size], [image_size, image_size], [image_size, 0]]) for quad in quads]
292
+ del kp_extractor.detector
293
+
294
+ oy1,oy2,ox1,ox2 = cox
295
+ face_det_results = face_detect(full_frames, args, jaw_correction=True)
296
+
297
+ for inverse_transform, crop, full_frame, face_det in zip(inverse_transforms, crops, full_frames, face_det_results):
298
+ imc_pil = paste_image(inverse_transform, crop, Image.fromarray(
299
+ cv2.resize(full_frame[int(oy1):int(oy2), int(ox1):int(ox2)], (256, 256))))
300
+
301
+ ff = full_frame.copy()
302
+ ff[int(oy1):int(oy2), int(ox1):int(ox2)] = cv2.resize(np.array(imc_pil.convert('RGB')), (ox2 - ox1, oy2 - oy1))
303
+ oface, coords = face_det
304
+ y1, y2, x1, x2 = coords
305
+ refs.append(ff[y1: y2, x1:x2])
306
+
307
+ for i, m in enumerate(mels):
308
+ idx = 0 if args.static else i % len(frames)
309
+ frame_to_save = frames[idx].copy()
310
+ face = refs[idx]
311
+ oface, coords = face_det_results[idx].copy()
312
+
313
+ face = cv2.resize(face, (args.img_size, args.img_size))
314
+ oface = cv2.resize(oface, (args.img_size, args.img_size))
315
+
316
+ img_batch.append(oface)
317
+ ref_batch.append(face)
318
+ mel_batch.append(m)
319
+ coords_batch.append(coords)
320
+ frame_batch.append(frame_to_save)
321
+ full_frame_batch.append(full_frames[idx].copy())
322
+
323
+ if len(img_batch) >= args.LNet_batch_size:
324
+ img_batch, mel_batch, ref_batch = np.asarray(img_batch), np.asarray(mel_batch), np.asarray(ref_batch)
325
+ img_masked = img_batch.copy()
326
+ img_original = img_batch.copy()
327
+ img_masked[:, args.img_size//2:] = 0
328
+ img_batch = np.concatenate((img_masked, ref_batch), axis=3) / 255.
329
+ mel_batch = np.reshape(mel_batch, [len(mel_batch), mel_batch.shape[1], mel_batch.shape[2], 1])
330
+
331
+ yield img_batch, mel_batch, frame_batch, coords_batch, img_original, full_frame_batch
332
+ img_batch, mel_batch, frame_batch, coords_batch, img_original, full_frame_batch, ref_batch = [], [], [], [], [], [], []
333
+
334
+ if len(img_batch) > 0:
335
+ img_batch, mel_batch, ref_batch = np.asarray(img_batch), np.asarray(mel_batch), np.asarray(ref_batch)
336
+ img_masked = img_batch.copy()
337
+ img_original = img_batch.copy()
338
+ img_masked[:, args.img_size//2:] = 0
339
+ img_batch = np.concatenate((img_masked, ref_batch), axis=3) / 255.
340
+ mel_batch = np.reshape(mel_batch, [len(mel_batch), mel_batch.shape[1], mel_batch.shape[2], 1])
341
+ yield img_batch, mel_batch, frame_batch, coords_batch, img_original, full_frame_batch
342
+
343
+
344
+ if __name__ == '__main__':
345
+ main()
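A note on Step 4 above: the script converts the input audio to 16 kHz, computes a mel spectrogram, and slices it into one fixed-width window per video frame. Below is a small standalone sketch of that indexing arithmetic, assuming (as the 80./fps factor implies) roughly 80 mel frames per second of audio and the same 16-frame window; function and variable names here are illustrative only.

import numpy as np

def chunk_mel(mel: np.ndarray, fps: float, mel_step_size: int = 16):
    """Yield one (n_mels, mel_step_size) window of the mel spectrogram per video frame."""
    mel_idx_multiplier = 80.0 / fps   # mel frames advanced per video frame (assumes ~80 mel frames/s)
    i = 0
    while True:
        start_idx = int(i * mel_idx_multiplier)
        if start_idx + mel_step_size > mel.shape[1]:
            # Final chunk: take the last full window and stop, mirroring inference.py.
            yield mel[:, mel.shape[1] - mel_step_size:]
            return
        yield mel[:, start_idx:start_idx + mel_step_size]
        i += 1

dummy_mel = np.zeros((80, 160))                     # ~2 s of audio at 80 mel frames/s
print(sum(1 for _ in chunk_mel(dummy_mel, 25.0)))   # roughly one chunk per 25 fps video frame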
predict.py ADDED
@@ -0,0 +1,552 @@
1
+ # Prediction interface for Cog ⚙️
2
+ # https://github.com/replicate/cog/blob/main/docs/python.md
3
+
4
+ import os
5
+ import sys
6
+ import argparse
7
+ import subprocess
8
+ import numpy as np
9
+ from tqdm import tqdm
10
+ from PIL import Image
11
+ from scipy.io import loadmat
12
+ import torch
13
+ import cv2
14
+ from cog import BasePredictor, Input, Path
15
+
16
+ sys.path.insert(0, "third_part")
17
+ sys.path.insert(0, "third_part/GPEN")
18
+ sys.path.insert(0, "third_part/GFPGAN")
19
+
20
+ # 3dmm extraction
21
+ from third_part.face3d.util.preprocess import align_img
22
+ from third_part.face3d.util.load_mats import load_lm3d
23
+ from third_part.face3d.extract_kp_videos import KeypointExtractor
24
+
25
+ # face enhancement
26
+ from third_part.GPEN.gpen_face_enhancer import FaceEnhancement
27
+ from third_part.GFPGAN.gfpgan import GFPGANer
28
+
29
+ # expression control
30
+ from third_part.ganimation_replicate.model.ganimation import GANimationModel
31
+
32
+ from utils import audio
33
+ from utils.ffhq_preprocess import Croper
34
+ from utils.alignment_stit import crop_faces, calc_alignment_coefficients, paste_image
35
+ from utils.inference_utils import (
36
+ Laplacian_Pyramid_Blending_with_mask,
37
+ face_detect,
38
+ load_model,
39
+ options,
40
+ split_coeff,
41
+ trans_image,
42
+ transform_semantic,
43
+ find_crop_norm_ratio,
44
+ load_face3d_net,
45
+ exp_aus_dict,
46
+ )
47
+
48
+
49
+ class Predictor(BasePredictor):
50
+ def setup(self) -> None:
51
+ """Load the model into memory to make running multiple predictions efficient"""
52
+ self.enhancer = FaceEnhancement(
53
+ base_dir="checkpoints",
54
+ size=512,
55
+ model="GPEN-BFR-512",
56
+ use_sr=False,
57
+ sr_model="rrdb_realesrnet_psnr",
58
+ channel_multiplier=2,
59
+ narrow=1,
60
+ device="cuda",
61
+ )
62
+ self.restorer = GFPGANer(
63
+ model_path="checkpoints/GFPGANv1.3.pth",
64
+ upscale=1,
65
+ arch="clean",
66
+ channel_multiplier=2,
67
+ bg_upsampler=None,
68
+ )
69
+ self.croper = Croper("checkpoints/shape_predictor_68_face_landmarks.dat")
70
+ self.kp_extractor = KeypointExtractor()
71
+
72
+ face3d_net_path = "checkpoints/face3d_pretrain_epoch_20.pth"
73
+
74
+ self.net_recon = load_face3d_net(face3d_net_path, "cuda")
75
+ self.lm3d_std = load_lm3d("checkpoints/BFM")
76
+
77
+ def predict(
78
+ self,
79
+ face: Path = Input(description="Input video file of a talking-head."),
80
+ input_audio: Path = Input(description="Input audio file."),
81
+ ) -> Path:
82
+ """Run a single prediction on the model"""
83
+ device = "cuda"
84
+ args = argparse.Namespace(
85
+ DNet_path="checkpoints/DNet.pt",
86
+ LNet_path="checkpoints/LNet.pth",
87
+ ENet_path="checkpoints/ENet.pth",
88
+ face3d_net_path="checkpoints/face3d_pretrain_epoch_20.pth",
89
+ face=str(face),
90
+ audio=str(input_audio),
91
+ exp_img="neutral",
92
+ outfile=None,
93
+ fps=25,
94
+ pads=[0, 20, 0, 0],
95
+ face_det_batch_size=4,
96
+ LNet_batch_size=16,
97
+ img_size=384,
98
+ crop=[0, -1, 0, -1],
99
+ box=[-1, -1, -1, -1],
100
+ nosmooth=False,
101
+ static=False,
102
+ up_face="original",
103
+ one_shot=False,
104
+ without_rl1=False,
105
+ tmp_dir="temp",
106
+ re_preprocess=False,
107
+ )
108
+
109
+ base_name = args.face.split("/")[-1]
110
+
111
+ if args.face.split(".")[1] in ["jpg", "png", "jpeg"]:
112
+ full_frames = [cv2.imread(args.face)]
113
+ args.static = True
114
+ fps = args.fps
115
+ else:
116
+ video_stream = cv2.VideoCapture(args.face)
117
+ fps = video_stream.get(cv2.CAP_PROP_FPS)
118
+ full_frames = []
119
+ while True:
120
+ still_reading, frame = video_stream.read()
121
+ if not still_reading:
122
+ video_stream.release()
123
+ break
124
+ y1, y2, x1, x2 = args.crop
125
+ if x2 == -1:
126
+ x2 = frame.shape[1]
127
+ if y2 == -1:
128
+ y2 = frame.shape[0]
129
+ frame = frame[y1:y2, x1:x2]
130
+ full_frames.append(frame)
131
+
132
+ full_frames_RGB = [
133
+ cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) for frame in full_frames
134
+ ]
135
+ full_frames_RGB, crop, quad = self.croper.crop(full_frames_RGB, xsize=512)
136
+
137
+ clx, cly, crx, cry = crop
138
+ lx, ly, rx, ry = quad
139
+ lx, ly, rx, ry = int(lx), int(ly), int(rx), int(ry)
140
+ oy1, oy2, ox1, ox2 = (
141
+ cly + ly,
142
+ min(cly + ry, full_frames[0].shape[0]),
143
+ clx + lx,
144
+ min(clx + rx, full_frames[0].shape[1]),
145
+ )
146
+ # original_size = (ox2 - ox1, oy2 - oy1)
147
+ frames_pil = [
148
+ Image.fromarray(cv2.resize(frame, (256, 256))) for frame in full_frames_RGB
149
+ ]
150
+
151
+ # get the landmark according to the detected face.
152
+ if (
153
+ not os.path.isfile("temp/" + base_name + "_landmarks.txt")
154
+ or args.re_preprocess
155
+ ):
156
+ print("[Step 1] Landmarks Extraction in Video.")
157
+ lm = self.kp_extractor.extract_keypoint(
158
+ frames_pil, "./temp/" + base_name + "_landmarks.txt"
159
+ )
160
+ else:
161
+ print("[Step 1] Using saved landmarks.")
162
+ lm = np.loadtxt("temp/" + base_name + "_landmarks.txt").astype(np.float32)
163
+ lm = lm.reshape([len(full_frames), -1, 2])
164
+
165
+ if (
166
+ not os.path.isfile("temp/" + base_name + "_coeffs.npy")
167
+ or args.exp_img is not None
168
+ or args.re_preprocess
169
+ ):
170
+ video_coeffs = []
171
+ for idx in tqdm(
172
+ range(len(frames_pil)), desc="[Step 2] 3DMM Extraction In Video:"
173
+ ):
174
+ frame = frames_pil[idx]
175
+ W, H = frame.size
176
+ lm_idx = lm[idx].reshape([-1, 2])
177
+ if np.mean(lm_idx) == -1:
178
+ lm_idx = (self.lm3d_std[:, :2] + 1) / 2.0
179
+ lm_idx = np.concatenate([lm_idx[:, :1] * W, lm_idx[:, 1:2] * H], 1)
180
+ else:
181
+ lm_idx[:, -1] = H - 1 - lm_idx[:, -1]
182
+
183
+ trans_params, im_idx, lm_idx, _ = align_img(
184
+ frame, lm_idx, self.lm3d_std
185
+ )
186
+ trans_params = np.array(
187
+ [float(item) for item in np.hsplit(trans_params, 5)]
188
+ ).astype(np.float32)
189
+ im_idx_tensor = (
190
+ torch.tensor(np.array(im_idx) / 255.0, dtype=torch.float32)
191
+ .permute(2, 0, 1)
192
+ .to(device)
193
+ .unsqueeze(0)
194
+ )
195
+ with torch.no_grad():
196
+ coeffs = split_coeff(self.net_recon(im_idx_tensor))
197
+
198
+ pred_coeff = {key: coeffs[key].cpu().numpy() for key in coeffs}
199
+ pred_coeff = np.concatenate(
200
+ [
201
+ pred_coeff["id"],
202
+ pred_coeff["exp"],
203
+ pred_coeff["tex"],
204
+ pred_coeff["angle"],
205
+ pred_coeff["gamma"],
206
+ pred_coeff["trans"],
207
+ trans_params[None],
208
+ ],
209
+ 1,
210
+ )
211
+ video_coeffs.append(pred_coeff)
212
+ semantic_npy = np.array(video_coeffs)[:, 0]
213
+ np.save("temp/" + base_name + "_coeffs.npy", semantic_npy)
214
+ else:
215
+ print("[Step 2] Using saved coeffs.")
216
+ semantic_npy = np.load("temp/" + base_name + "_coeffs.npy").astype(
217
+ np.float32
218
+ )
219
+
220
+ # generate the 3dmm coeff from a single image
221
+ if args.exp_img == "smile":
222
+ expression = torch.tensor(
223
+ loadmat("checkpoints/expression.mat")["expression_mouth"]
224
+ )[0]
225
+ else:
226
+ print("using expression center")
227
+ expression = torch.tensor(
228
+ loadmat("checkpoints/expression.mat")["expression_center"]
229
+ )[0]
230
+
231
+ # load DNet, model(LNet and ENet)
232
+ D_Net, model = load_model(args, device)
233
+
234
+ if (
235
+ not os.path.isfile("temp/" + base_name + "_stablized.npy")
236
+ or args.re_preprocess
237
+ ):
238
+ imgs = []
239
+ for idx in tqdm(
240
+ range(len(frames_pil)),
241
+ desc="[Step 3] Stabilize the expression In Video:",
242
+ ):
243
+ if args.one_shot:
244
+ source_img = trans_image(frames_pil[0]).unsqueeze(0).to(device)
245
+ semantic_source_numpy = semantic_npy[0:1]
246
+ else:
247
+ source_img = trans_image(frames_pil[idx]).unsqueeze(0).to(device)
248
+ semantic_source_numpy = semantic_npy[idx : idx + 1]
249
+ ratio = find_crop_norm_ratio(semantic_source_numpy, semantic_npy)
250
+ coeff = (
251
+ transform_semantic(semantic_npy, idx, ratio).unsqueeze(0).to(device)
252
+ )
253
+
254
+ # hacking the new expression
255
+ coeff[:, :64, :] = expression[None, :64, None].to(device)
256
+ with torch.no_grad():
257
+ output = D_Net(source_img, coeff)
258
+ img_stablized = np.uint8(
259
+ (
260
+ output["fake_image"]
261
+ .squeeze(0)
262
+ .permute(1, 2, 0)
263
+ .cpu()
264
+ .clamp_(-1, 1)
265
+ .numpy()
266
+ + 1
267
+ )
268
+ / 2.0
269
+ * 255
270
+ )
271
+ imgs.append(cv2.cvtColor(img_stablized, cv2.COLOR_RGB2BGR))
272
+ np.save("temp/" + base_name + "_stablized.npy", imgs)
273
+ del D_Net
274
+ else:
275
+ print("[Step 3] Using saved stabilized video.")
276
+ imgs = np.load("temp/" + base_name + "_stablized.npy")
277
+ torch.cuda.empty_cache()
278
+
279
+ if not args.audio.endswith(".wav"):
280
+ command = "ffmpeg -loglevel error -y -i {} -strict -2 {}".format(
281
+ args.audio, "temp/{}/temp.wav".format(args.tmp_dir)
282
+ )
283
+ subprocess.call(command, shell=True)
284
+ args.audio = "temp/{}/temp.wav".format(args.tmp_dir)
285
+ wav = audio.load_wav(args.audio, 16000)
286
+ mel = audio.melspectrogram(wav)
287
+ if np.isnan(mel.reshape(-1)).sum() > 0:
288
+ raise ValueError(
289
+ "Mel contains nan! Using a TTS voice? Add a small epsilon noise to the wav file and try again"
290
+ )
291
+
292
+ mel_step_size, mel_idx_multiplier, i, mel_chunks = 16, 80.0 / fps, 0, []
293
+ while True:
294
+ start_idx = int(i * mel_idx_multiplier)
295
+ if start_idx + mel_step_size > len(mel[0]):
296
+ mel_chunks.append(mel[:, len(mel[0]) - mel_step_size :])
297
+ break
298
+ mel_chunks.append(mel[:, start_idx : start_idx + mel_step_size])
299
+ i += 1
300
+
301
+ print("[Step 4] Load audio; Length of mel chunks: {}".format(len(mel_chunks)))
302
+ imgs = imgs[: len(mel_chunks)]
303
+ full_frames = full_frames[: len(mel_chunks)]
304
+ lm = lm[: len(mel_chunks)]
305
+
306
+ imgs_enhanced = []
307
+ for idx in tqdm(range(len(imgs)), desc="[Step 5] Reference Enhancement"):
308
+ img = imgs[idx]
309
+ pred, _, _ = self.enhancer.process(
310
+ img, img, face_enhance=True, possion_blending=False
311
+ )
312
+ imgs_enhanced.append(pred)
313
+ gen = datagen(
314
+ imgs_enhanced.copy(), mel_chunks, full_frames, args, (oy1, oy2, ox1, ox2)
315
+ )
316
+
317
+ frame_h, frame_w = full_frames[0].shape[:-1]
318
+ out = cv2.VideoWriter(
319
+ "temp/{}/result.mp4".format(args.tmp_dir),
320
+ cv2.VideoWriter_fourcc(*"mp4v"),
321
+ fps,
322
+ (frame_w, frame_h),
323
+ )
324
+
325
+ if args.up_face != "original":
326
+ instance = GANimationModel()
327
+ instance.initialize()
328
+ instance.setup()
329
+
330
+ # kp_extractor = KeypointExtractor()
331
+ for i, (
332
+ img_batch,
333
+ mel_batch,
334
+ frames,
335
+ coords,
336
+ img_original,
337
+ f_frames,
338
+ ) in enumerate(
339
+ tqdm(
340
+ gen,
341
+ desc="[Step 6] Lip Synthesis:",
342
+ total=int(np.ceil(float(len(mel_chunks)) / args.LNet_batch_size)),
343
+ )
344
+ ):
345
+ img_batch = torch.FloatTensor(np.transpose(img_batch, (0, 3, 1, 2))).to(
346
+ device
347
+ )
348
+ mel_batch = torch.FloatTensor(np.transpose(mel_batch, (0, 3, 1, 2))).to(
349
+ device
350
+ )
351
+ img_original = (
352
+ torch.FloatTensor(np.transpose(img_original, (0, 3, 1, 2))).to(device)
353
+ / 255.0
354
+ ) # BGR -> RGB
355
+
356
+ with torch.no_grad():
357
+ incomplete, reference = torch.split(img_batch, 3, dim=1)
358
+ pred, low_res = model(mel_batch, img_batch, reference)
359
+ pred = torch.clamp(pred, 0, 1)
360
+
361
+ if args.up_face in ["sad", "angry", "surprise"]:
362
+ tar_aus = exp_aus_dict[args.up_face]
363
+ else:
364
+ pass
365
+
366
+ if args.up_face == "original":
367
+ cur_gen_faces = img_original
368
+ else:
369
+ test_batch = {
370
+ "src_img": torch.nn.functional.interpolate(
371
+ (img_original * 2 - 1), size=(128, 128), mode="bilinear"
372
+ ),
373
+ "tar_aus": tar_aus.repeat(len(incomplete), 1),
374
+ }
375
+ instance.feed_batch(test_batch)
376
+ instance.forward()
377
+ cur_gen_faces = torch.nn.functional.interpolate(
378
+ instance.fake_img / 2.0 + 0.5, size=(384, 384), mode="bilinear"
379
+ )
380
+
381
+ if args.without_rl1 is not False:
382
+ incomplete, reference = torch.split(img_batch, 3, dim=1)
383
+ mask = torch.where(
384
+ incomplete == 0,
385
+ torch.ones_like(incomplete),
386
+ torch.zeros_like(incomplete),
387
+ )
388
+ pred = pred * mask + cur_gen_faces * (1 - mask)
389
+
390
+ pred = pred.cpu().numpy().transpose(0, 2, 3, 1) * 255.0
391
+
392
+ torch.cuda.empty_cache()
393
+ for p, f, xf, c in zip(pred, frames, f_frames, coords):
394
+ y1, y2, x1, x2 = c
395
+ p = cv2.resize(p.astype(np.uint8), (x2 - x1, y2 - y1))
396
+
397
+ ff = xf.copy()
398
+ ff[y1:y2, x1:x2] = p
399
+
400
+ # mouth region enhancement by GFPGAN
401
+ cropped_faces, restored_faces, restored_img = self.restorer.enhance(
402
+ ff, has_aligned=False, only_center_face=True, paste_back=True
403
+ )
404
+ # 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
405
+ mm = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 0, 0, 0, 0, 0, 0]
406
+ mouse_mask = np.zeros_like(restored_img)
407
+ tmp_mask = self.enhancer.faceparser.process(
408
+ restored_img[y1:y2, x1:x2], mm
409
+ )[0]
410
+ mouse_mask[y1:y2, x1:x2] = (
411
+ cv2.resize(tmp_mask, (x2 - x1, y2 - y1))[:, :, np.newaxis] / 255.0
412
+ )
413
+
414
+ height, width = ff.shape[:2]
415
+ restored_img, ff, full_mask = [
416
+ cv2.resize(x, (512, 512))
417
+ for x in (restored_img, ff, np.float32(mouse_mask))
418
+ ]
419
+ img = Laplacian_Pyramid_Blending_with_mask(
420
+ restored_img, ff, full_mask[:, :, 0], 10
421
+ )
422
+ pp = np.uint8(cv2.resize(np.clip(img, 0, 255), (width, height)))
423
+
424
+ pp, orig_faces, enhanced_faces = self.enhancer.process(
425
+ pp, xf, bbox=c, face_enhance=False, possion_blending=True
426
+ )
427
+ out.write(pp)
428
+ out.release()
429
+
430
+ output_file = "/tmp/output.mp4"
431
+ command = "ffmpeg -loglevel error -y -i {} -i {} -strict -2 -q:v 1 {}".format(
432
+ args.audio, "temp/{}/result.mp4".format(args.tmp_dir), output_file
433
+ )
434
+ subprocess.call(command, shell=True)
435
+
436
+ return Path(output_file)
437
+
438
+
439
+ # frames:256x256, full_frames: original size
440
+ def datagen(frames, mels, full_frames, args, cox):
441
+ img_batch, mel_batch, frame_batch, coords_batch, ref_batch, full_frame_batch = (
442
+ [],
443
+ [],
444
+ [],
445
+ [],
446
+ [],
447
+ [],
448
+ )
449
+ base_name = args.face.split("/")[-1]
450
+ refs = []
451
+ image_size = 256
452
+
453
+ # original frames
454
+ kp_extractor = KeypointExtractor()
455
+ fr_pil = [Image.fromarray(frame) for frame in frames]
456
+ lms = kp_extractor.extract_keypoint(
457
+ fr_pil, "temp/" + base_name + "x12_landmarks.txt"
458
+ )
459
+ frames_pil = [
460
+ (lm, frame) for frame, lm in zip(fr_pil, lms)
461
+ ] # frames are the cropped version of the modified face
462
+ crops, orig_images, quads = crop_faces(
463
+ image_size, frames_pil, scale=1.0, use_fa=True
464
+ )
465
+ inverse_transforms = [
466
+ calc_alignment_coefficients(
467
+ quad + 0.5,
468
+ [[0, 0], [0, image_size], [image_size, image_size], [image_size, 0]],
469
+ )
470
+ for quad in quads
471
+ ]
472
+ del kp_extractor.detector
473
+
474
+ oy1, oy2, ox1, ox2 = cox
475
+ face_det_results = face_detect(full_frames, args, jaw_correction=True)
476
+
477
+ for inverse_transform, crop, full_frame, face_det in zip(
478
+ inverse_transforms, crops, full_frames, face_det_results
479
+ ):
480
+ imc_pil = paste_image(
481
+ inverse_transform,
482
+ crop,
483
+ Image.fromarray(
484
+ cv2.resize(
485
+ full_frame[int(oy1) : int(oy2), int(ox1) : int(ox2)], (256, 256)
486
+ )
487
+ ),
488
+ )
489
+
490
+ ff = full_frame.copy()
491
+ ff[int(oy1) : int(oy2), int(ox1) : int(ox2)] = cv2.resize(
492
+ np.array(imc_pil.convert("RGB")), (ox2 - ox1, oy2 - oy1)
493
+ )
494
+ oface, coords = face_det
495
+ y1, y2, x1, x2 = coords
496
+ refs.append(ff[y1:y2, x1:x2])
497
+
498
+ for i, m in enumerate(mels):
499
+ idx = 0 if args.static else i % len(frames)
500
+ frame_to_save = frames[idx].copy()
501
+ face = refs[idx]
502
+ oface, coords = face_det_results[idx].copy()
503
+
504
+ face = cv2.resize(face, (args.img_size, args.img_size))
505
+ oface = cv2.resize(oface, (args.img_size, args.img_size))
506
+
507
+ img_batch.append(oface)
508
+ ref_batch.append(face)
509
+ mel_batch.append(m)
510
+ coords_batch.append(coords)
511
+ frame_batch.append(frame_to_save)
512
+ full_frame_batch.append(full_frames[idx].copy())
513
+
514
+ if len(img_batch) >= args.LNet_batch_size:
515
+ img_batch, mel_batch, ref_batch = (
516
+ np.asarray(img_batch),
517
+ np.asarray(mel_batch),
518
+ np.asarray(ref_batch),
519
+ )
520
+ img_masked = img_batch.copy()
521
+ img_original = img_batch.copy()
522
+ img_masked[:, args.img_size // 2 :] = 0
523
+ img_batch = np.concatenate((img_masked, ref_batch), axis=3) / 255.0
524
+ mel_batch = np.reshape(
525
+ mel_batch, [len(mel_batch), mel_batch.shape[1], mel_batch.shape[2], 1]
526
+ )
527
+
528
+ yield img_batch, mel_batch, frame_batch, coords_batch, img_original, full_frame_batch
529
+ (
530
+ img_batch,
531
+ mel_batch,
532
+ frame_batch,
533
+ coords_batch,
534
+ img_original,
535
+ full_frame_batch,
536
+ ref_batch,
537
+ ) = ([], [], [], [], [], [], [])
538
+
539
+ if len(img_batch) > 0:
540
+ img_batch, mel_batch, ref_batch = (
541
+ np.asarray(img_batch),
542
+ np.asarray(mel_batch),
543
+ np.asarray(ref_batch),
544
+ )
545
+ img_masked = img_batch.copy()
546
+ img_original = img_batch.copy()
547
+ img_masked[:, args.img_size // 2 :] = 0
548
+ img_batch = np.concatenate((img_masked, ref_batch), axis=3) / 255.0
549
+ mel_batch = np.reshape(
550
+ mel_batch, [len(mel_batch), mel_batch.shape[1], mel_batch.shape[2], 1]
551
+ )
552
+ yield img_batch, mel_batch, frame_batch, coords_batch, img_original, full_frame_batch
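For completeness, a hypothetical local smoke test of the Cog predictor defined above. It assumes the checkpoints/ directory is populated and a CUDA device is available (setup() hard-codes device="cuda"); the two input paths are placeholders for a local talking-head video and driving audio file.

from cog import Path
from predict import Predictor

predictor = Predictor()
predictor.setup()                           # loads GPEN, GFPGAN, the keypoint extractor and the 3DMM network once
output = predictor.predict(
    face=Path("face.mp4"),                  # placeholder input video
    input_audio=Path("audio.wav"),          # placeholder driving audio
)
print(output)                               # the synthesized video written to /tmp/output.mp4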
requirements.txt ADDED
@@ -0,0 +1,15 @@
+ gradio
+ torch
+ torchvision
+ basicsr
+ kornia
+ face-alignment
+ ninja
+ einops
+ facexlib
+ librosa
+ dlib
+ numpy
+ fastapi
+ uvicorn
+ ninja
temp/1.mp4_coeffs.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f99747e3debfae74d7169f06fa016e98385e081aa9301071db342dec38818588
3
+ size 359592
temp/1.mp4_landmarks.txt ADDED
The diff for this file is too large to render.
 
temp/1.mp4_stablized.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55b8bc8e44ea961ce4c84ec162a45a6f337dbc95fe6dcf6711d66a1b4421fa70
3
+ size 67436672
temp/1.mp4x12_landmarks.txt ADDED
The diff for this file is too large to render.
 
temp/dropbox5.mp4_coeffs.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae925c8a5488ee06eeef23e40dc3b17e91090d00373d0e5e670233fb015e5331
3
+ size 351208
temp/dropbox5.mp4_landmarks.txt ADDED
The diff for this file is too large to render.
 
temp/dropbox5.mp4_stablized.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1e5b20415ce5616868467f44b0e557296b86e0b07ac76fbfae4355672c61dd1
3
+ size 65863808
temp/dropbox5.mp4x12_landmarks.txt ADDED
The diff for this file is too large to render.
 
temp/face.mp4_coeffs.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96e6fbc6c00626ac512d50086324625a80e38d7e73c99227815df2a7db985760
3
+ size 631024
temp/face.mp4_landmarks.txt ADDED
The diff for this file is too large to render.
 
temp/face.mp4_stablized.npy ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:280d25163a5b9f6cfa00da6c57b1b68588874d711a458b003b5a0162047e55bf
3
+ size 118358144
temp/face.mp4x12_landmarks.txt ADDED
The diff for this file is too large to render.
 
temp/temp/result.mp4 ADDED
Binary file (44 Bytes).
 
temp/temp/temp.wav ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa99fa7f98235d432295627485004a4d67922055f679520bad65843a24fba75e
3
+ size 1367060
third_part/GFPGAN/LICENSE ADDED
@@ -0,0 +1,351 @@
1
+ Tencent is pleased to support the open source community by making GFPGAN available.
2
+
3
+ Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
4
+
5
+ GFPGAN is licensed under the Apache License Version 2.0 except for the third-party components listed below.
6
+
7
+
8
+ Terms of the Apache License Version 2.0:
9
+ ---------------------------------------------
10
+ Apache License
11
+
12
+ Version 2.0, January 2004
13
+
14
+ http://www.apache.org/licenses/
15
+
16
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
17
+ 1. Definitions.
18
+
19
+ “License” shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document.
20
+
21
+ “Licensor” shall mean the copyright owner or entity authorized by the copyright owner that is granting the License.
22
+
23
+ “Legal Entity” shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, “control” means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity.
24
+
25
+ “You” (or “Your”) shall mean an individual or Legal Entity exercising permissions granted by this License.
26
+
27
+ “Source” form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files.
28
+
29
+ “Object” form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types.
30
+
31
+ “Work” shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below).
32
+
33
+ “Derivative Works” shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof.
34
+
35
+ “Contribution” shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, “submitted” means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as “Not a Contribution.”
36
+
37
+ “Contributor” shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work.
38
+
39
+ 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form.
40
+
41
+ 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed.
42
+
43
+ 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions:
44
+
45
+ You must give any other recipients of the Work or Derivative Works a copy of this License; and
46
+
47
+ You must cause any modified files to carry prominent notices stating that You changed the files; and
48
+
49
+ You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and
50
+
51
+ If the Work includes a “NOTICE” text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License.
52
+
53
+ You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License.
54
+
55
+ 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions.
56
+
57
+ 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file.
58
+
59
+ 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License.
60
+
61
+ 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages.
62
+
63
+ 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability.
64
+
65
+ END OF TERMS AND CONDITIONS
66
+
67
+
68
+
69
+ Other dependencies and licenses:
70
+
71
+
72
+ Open Source Software licensed under the Apache 2.0 license and Other Licenses of the Third-Party Components therein:
73
+ ---------------------------------------------
74
+ 1. basicsr
75
+ Copyright 2018-2020 BasicSR Authors
76
+
77
+
78
+ This BasicSR project is released under the Apache 2.0 license.
79
+
80
+ A copy of Apache 2.0 is included in this file.
81
+
82
+ StyleGAN2
83
+ The codes are modified from the repository stylegan2-pytorch. Many thanks to the author - Kim Seonghyeon 😊 for translating from the official TensorFlow codes to PyTorch ones. Here is the license of stylegan2-pytorch.
84
+ The official repository is https://github.com/NVlabs/stylegan2, and here is the NVIDIA license.
85
+ DFDNet
86
+ The codes are largely modified from the repository DFDNet. Their license is Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License.
87
+
88
+ Terms of the Nvidia License:
89
+ ---------------------------------------------
90
+
91
+ 1. Definitions
92
+
93
+ "Licensor" means any person or entity that distributes its Work.
94
+
95
+ "Software" means the original work of authorship made available under
96
+ this License.
97
+
98
+ "Work" means the Software and any additions to or derivative works of
99
+ the Software that are made available under this License.
100
+
101
+ "Nvidia Processors" means any central processing unit (CPU), graphics
102
+ processing unit (GPU), field-programmable gate array (FPGA),
103
+ application-specific integrated circuit (ASIC) or any combination
104
+ thereof designed, made, sold, or provided by Nvidia or its affiliates.
105
+
106
+ The terms "reproduce," "reproduction," "derivative works," and
107
+ "distribution" have the meaning as provided under U.S. copyright law;
108
+ provided, however, that for the purposes of this License, derivative
109
+ works shall not include works that remain separable from, or merely
110
+ link (or bind by name) to the interfaces of, the Work.
111
+
112
+ Works, including the Software, are "made available" under this License
113
+ by including in or with the Work either (a) a copyright notice
114
+ referencing the applicability of this License to the Work, or (b) a
115
+ copy of this License.
116
+
117
+ 2. License Grants
118
+
119
+ 2.1 Copyright Grant. Subject to the terms and conditions of this
120
+ License, each Licensor grants to you a perpetual, worldwide,
121
+ non-exclusive, royalty-free, copyright license to reproduce,
122
+ prepare derivative works of, publicly display, publicly perform,
123
+ sublicense and distribute its Work and any resulting derivative
124
+ works in any form.
125
+
126
+ 3. Limitations
127
+
128
+ 3.1 Redistribution. You may reproduce or distribute the Work only
129
+ if (a) you do so under this License, (b) you include a complete
130
+ copy of this License with your distribution, and (c) you retain
131
+ without modification any copyright, patent, trademark, or
132
+ attribution notices that are present in the Work.
133
+
134
+ 3.2 Derivative Works. You may specify that additional or different
135
+ terms apply to the use, reproduction, and distribution of your
136
+ derivative works of the Work ("Your Terms") only if (a) Your Terms
137
+ provide that the use limitation in Section 3.3 applies to your
138
+ derivative works, and (b) you identify the specific derivative
139
+ works that are subject to Your Terms. Notwithstanding Your Terms,
140
+ this License (including the redistribution requirements in Section
141
+ 3.1) will continue to apply to the Work itself.
142
+
143
+ 3.3 Use Limitation. The Work and any derivative works thereof only
144
+ may be used or intended for use non-commercially. The Work or
145
+ derivative works thereof may be used or intended for use by Nvidia
146
+ or its affiliates commercially or non-commercially. As used herein,
147
+ "non-commercially" means for research or evaluation purposes only.
148
+
149
+ 3.4 Patent Claims. If you bring or threaten to bring a patent claim
150
+ against any Licensor (including any claim, cross-claim or
151
+ counterclaim in a lawsuit) to enforce any patents that you allege
152
+ are infringed by any Work, then your rights under this License from
153
+ such Licensor (including the grants in Sections 2.1 and 2.2) will
154
+ terminate immediately.
155
+
156
+ 3.5 Trademarks. This License does not grant any rights to use any
157
+ Licensor's or its affiliates' names, logos, or trademarks, except
158
+ as necessary to reproduce the notices described in this License.
159
+
160
+ 3.6 Termination. If you violate any term of this License, then your
161
+ rights under this License (including the grants in Sections 2.1 and
162
+ 2.2) will terminate immediately.
163
+
164
+ 4. Disclaimer of Warranty.
165
+
166
+ THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
167
+ KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
168
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
169
+ NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
170
+ THIS LICENSE.
171
+
172
+ 5. Limitation of Liability.
173
+
174
+ EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
175
+ THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
176
+ SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
177
+ INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
178
+ OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
179
+ (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
180
+ LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
181
+ COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
182
+ THE POSSIBILITY OF SUCH DAMAGES.
183
+
184
+ MIT License
185
+
186
+ Copyright (c) 2019 Kim Seonghyeon
187
+
188
+ Permission is hereby granted, free of charge, to any person obtaining a copy
189
+ of this software and associated documentation files (the "Software"), to deal
190
+ in the Software without restriction, including without limitation the rights
191
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
192
+ copies of the Software, and to permit persons to whom the Software is
193
+ furnished to do so, subject to the following conditions:
194
+
195
+ The above copyright notice and this permission notice shall be included in all
196
+ copies or substantial portions of the Software.
197
+
198
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
199
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
200
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
201
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
202
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
203
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
204
+ SOFTWARE.
205
+
206
+
207
+
208
+ Open Source Software licensed under the BSD 3-Clause license:
209
+ ---------------------------------------------
210
+ 1. torchvision
211
+ Copyright (c) Soumith Chintala 2016,
212
+ All rights reserved.
213
+
214
+ 2. torch
215
+ Copyright (c) 2016- Facebook, Inc (Adam Paszke)
216
+ Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
217
+ Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
218
+ Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
219
+ Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
220
+ Copyright (c) 2011-2013 NYU (Clement Farabet)
221
+ Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)
222
+ Copyright (c) 2006 Idiap Research Institute (Samy Bengio)
223
+ Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)
224
+
225
+
226
+ Terms of the BSD 3-Clause License:
227
+ ---------------------------------------------
228
+ Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
229
+
230
+ 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
231
+
232
+ 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
233
+
234
+ 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
235
+
236
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
237
+
238
+
239
+
240
+ Open Source Software licensed under the BSD 3-Clause License and Other Licenses of the Third-Party Components therein:
241
+ ---------------------------------------------
242
+ 1. numpy
243
+ Copyright (c) 2005-2020, NumPy Developers.
244
+ All rights reserved.
245
+
246
+ A copy of BSD 3-Clause License is included in this file.
247
+
248
+ The NumPy repository and source distributions bundle several libraries that are
249
+ compatibly licensed. We list these here.
250
+
251
+ Name: Numpydoc
252
+ Files: doc/sphinxext/numpydoc/*
253
+ License: BSD-2-Clause
254
+ For details, see doc/sphinxext/LICENSE.txt
255
+
256
+ Name: scipy-sphinx-theme
257
+ Files: doc/scipy-sphinx-theme/*
258
+ License: BSD-3-Clause AND PSF-2.0 AND Apache-2.0
259
+ For details, see doc/scipy-sphinx-theme/LICENSE.txt
260
+
261
+ Name: lapack-lite
262
+ Files: numpy/linalg/lapack_lite/*
263
+ License: BSD-3-Clause
264
+ For details, see numpy/linalg/lapack_lite/LICENSE.txt
265
+
266
+ Name: tempita
267
+ Files: tools/npy_tempita/*
268
+ License: MIT
269
+ For details, see tools/npy_tempita/license.txt
270
+
271
+ Name: dragon4
272
+ Files: numpy/core/src/multiarray/dragon4.c
273
+ License: MIT
274
+ For license text, see numpy/core/src/multiarray/dragon4.c
275
+
276
+
277
+
278
+ Open Source Software licensed under the MIT license:
279
+ ---------------------------------------------
280
+ 1. facexlib
281
+ Copyright (c) 2020 Xintao Wang
282
+
283
+ 2. opencv-python
284
+ Copyright (c) Olli-Pekka Heinisuo
285
+ Please note that only files in cv2 package are used.
286
+
287
+
288
+ Terms of the MIT License:
289
+ ---------------------------------------------
290
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
291
+
292
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
293
+
294
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
295
+
296
+
297
+
298
+ Open Source Software licensed under the MIT license and Other Licenses of the Third-Party Components therein:
299
+ ---------------------------------------------
300
+ 1. tqdm
301
+ Copyright (c) 2013 noamraph
302
+
303
+ `tqdm` is a product of collaborative work.
304
+ Unless otherwise stated, all authors (see commit logs) retain copyright
305
+ for their respective work, and release the work under the MIT licence
306
+ (text below).
307
+
308
+ Exceptions or notable authors are listed below
309
+ in reverse chronological order:
310
+
311
+ * files: *
312
+ MPLv2.0 2015-2020 (c) Casper da Costa-Luis
313
+ [casperdcl](https://github.com/casperdcl).
314
+ * files: tqdm/_tqdm.py
315
+ MIT 2016 (c) [PR #96] on behalf of Google Inc.
316
+ * files: tqdm/_tqdm.py setup.py README.rst MANIFEST.in .gitignore
317
+ MIT 2013 (c) Noam Yorav-Raphael, original author.
318
+
319
+ [PR #96]: https://github.com/tqdm/tqdm/pull/96
320
+
321
+
322
+ Mozilla Public Licence (MPL) v. 2.0 - Exhibit A
323
+ -----------------------------------------------
324
+
325
+ This Source Code Form is subject to the terms of the
326
+ Mozilla Public License, v. 2.0.
327
+ If a copy of the MPL was not distributed with this file,
328
+ You can obtain one at https://mozilla.org/MPL/2.0/.
329
+
330
+
331
+ MIT License (MIT)
332
+ -----------------
333
+
334
+ Copyright (c) 2013 noamraph
335
+
336
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
337
+ this software and associated documentation files (the "Software"), to deal in
338
+ the Software without restriction, including without limitation the rights to
339
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
340
+ the Software, and to permit persons to whom the Software is furnished to do so,
341
+ subject to the following conditions:
342
+
343
+ The above copyright notice and this permission notice shall be included in all
344
+ copies or substantial portions of the Software.
345
+
346
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
347
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
348
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
349
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
350
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
351
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
third_part/GFPGAN/gfpgan/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ # flake8: noqa
2
+
3
+ from .archs import *
4
+ from .data import *
5
+ from .models import *
6
+ from .utils import *
7
+
8
+ # from .version import *
third_part/GFPGAN/gfpgan/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (259 Bytes). View file
 
third_part/GFPGAN/gfpgan/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (263 Bytes). View file
 
third_part/GFPGAN/gfpgan/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (263 Bytes). View file
 
third_part/GFPGAN/gfpgan/__pycache__/utils.cpython-37.pyc ADDED
Binary file (4 kB). View file
 
third_part/GFPGAN/gfpgan/__pycache__/utils.cpython-38.pyc ADDED
Binary file (4.09 kB). View file
 
third_part/GFPGAN/gfpgan/__pycache__/utils.cpython-39.pyc ADDED
Binary file (4.08 kB). View file
 
third_part/GFPGAN/gfpgan/archs/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ import importlib
2
+ from basicsr.utils import scandir
3
+ from os import path as osp
4
+
5
+ # automatically scan and import arch modules for registry
6
+ # scan all the files that end with '_arch.py' under the archs folder
7
+ arch_folder = osp.dirname(osp.abspath(__file__))
8
+ arch_filenames = [osp.splitext(osp.basename(v))[0] for v in scandir(arch_folder) if v.endswith('_arch.py')]
9
+ # import all the arch modules
10
+ _arch_modules = [importlib.import_module(f'gfpgan.archs.{file_name}') for file_name in arch_filenames]
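
Note: the scan above only needs to import each `*_arch.py` module so that its registration decorator runs and the architecture becomes discoverable by name. Below is a minimal sketch of what such an auto-imported module typically looks like, assuming basicsr's ARCH_REGISTRY; the class name `ToyArch` is hypothetical and is not a file in this diff.

# toy_arch.py -- illustrative sketch only, not part of this repository
import torch.nn as nn
from basicsr.utils.registry import ARCH_REGISTRY  # registry shipped with basicsr

@ARCH_REGISTRY.register()  # importing this module is enough to register the class
class ToyArch(nn.Module):
    """Tiny example architecture used only to illustrate the registry pattern."""

    def __init__(self, num_feat=64):
        super().__init__()
        self.conv = nn.Conv2d(3, num_feat, 3, 1, 1)

    def forward(self, x):
        return self.conv(x)

Once registered this way, code elsewhere can build the network from a config string, e.g. ARCH_REGISTRY.get('ToyArch')(num_feat=64), which is why the package __init__ eagerly imports every arch module.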
third_part/GFPGAN/gfpgan/archs/__pycache__/__init__.cpython-37.pyc ADDED
Binary file (736 Bytes). View file
 
third_part/GFPGAN/gfpgan/archs/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (754 Bytes). View file
 
third_part/GFPGAN/gfpgan/archs/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (744 Bytes). View file
 
third_part/GFPGAN/gfpgan/archs/__pycache__/arcface_arch.cpython-37.pyc ADDED
Binary file (7.48 kB). View file
 
third_part/GFPGAN/gfpgan/archs/__pycache__/arcface_arch.cpython-38.pyc ADDED
Binary file (7.45 kB). View file
 
third_part/GFPGAN/gfpgan/archs/__pycache__/arcface_arch.cpython-39.pyc ADDED
Binary file (7.44 kB). View file
 
third_part/GFPGAN/gfpgan/archs/__pycache__/gfpgan_bilinear_arch.cpython-37.pyc ADDED
Binary file (9.12 kB). View file
 
third_part/GFPGAN/gfpgan/archs/__pycache__/gfpgan_bilinear_arch.cpython-38.pyc ADDED
Binary file (9.18 kB). View file
 
third_part/GFPGAN/gfpgan/archs/__pycache__/gfpgan_bilinear_arch.cpython-39.pyc ADDED
Binary file (9.16 kB). View file
 
third_part/GFPGAN/gfpgan/archs/__pycache__/gfpganv1_arch.cpython-37.pyc ADDED
Binary file (13 kB). View file
 
third_part/GFPGAN/gfpgan/archs/__pycache__/gfpganv1_arch.cpython-38.pyc ADDED
Binary file (12.9 kB). View file
 
third_part/GFPGAN/gfpgan/archs/__pycache__/gfpganv1_arch.cpython-39.pyc ADDED
Binary file (12.9 kB). View file
 
third_part/GFPGAN/gfpgan/archs/__pycache__/gfpganv1_clean_arch.cpython-37.pyc ADDED
Binary file (9.59 kB). View file
 
third_part/GFPGAN/gfpgan/archs/__pycache__/gfpganv1_clean_arch.cpython-38.pyc ADDED
Binary file (9.6 kB). View file
 
third_part/GFPGAN/gfpgan/archs/__pycache__/gfpganv1_clean_arch.cpython-39.pyc ADDED
Binary file (9.59 kB). View file
 
third_part/GFPGAN/gfpgan/archs/__pycache__/stylegan2_bilinear_arch.cpython-37.pyc ADDED
Binary file (18.1 kB). View file
 
third_part/GFPGAN/gfpgan/archs/__pycache__/stylegan2_bilinear_arch.cpython-38.pyc ADDED
Binary file (18 kB). View file
 
third_part/GFPGAN/gfpgan/archs/__pycache__/stylegan2_bilinear_arch.cpython-39.pyc ADDED
Binary file (18 kB). View file
 
third_part/GFPGAN/gfpgan/archs/__pycache__/stylegan2_clean_arch.cpython-37.pyc ADDED
Binary file (11.8 kB). View file
 
third_part/GFPGAN/gfpgan/archs/__pycache__/stylegan2_clean_arch.cpython-38.pyc ADDED
Binary file (11.8 kB). View file
 
third_part/GFPGAN/gfpgan/archs/__pycache__/stylegan2_clean_arch.cpython-39.pyc ADDED
Binary file (11.8 kB). View file