Update engine.py
engine.py
CHANGED
@@ -9,10 +9,15 @@ import base64
 from queue import Queue
 from typing import Dict, Any, List, Optional, Union
 from functools import lru_cache
+from cv2 import transform
 import numpy as np
 import torch
 import torch.nn.functional as F
 from PIL import Image, ImageOps
+import tqdm
+from tqdm import tqdm as loader
+
+import cv2
 
 from liveportrait.config.argument_config import ArgumentConfig
 from liveportrait.utils.camera import get_rotation_matrix
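Reviewer note: of the five added import lines, only the `loader` alias is exercised by the code added in this commit; `from cv2 import transform` and the bare `import tqdm` look unused (assuming nothing outside the hunks shown picks them up), and `import cv2` may be needed elsewhere in the file. A minimal sketch of what the new code below actually requires:

```python
from tqdm import tqdm as loader  # progress bar wrapped around frame iteration
import cv2                       # keep only if other parts of engine.py use it
```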
@@ -62,6 +67,173 @@ class Engine:
 
         logger.info("✅ FacePoke Engine initialized successfully.")
 
+    async def load_frames(self, frames):
+        uid = str(uuid.uuid4())
+        for frame in loader(frames):
+            await self.load_frame(frame, uid)
+
+        return {
+            'u': uid
+        }
+
+    async def load_frame(self, frame, uid):
+        image = Image.fromarray(frame)
+        image = image.convert('RGB')
+
+        img_rgb = np.array(image)
+
+        inference_cfg = self.live_portrait.live_portrait_wrapper.cfg
+        img_rgb = await asyncio.to_thread(resize_to_limit, img_rgb, inference_cfg.ref_max_shape, inference_cfg.ref_shape_n)
+        crop_info = await asyncio.to_thread(self.live_portrait.cropper.crop_single_image, img_rgb)
+        img_crop_256x256 = crop_info['img_crop_256x256']
+
+        I_s = await asyncio.to_thread(self.live_portrait.live_portrait_wrapper.prepare_source, img_crop_256x256)
+        x_s_info = await asyncio.to_thread(self.live_portrait.live_portrait_wrapper.get_kp_info, I_s)
+        f_s = await asyncio.to_thread(self.live_portrait.live_portrait_wrapper.extract_feature_3d, I_s)
+        x_s = await asyncio.to_thread(self.live_portrait.live_portrait_wrapper.transform_keypoint, x_s_info)
+
+        processed_data = {
+            'img_rgb': img_rgb,
+            'crop_info': crop_info,
+            'x_s_info': x_s_info,
+            'f_s': f_s,
+            'x_s': x_s,
+            'inference_cfg': inference_cfg
+        }
+
+        if uid in self.processed_cache:
+            self.processed_cache[uid].append(processed_data)
+        else:
+            self.processed_cache[uid] = [processed_data]
+
+        # Calculate the bounding box
+        bbox_info = parse_bbox_from_landmark(processed_data['crop_info']['lmk_crop'], scale=1.0)
+
+        return {
+            'u': uid,
+        }
+
+    async def transform_video(self, uid: str, params: Dict[str, float]) -> bytes:
+        if uid not in self.processed_cache:
+            raise ValueError("cache miss")
+
+        data = self.processed_cache[uid]
+
+        for processed in loader(data):
+            _, image = await self.transform_frame(processed, params)
+            yield image
+
+    async def transform_frame(self, processed_data, params: Dict[str, float]) -> bytes:
+        try:
+            # Apply modifications based on params
+            x_d_new = processed_data['x_s_info']['kp'].clone()
+
+            # Adapted from https://github.com/PowerHouseMan/ComfyUI-AdvancedLivePortrait/blob/main/nodes.py#L408-L472
+            modifications = [
+                ('smile', [
+                    (0, 20, 1, -0.01), (0, 14, 1, -0.02), (0, 17, 1, 0.0065), (0, 17, 2, 0.003),
+                    (0, 13, 1, -0.00275), (0, 16, 1, -0.00275), (0, 3, 1, -0.0035), (0, 7, 1, -0.0035)
+                ]),
+                ('aaa', [
+                    (0, 19, 1, 0.001), (0, 19, 2, 0.0001), (0, 17, 1, -0.0001)
+                ]),
+                ('eee', [
+                    (0, 20, 2, -0.001), (0, 20, 1, -0.001), (0, 14, 1, -0.001)
+                ]),
+                ('woo', [
+                    (0, 14, 1, 0.001), (0, 3, 1, -0.0005), (0, 7, 1, -0.0005), (0, 17, 2, -0.0005)
+                ]),
+                ('wink', [
+                    (0, 11, 1, 0.001), (0, 13, 1, -0.0003), (0, 17, 0, 0.0003),
+                    (0, 17, 1, 0.0003), (0, 3, 1, -0.0003)
+                ]),
+                ('pupil_x', [
+                    (0, 11, 0, 0.0007 if params.get('pupil_x', 0) > 0 else 0.001),
+                    (0, 15, 0, 0.001 if params.get('pupil_x', 0) > 0 else 0.0007)
+                ]),
+                ('pupil_y', [
+                    (0, 11, 1, -0.001), (0, 15, 1, -0.001)
+                ]),
+                ('eyes', [
+                    (0, 11, 1, -0.001), (0, 13, 1, 0.0003), (0, 15, 1, -0.001), (0, 16, 1, 0.0003),
+                    (0, 1, 1, -0.00025), (0, 2, 1, 0.00025)
+                ]),
+                ('eyebrow', [
+                    (0, 1, 1, 0.001 if params.get('eyebrow', 0) > 0 else 0.0003),
+                    (0, 2, 1, -0.001 if params.get('eyebrow', 0) > 0 else -0.0003),
+                    (0, 1, 0, -0.001 if params.get('eyebrow', 0) <= 0 else 0),
+                    (0, 2, 0, 0.001 if params.get('eyebrow', 0) <= 0 else 0)
+                ]),
+                # Some other ones: https://github.com/jbilcke-hf/FacePoke/issues/22#issuecomment-2408708028
+                # Still need to check how exactly we would control those in the UI,
+                # as we don't yet have segmentation in the frontend UI for those body parts
+                #('lower_lip', [
+                #    (0, 19, 1, 0.02)
+                #]),
+                #('upper_lip', [
+                #    (0, 20, 1, -0.01)
+                #]),
+                #('neck', [(0, 5, 1, 0.01)]),
+            ]
+
+            for param_name, adjustments in modifications:
+                param_value = params.get(param_name, 0)
+                for i, j, k, factor in adjustments:
+                    x_d_new[i, j, k] += param_value * factor
+
+            # Special case for pupil_y affecting eyes
+            x_d_new[0, 11, 1] -= params.get('pupil_y', 0) * 0.001
+            x_d_new[0, 15, 1] -= params.get('pupil_y', 0) * 0.001
+            params['eyes'] = params.get('eyes', 0) - params.get('pupil_y', 0) / 2.
+
+
+            # Apply rotation
+            R_new = get_rotation_matrix(
+                processed_data['x_s_info']['pitch'] + params.get('rotate_pitch', 0),
+                processed_data['x_s_info']['yaw'] + params.get('rotate_yaw', 0),
+                processed_data['x_s_info']['roll'] + params.get('rotate_roll', 0)
+            )
+            x_d_new = processed_data['x_s_info']['scale'] * (x_d_new @ R_new) + processed_data['x_s_info']['t']
+
+            # Apply stitching
+            x_d_new = await asyncio.to_thread(self.live_portrait.live_portrait_wrapper.stitching, processed_data['x_s'], x_d_new)
+
+            # Generate the output
+            out = await asyncio.to_thread(self.live_portrait.live_portrait_wrapper.warp_decode, processed_data['f_s'], processed_data['x_s'], x_d_new)
+            I_p = await asyncio.to_thread(self.live_portrait.live_portrait_wrapper.parse_output, out['out'])
+
+            buffered = io.BytesIO()
+
+            ####################################################
+            # This part pastes the modified crop back into the original image.
+            #
+            # It is an expensive operation, not just because of the compute
+            # but because the payload is also bigger (we send back the whole picture).
+            #
+            # I'm currently running some experiments to do it in the frontend.
+            #
+            # --- old way: we do it on the server side: ---
+            mask_ori = await asyncio.to_thread(prepare_paste_back,
+                processed_data['inference_cfg'].mask_crop, processed_data['crop_info']['M_c2o'],
+                dsize=(processed_data['img_rgb'].shape[1], processed_data['img_rgb'].shape[0])
+            )
+            I_p_to_ori_blend = await asyncio.to_thread(paste_back,
+                I_p[0], processed_data['crop_info']['M_c2o'], processed_data['img_rgb'], mask_ori
+            )
+            result_image = Image.fromarray(I_p_to_ori_blend)
+
+            # --- maybe future way: do it in the frontend: ---
+            #result_image = Image.fromarray(I_p[0])
+            ####################################################
+
+            # write it into a webp
+            result_image.save(buffered, format="WebP", quality=82, lossless=False, method=6)
+
+            return [buffered.getvalue(), result_image]
+
+        except Exception as e:
+            raise ValueError(f"Failed to modify image: {str(e)}")
+
     @alru_cache(maxsize=512)
     async def load_image(self, data):
         image = Image.open(io.BytesIO(data))
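For readers skimming the big hunk above: each `(i, j, k, factor)` tuple in `modifications` nudges one scalar of the implicit-keypoint tensor, scaled by the corresponding UI parameter. A minimal sketch of that update rule, assuming LivePortrait's usual `(batch, keypoint, xyz)` layout with 21 implicit keypoints (the toy tensor and values are illustrative, not from the codebase):

```python
import torch

# Toy stand-in for x_s_info['kp']: 1 batch entry, 21 implicit keypoints, xyz coords.
x_d_new = torch.zeros(1, 21, 3)

params = {'smile': 1.0}
adjustments = [(0, 20, 1, -0.01), (0, 14, 1, -0.02)]  # first two 'smile' rows above

for i, j, k, factor in adjustments:
    x_d_new[i, j, k] += params.get('smile', 0) * factor

print(x_d_new[0, 20, 1].item())  # -0.01: keypoint 20's y coordinate, scaled by smile=1.0
```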
@@ -222,7 +394,7 @@ class Engine:
             # write it into a webp
             result_image.save(buffered, format="WebP", quality=82, lossless=False, method=6)
 
-            return buffered.getvalue()
+            return [buffered.getvalue(), result_image]
 
         except Exception as e:
             raise ValueError(f"Failed to modify image: {str(e)}")
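Reviewer note: this last hunk changes the method's return value (presumably `modify_image`, given the error message) from plain WebP bytes to a `[bytes, PIL.Image]` pair, matching the new `transform_frame`. Existing callers must now unpack it; a hedged before/after sketch with a hypothetical caller:

```python
# Before this commit, callers presumably did:
#     webp_bytes = await engine.modify_image(image_data, params)
# After it, the method returns a pair, so they must unpack:
webp_bytes, pil_image = await engine.modify_image(image_data, params)
```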
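One more note on the new video path: `transform_video` is annotated `-> bytes`, but because its body contains a `yield` it is actually an async generator that yields the `PIL.Image` half of each `transform_frame` result, and its "cache miss" `ValueError` only fires once iteration starts. A hedged sketch of how a caller might drive the new API end to end (the function name and frame source are assumptions, not part of this diff):

```python
import numpy as np

async def rerender(engine, frames: list[np.ndarray]) -> list:
    """Hypothetical caller: cache the frames once, then re-render with new params."""
    # load_frames() crops each frame and caches its keypoints under a fresh uid
    uid = (await engine.load_frames(frames))['u']

    rendered = []
    # transform_video contains a `yield`, so iterate it with `async for`
    # rather than awaiting it like a coroutine.
    async for image in engine.transform_video(uid, {'smile': 0.8, 'rotate_yaw': 5.0}):
        rendered.append(image)  # PIL.Image (transform_frame returns [webp_bytes, image])
    return rendered
```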