#from __future__ import absolute_import
import sys
import io
import os
sys.argv = ['GPT_eval_multi.py']

# Add the project root directory to sys.path
PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(1, PROJECT_ROOT)
CKPT_ROOT="/cfs-datasets/public_models/motion"

from .options import option_transformer as option_trans

print(sys.path[0])

import clip
import torch
import cv2
import numpy as np
from .models import vqvae
from .models import t2m_trans as trans
import warnings
from .visualization import plot_3d_global as plot_3d
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from tqdm import tqdm
from mpl_toolkits.mplot3d import Axes3D
from PIL import Image

import time
import random


warnings.filterwarnings('ignore')
from matplotlib.axes._axes import _log as matplotlib_axes_logger
matplotlib_axes_logger.setLevel('ERROR')

from math import cos, sin, radians

args = option_trans.get_args_parser()

args.dataname = 't2m'
args.resume_pth = os.path.join(CKPT_ROOT,'pretrained/VQVAE/net_last.pth')
args.resume_trans = os.path.join(CKPT_ROOT,'pretrained/VQTransformer_corruption05/net_best_fid.pth')
args.down_t = 2
args.depth = 3
args.block_size = 51

def replace_space_with_underscore(s):
    return s.replace(' ', '_')


def Rz(angle):
    """Rotation matrix around the z-axis for `angle` degrees."""
    theta = radians(angle)
    return np.array([[cos(theta), -sin(theta), 0],
                     [sin(theta),  cos(theta), 0],
                     [0,           0,          1]])


def Rx(angle):
    """Rotation matrix around the x-axis for `angle` degrees."""
    theta = radians(angle)
    return np.array([[1, 0,           0],
                     [0, cos(theta), -sin(theta)],
                     [0, sin(theta),  cos(theta)]])
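
# Usage sketch for the rotation helpers above (illustrative only, not part of the
# pipeline): rotate an (N, 3) array of keypoints by 90 degrees around the x-axis
# by right-multiplying with the transposed rotation matrix.
#
#     rotated = keypoints @ Rx(90).T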

def generate_cuid():
    """Build a quasi-unique id from a hex millisecond timestamp plus a random hex suffix."""
    timestamp = hex(int(time.time() * 1000))[2:]
    random_str = hex(random.randint(0, 0xfffff))[2:]
    return (timestamp + random_str).zfill(10)

def smpl_to_openpose18(smpl_keypoints):
    '''
    Mapping from the 22 SMPL keypoints to the OpenPose-18 layout.

    SMPL kinematic chains (joint indices):

    [0, 2, 5, 8, 11]
    Left leg: pelvis (0) -> left thigh (2) -> left calf (5) -> left foot (8) -> left toes (11).

    [0, 1, 4, 7, 10]
    Right leg: pelvis (0) -> right thigh (1) -> right calf (4) -> right foot (7) -> right toes (10).

    [0, 3, 6, 9, 12, 15]
    Torso: pelvis (0) -> spine (3) -> neck (6) -> head (9) -> left shoulder (12) -> right shoulder (15).

    [9, 14, 17, 19, 21]
    Left arm: left shoulder (9) -> left upper arm (14) -> left forearm (17) -> left wrist (19) -> left hand (21).

    [9, 13, 16, 18, 20]
    Right arm: right shoulder (9) -> right upper arm (13) -> right forearm (16) -> right wrist (18) -> right hand (20).

    The conversion to OpenPose currently ignores the SMPL shoulder keypoints.
    '''
    openpose_keypoints = np.zeros((18, 3))
    openpose_keypoints[0] = smpl_keypoints[9] # nose
    openpose_keypoints[0][1] = openpose_keypoints[0][1] + 0.3  # lift the nose above the head joint


    openpose_keypoints[1] = smpl_keypoints[6] # neck
    openpose_keypoints[2] = smpl_keypoints[16] # right shoulder 
    openpose_keypoints[3] = smpl_keypoints[18] # right elbow
    openpose_keypoints[4] = smpl_keypoints[20] # right wrist
    openpose_keypoints[5] = smpl_keypoints[17] # left shoulder
    openpose_keypoints[6] = smpl_keypoints[19] # left elbow
    openpose_keypoints[7] = smpl_keypoints[21] # left wrist

    # TODO: Experiment - recompute the neck as the midpoint of the two shoulders and keep the nose at a fixed offset above the neck
    openpose_keypoints[1][0]=(openpose_keypoints[2][0]+openpose_keypoints[5][0])/2
    openpose_keypoints[1][1]=(openpose_keypoints[2][1]+openpose_keypoints[5][1])/2
    openpose_keypoints[1][2]=(openpose_keypoints[2][2]+openpose_keypoints[5][2])/2
    openpose_keypoints[0][1] = openpose_keypoints[1][1] + 0.3  # nose sits 0.3 above the recomputed neck


    openpose_keypoints[8] = smpl_keypoints[1] # right hip
    openpose_keypoints[9] = smpl_keypoints[4] # right knee
    openpose_keypoints[10] = smpl_keypoints[7] # right ankle
    openpose_keypoints[11] = smpl_keypoints[2] # left hip
    openpose_keypoints[12] = smpl_keypoints[5] # left knee
    openpose_keypoints[13] = smpl_keypoints[8] # left ankle

    # TODO: Experiment - manually place the face keypoints to test whether they can control the body orientation
    #openpose_keypoints[0][0] = openpose_keypoints[0][0]+0.3  # test axis 0 direction (horizontal, to the right)
    #openpose_keypoints[0][2] = openpose_keypoints[0][2]      # test axis 2 direction (outward)
    #openpose_keypoints[0][1] = openpose_keypoints[0][1]+0.5  # test axis 1 direction (vertical, upward)
    openpose_keypoints[14] = openpose_keypoints[0] # right eye
    openpose_keypoints[14][1]=openpose_keypoints[14][1]+0.05
    openpose_keypoints[14][0]=openpose_keypoints[14][0]+0.3*(openpose_keypoints[2][0]-openpose_keypoints[1][0])
    openpose_keypoints[14][2]=openpose_keypoints[14][2]+0.3*(openpose_keypoints[2][2]-openpose_keypoints[1][2])

    openpose_keypoints[15] = openpose_keypoints[0] # left eye
    openpose_keypoints[15][1]=openpose_keypoints[15][1]+0.05
    openpose_keypoints[15][0]=openpose_keypoints[15][0]+0.3*(openpose_keypoints[5][0]-openpose_keypoints[1][0])
    openpose_keypoints[15][2]=openpose_keypoints[15][2]+0.3*(openpose_keypoints[5][2]-openpose_keypoints[1][2])
    
    openpose_keypoints[16] = openpose_keypoints[0] # right ear
    openpose_keypoints[16][0]=openpose_keypoints[16][0]+0.7*(openpose_keypoints[2][0]-openpose_keypoints[1][0])
    openpose_keypoints[16][2]=openpose_keypoints[16][2]+0.7*(openpose_keypoints[2][2]-openpose_keypoints[1][2])    
    
    openpose_keypoints[17] = openpose_keypoints[0] # left ear
    openpose_keypoints[17][0]=openpose_keypoints[17][0]+0.7*(openpose_keypoints[5][0]-openpose_keypoints[1][0])
    openpose_keypoints[17][2]=openpose_keypoints[17][2]+0.7*(openpose_keypoints[5][2]-openpose_keypoints[1][2])    
    
    return openpose_keypoints
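
# Usage sketch (illustrative only, not part of the pipeline): given one frame of
# SMPL joints with shape (22, 3), the converter returns an (18, 3) array in
# OpenPose-18 joint order (nose, neck, shoulders, elbows, wrists, hips, knees,
# ankles, eyes, ears):
#
#     smpl_frame = np.zeros((22, 3))           # one frame of SMPL joint positions
#     op18 = smpl_to_openpose18(smpl_frame)    # (18, 3) OpenPose keypoints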






# TODO: debug only, needs to be removed before upload
## load clip model and datasets
clip_model, clip_preprocess = clip.load("ViT-B/32", device=torch.device('cuda'), jit=False, download_root=CKPT_ROOT)  # Must set jit=False for training
clip.model.convert_weights(clip_model)  # technically unnecessary: CLIP weights are already float16 by default
clip_model.eval()
for p in clip_model.parameters():
    p.requires_grad = False
print("loaded CLIP model")
net = vqvae.HumanVQVAE(args, ## use args to define different parameters in different quantizers
                    args.nb_code,
                    args.code_dim,
                    args.output_emb_width,
                    args.down_t,
                    args.stride_t,
                    args.width,
                    args.depth,
                    args.dilation_growth_rate)


trans_encoder = trans.Text2Motion_Transformer(num_vq=args.nb_code,
                                embed_dim=1024,
                                clip_dim=args.clip_dim,
                                block_size=args.block_size,
                                num_layers=9,
                                n_head=16,
                                drop_out_rate=args.drop_out_rate,
                                fc_rate=args.ff_rate)
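
# Pipeline overview (as used in get_open_pose below): CLIP encodes the text prompt,
# trans_encoder autoregressively samples VQ code indices from the text feature, and
# net.forward_decoder turns those indices back into a normalized pose sequence.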


print ('loading checkpoint from {}'.format(args.resume_pth))
ckpt = torch.load(args.resume_pth, map_location='cpu')
net.load_state_dict(ckpt['net'], strict=True)
net.eval()
net.cuda()

print ('loading transformer checkpoint from {}'.format(args.resume_trans))
ckpt = torch.load(args.resume_trans, map_location='cpu')
trans_encoder.load_state_dict(ckpt['trans'], strict=True)
trans_encoder.eval()
trans_encoder.cuda()

mean = torch.from_numpy(np.load(os.path.join(CKPT_ROOT,'./checkpoints/t2m/VQVAEV3_CB1024_CMT_H1024_NRES3/meta/mean.npy'))).cuda()
std = torch.from_numpy(np.load(os.path.join(CKPT_ROOT,'./checkpoints/t2m/VQVAEV3_CB1024_CMT_H1024_NRES3/meta/std.npy'))).cuda()
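
# `mean`/`std` are the dataset statistics used to de-normalize the decoder output
# below (pred_pose * std + mean) before recovering joint positions from the RIC
# representation.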



def get_open_pose(text, height, width, save_path, video_length):
    # NOTE: this local CKPT_ROOT shadows the global checkpoint root and is only
    # used as the base directory for the rendered frames saved below.
    CKPT_ROOT = os.path.dirname(os.path.abspath(__file__))

    clip_text=[text]
    print(f"Motion Prompt: {text}")
    # cuid=generate_cuid()
    # print(f"Motion Generation cuid: {cuid}")

    # clip_text = ["the person jump and spin twice,then running straght and sit down. "]  #支持单个token的生成

    # change the text here



    text = clip.tokenize(clip_text, truncate=False).cuda()
    feat_clip_text = clip_model.encode_text(text).float()
    index_motion = trans_encoder.sample(feat_clip_text[0:1], False)
    pred_pose = net.forward_decoder(index_motion)

    from utils.motion_process import recover_from_ric
    pred_xyz = recover_from_ric((pred_pose*std+mean).float(), 22) 
    xyz = pred_xyz.reshape(1, -1, 22, 3) 

    np.save('motion.npy', xyz.detach().cpu().numpy())


    pose_vis = plot_3d.draw_to_batch(xyz.detach().cpu().numpy(),clip_text, ['smpl.gif'])

    res=xyz.detach().cpu().numpy()
    points_3d_list=res[0]
    frame_num=points_3d_list.shape[0]

    open_pose_list=np.array(points_3d_list)
    print("The total SMPL sequence shape is : "+str(open_pose_list.shape))

    max_val = np.max(open_pose_list, axis=(0, 1))
    min_val = np.min(open_pose_list, axis=(0, 1))

    print("三维坐标在坐标系上的最大值:", max_val)
    print("三维坐标在坐标系上的最小值:", min_val)


    check = smpl_to_openpose18(open_pose_list[0])  # 18 keypoints of the first frame
    print("********SMPL_2_OpenPose_List(14/18)********")
    print(check)
    print("*************************")
    print(f"Total Frame Number: {frame_num}")
    img_list=[]
    for step in tqdm(range(0, frame_num)):
        # render this frame
        dpi = 84
        fig = plt.figure(figsize=(width/dpi, height/dpi), dpi=dpi)
        ax = fig.add_subplot(111, projection='3d')
        limits = 2

        ax.set_xlim(-limits*0.7, limits*0.7)
        ax.set_ylim(0, limits*1.5)  # vertical (up/down)
        ax.set_zlim(0, limits*1.5)  # depth (front/back)
        ax.grid(False)
        #ax.dist = 1
        ax.set_box_aspect([1.4, 1.5, 1.5], zoom=3.5)  # axis aspect ratio. TODO: this ratio may be off and can push points outside the plot range

        # keypoint coordinates, one (x, y, z) row per joint
        keypoints = smpl_to_openpose18(open_pose_list[step])  # 18 keypoints

        # kinematic chain (only the body part is used for now)
        kinematic_chain = [(0, 1), (1, 2), (2, 3), (3, 4), (1, 5), (5, 6), (6, 7), (1, 8), (8, 9), (9, 10), (1, 11), (11, 12), (12, 13), (0, 14), (14, 16), (0, 15), (15, 17)]
        #kinematic_chain = [(0, 1), (1, 2), (2, 3), (3, 4), (1, 5), (5, 6), (6, 7), (1, 8), (8, 9), (9, 10), (1, 11), (11, 12), (12, 13)]

        # RGB colors
        colors = [(0, 0, 255), (0, 255, 255), (0, 255, 0), (255, 0, 0), (255, 0, 255), (255, 192, 203), (0, 165, 255), (19, 69, 139), (173, 216, 230), (34, 139, 34), (0, 0, 128), (184, 134, 11), (139, 0, 139), (0, 100, 0), (0, 255, 255), (0, 255, 0), (216, 191, 216), (255, 255, 224)]
        #colors=[(0, 0, 255), (0, 255, 255), (0, 255, 0), (255, 0, 0), (255, 0, 255), (255, 192, 203), (0, 165, 255), (19, 69, 139), (173, 216, 230), (34, 139, 34), (0, 0, 128), (184, 134, 11), (139, 0, 139), (0, 100, 0)]

        # 18-point OpenPose joint palette
        joint_colors = [(255,0,0),(255,85,0),(255,170,0),(255,255,0),(170,255,0),(85,255,0),(0,255,0),(0,255,85),(0,255,170),(0,255,255),(0,170,255),(0,85,255),(0,0,255),(85,0,255),(170,0,255),(255,0,255),(255,0,170),(255,0,85),(255,0,0)]
        # 14-point body-only palette
        #joint_colors=[(255,0,0),(255,85,0),(255,170,0),(255,255,0),(170,255,0),(85,255,0),(0,255,0),(0,255,85),(0,255,170),(0,255,255),(0,170,255),(0,85,255),(0,0,255),(85,0,255),(170,0,255)]

        # matplotlib expects color components in [0, 1]
        joint_rgb_color2 = []
        kinematic_chain_rgb_color2 = []
        for color in joint_colors:
            joint_rgb_color2.append(tuple([x/255 for x in color]))
            kinematic_chain_rgb_color2.append(tuple([x*0.6/255 for x in color]))  # chain segments use 60% of the joint color

        # draw the skeleton
        for i in range(0, 18):
            # draw the joint
            ax.scatter(keypoints[i][0], keypoints[i][1], keypoints[i][2], s=50, c=joint_rgb_color2[i], marker='o')

            # draw the kinematic-chain segments that end at this joint
            for j in range(len(kinematic_chain)):
                if kinematic_chain[j][1] == i:
                    ax.plot([keypoints[kinematic_chain[j][0]][0], keypoints[kinematic_chain[j][1]][0]], [keypoints[kinematic_chain[j][0]][1], keypoints[kinematic_chain[j][1]][1]], [keypoints[kinematic_chain[j][0]][2], keypoints[kinematic_chain[j][1]][2]], c=kinematic_chain_rgb_color2[i], linewidth=5)

        # adjust the camera viewpoint
        ax.view_init(elev=110, azim=-90)
        plt.axis('off')

        
        # save the frame to disk, then read it back as an RGB image array
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        image_tmp_path = f"{save_path}/{step}.jpg"
        plt.savefig(os.path.join(CKPT_ROOT, image_tmp_path))
        plt.close(fig)  # avoid accumulating open figures across frames
        img = cv2.imread(os.path.join(CKPT_ROOT, image_tmp_path))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img_list.append(img)
    res = []
    if len(img_list) >= video_length:
        key_frame_sample_step = int(len(img_list)/video_length)
    else:
        print("ERROR: requested video_length exceeds the number of generated frames")
        key_frame_sample_step = 1

    for i in range(0,len(img_list),key_frame_sample_step):
        res.append(img_list[i])
    
    return res
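
# Usage sketch (hypothetical arguments, shown only for illustration): render a
# prompt into 512x512 skeleton frames under ./openpose_frames and sample 16 key
# frames from the generated motion.
#
#     frames = get_open_pose("a person walks in a circle", 512, 512, "./openpose_frames", 16)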



def offline_get_open_pose(text,motion_text,height,width,save_path):
    #motion_text=text

    clip_text=[text]
    print(f"Motion Prompt: {text}")
    cuid=generate_cuid()
    print(f"Motion Generation cuid: {cuid}")

    # clip_text = ["the person jump and spin twice,then running straght and sit down. "]  #支持单个token的生成

    # change the text here



    text = clip.tokenize(clip_text, truncate=False).cuda()
    feat_clip_text = clip_model.encode_text(text).float()
    index_motion = trans_encoder.sample(feat_clip_text[0:1], False)
    pred_pose = net.forward_decoder(index_motion)

    from utils.motion_process import recover_from_ric
    pred_xyz = recover_from_ric((pred_pose*std+mean).float(), 22) 
    xyz = pred_xyz.reshape(1, -1, 22, 3) 
    res=xyz.detach().cpu().numpy()
    np.save(f'{save_path}/{replace_space_with_underscore(motion_text)}.npy', res)


    pose_vis = plot_3d.draw_to_batch(res,clip_text, ['smpl.gif'])
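
# Usage sketch (hypothetical arguments, shown only for illustration): generate a
# motion for a prompt offline and save the raw (1, T, 22, 3) joint sequence as
# ./motions/wave_hello.npy, plus an smpl.gif preview.
#
#     offline_get_open_pose("a person waves", "wave hello", 512, 512, "./motions")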
    



if __name__ == "__main__":

    text="walk around, jump, run straght."
    pose = get_open_pose(text,512,512)
    #pdb.set_trace()