import os import sys import os.path as osp from pathlib import Path import cv2 import gradio as gr import torch import math import spaces from huggingface_hub import hf_hub_download try: import mmpose except: os.system('pip install /home/user/app/main/transformer_utils') hf_hub_download(repo_id="caizhongang/SMPLer-X", filename="smpler_x_h32.pth.tar", local_dir="/home/user/app/pretrained_models") os.system('cp -rf /home/user/app/assets/conversions.py /usr/local/lib/python3.10/site-packages/torchgeometry/core/conversions.py') DEFAULT_MODEL='smpler_x_h32' OUT_FOLDER = '/home/user/app/demo_out' os.makedirs(OUT_FOLDER, exist_ok=True) num_gpus = 1 if torch.cuda.is_available() else -1 print("!!!", torch.cuda.is_available()) print(torch.cuda.device_count()) print(torch.version.cuda) index = torch.cuda.current_device() print(index) print(torch.cuda.get_device_name(index)) # from main.inference import Inferer # inferer = Inferer(DEFAULT_MODEL, num_gpus, OUT_FOLDER) @spaces.GPU(enable_queue=True) def infer(video_input, in_threshold=0.5, num_people="Single person", render_mesh=False): from main.inference import Inferer inferer = Inferer(DEFAULT_MODEL, num_gpus, OUT_FOLDER) os.system(f'rm -rf {OUT_FOLDER}/*') multi_person = False if (num_people == "Single person") else True cap = cv2.VideoCapture(video_input) fps = math.ceil(cap.get(5)) width = int(cap.get(3)) height = int(cap.get(4)) fourcc = cv2.VideoWriter_fourcc(*'mp4v') video_path = osp.join(OUT_FOLDER, f'out.m4v') final_video_path = osp.join(OUT_FOLDER, f'out.mp4') video_output = cv2.VideoWriter(video_path, fourcc, fps, (width, height)) success = 1 frame = 0 while success: success, original_img = cap.read() if not success: break frame += 1 img, mesh_paths, smplx_paths = inferer.infer(original_img, in_threshold, frame, multi_person, not(render_mesh)) video_output.write(img) yield img, None, None, None cap.release() video_output.release() cv2.destroyAllWindows() os.system(f'ffmpeg -i {video_path} -c copy {final_video_path}') #Compress mesh and smplx files save_path_mesh = os.path.join(OUT_FOLDER, 'mesh') save_mesh_file = os.path.join(OUT_FOLDER, 'mesh.zip') os.makedirs(save_path_mesh, exist_ok= True) save_path_smplx = os.path.join(OUT_FOLDER, 'smplx') save_smplx_file = os.path.join(OUT_FOLDER, 'smplx.zip') os.makedirs(save_path_smplx, exist_ok= True) os.system(f'zip -r {save_mesh_file} {save_path_mesh}') os.system(f'zip -r {save_smplx_file} {save_path_smplx}') yield img, video_path, save_mesh_file, save_smplx_file TITLE = '''
Note: You can drop a video at the panel (or select one of the examples) to obtain the 3D parametric reconstructions of the detected humans.
''' with gr.Blocks(title="SMPLer-X", css=".gradio-container") as demo: gr.Markdown(TITLE) gr.HTML(VIDEO) gr.Markdown(DESCRIPTION) with gr.Row(): with gr.Column(): video_input = gr.Video(label="Input video", elem_classes="video") threshold = gr.Slider(0, 1.0, value=0.5, label='BBox detection threshold') with gr.Column(scale=2): num_people = gr.Radio( choices=["Single person", "Multiple people"], value="Single person", label="Number of people", info="Choose how many people are there in the video. Choose 'single person' for faster inference.", interactive=True, scale=1,) gr.HTML("""