# sapiens-demo / app.py
import os
import gradio as gr
import numpy as np
from PIL import Image
import cv2
import spaces
from inference.seg import process_image_or_video as process_seg
from inference.pose import process_image_or_video as process_pose
from inference.depth import process_image_or_video as process_depth
from inference.normal import process_image_or_video as process_normal
from config import SAPIENS_LITE_MODELS_PATH
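

# SAPIENS_LITE_MODELS_PATH maps each task ("seg", "pose", "depth", "normal")
# to the Sapiens Lite checkpoints available for it; it populates the
# model-version dropdowns below.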
def update_model_choices(task):
    """Return a refreshed dropdown listing the model versions for the selected task."""
    model_choices = list(SAPIENS_LITE_MODELS_PATH[task.lower()].keys())
    return gr.Dropdown(choices=model_choices, value=model_choices[0] if model_choices else None)
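

# Each handler runs on a ZeroGPU worker; `duration` caps the expected GPU
# time per call, in seconds.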
@spaces.GPU(duration=75)
def process_image(input_image, task, version):
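    # Gradio may deliver the upload as a NumPy array; normalize to PIL first.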
if isinstance(input_image, np.ndarray):
input_image = Image.fromarray(input_image)
if task.lower() == 'seg':
result = process_seg(input_image, task=task.lower(), version=version)
elif task.lower() == 'pose':
result = process_pose(input_image, task=task.lower(), version=version)
elif task.lower() == 'depth':
result = process_depth(input_image, task=task.lower(), version=version)
elif task.lower() == 'normal':
result = process_normal(input_image, task=task.lower(), version=version)
    else:
        result = None
        print(f"Unsupported task: {task}")
return result
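

# Decode the input video frame by frame, run the selected task on each
# frame, and re-encode the results at the source FPS and resolution.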
@spaces.GPU(duration=75)
def process_video(input_video, task, version):
cap = cv2.VideoCapture(input_video)
fps = cap.get(cv2.CAP_PROP_FPS)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
output_video = cv2.VideoWriter('output_video.mp4', cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
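    # Note: cv2.VideoWriter expects every frame at exactly (width, height).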
while cap.isOpened():
ret, frame = cap.read()
if not ret:
break
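        # OpenCV decodes frames as BGR; the Sapiens pipelines expect RGB.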
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
if task.lower() == 'seg':
processed_frame = process_seg(frame_rgb, task=task.lower(), version=version)
elif task.lower() == 'pose':
processed_frame = process_pose(frame_rgb, task=task.lower(), version=version)
elif task.lower() == 'depth':
processed_frame = process_depth(frame_rgb, task=task.lower(), version=version)
elif task.lower() == 'normal':
processed_frame = process_normal(frame_rgb, task=task.lower(), version=version)
        else:
            processed_frame = None
            print(f"Unsupported task: {task}")
            break
if processed_frame is not None:
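            # Convert back to BGR before handing the frame to OpenCV's writer.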
processed_frame_bgr = cv2.cvtColor(np.array(processed_frame), cv2.COLOR_RGB2BGR)
output_video.write(processed_frame_bgr)
cap.release()
output_video.release()
return 'output_video.mp4'
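

# Gradio UI: one tab for single images, one for videos; each tab exposes the
# same task selector and model-version dropdown.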
with gr.Blocks() as demo:
gr.Markdown("""
<div style="text-align: center; font-size: 35px; font-weight: bold; margin-bottom: 20px;">
        Sapiens Hugging Face Space 🤗
</div>
<div style="text-align: center; font-size: 25px; font-weight: bold; margin-bottom: 20px;">
Foundation for Human Vision Models
</div>
<div style="text-align: center;">
<a href="https://huggingface.co/facebook/sapiens">🤗 Sapiens Models</a> |
<a href="https://github.com/facebookresearch/sapiens/">🌐 Github</a> |
<a href="https://www.arxiv.org/abs/2408.12569">📜 arxiv </a> |
<a href="https://joselo.ai">🔗Personal Blog </a>
</div>
<div style="text-align: center; font-size: 15px; font-weight: bold; margin-bottom: 20px;">
        Sapiens is a family of models for four fundamental human-centric vision tasks: 2D pose estimation, body-part segmentation, depth estimation, and surface normal prediction.
</div>
""")
with gr.Tabs():
with gr.TabItem('Image'):
with gr.Row():
with gr.Column():
input_image = gr.Image(label="Input Image", type="pil")
select_task_image = gr.Radio(
["seg", "pose", "depth", "normal"],
label="Task",
info="Choose the task to perform",
value="seg"
)
model_name_image = gr.Dropdown(
label="Model Version",
choices=list(SAPIENS_LITE_MODELS_PATH["seg"].keys()),
value="sapiens_0.3b",
)
with gr.Column():
result_image = gr.Image(label="Result")
run_button_image = gr.Button("Run")
with gr.TabItem('Video'):
with gr.Row():
with gr.Column():
input_video = gr.Video(label="Input Video")
select_task_video = gr.Radio(
["seg", "pose", "depth", "normal"],
label="Task",
info="Choose the task to perform",
value="seg"
)
model_name_video = gr.Dropdown(
label="Model Version",
choices=list(SAPIENS_LITE_MODELS_PATH["seg"].keys()),
value="sapiens_0.3b",
)
with gr.Column():
result_video = gr.Video(label="Result")
run_button_video = gr.Button("Run")
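
    # Keep each model-version dropdown in sync with its task selector, and
    # wire the Run buttons to the corresponding handlers.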
select_task_image.change(fn=update_model_choices, inputs=select_task_image, outputs=model_name_image)
select_task_video.change(fn=update_model_choices, inputs=select_task_video, outputs=model_name_video)
run_button_image.click(
fn=process_image,
inputs=[input_image, select_task_image, model_name_image],
outputs=[result_image],
)
run_button_video.click(
fn=process_video,
inputs=[input_video, select_task_video, model_name_video],
outputs=[result_video],
)

if __name__ == "__main__":
    demo.launch(share=False)