# Spaces: Running on Zero
import gradio as gr | |
import torch | |
import numpy as np | |
from transformers import AutoModel | |
from theia.decoding import load_feature_stats, prepare_depth_decoder, prepare_mask_generator, decode_everything | |
# Select the compute device once at import time: the first CUDA device when
# PyTorch reports CUDA as available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"
# Models and feature statistics are expensive to load, so they are loaded on
# the first request and reused by every later call to ``run_theia``.
_THEIA_RESOURCES = {}


def _load_theia_resources():
    """Load (once) and return the models and statistics used by ``run_theia``.

    Returns:
        A dict with the keys ``theia_model``, ``feature_means``,
        ``feature_vars``, ``mask_generator``, ``sam_model`` and
        ``depth_anything_decoder``.
    """
    if not _THEIA_RESOURCES:
        # NOTE(review): trust_remote_code=True lets the model repository
        # supply Python code that is executed locally -- keep the repo pinned
        # to a trusted source.
        theia_model = AutoModel.from_pretrained(
            "theaiinstitute/theia-base-patch16-224-cdiv", trust_remote_code=True
        )
        theia_model = theia_model.to(device)

        target_model_names = [
            "google/vit-huge-patch14-224-in21k",
            "facebook/dinov2-large",
            "openai/clip-vit-large-patch14",
            "facebook/sam-vit-huge",
            "LiheYoung/depth-anything-large-hf",
        ]
        feature_means, feature_vars = load_feature_stats(
            target_model_names, stat_file_root="../../../feature_stats"
        )

        mask_generator, sam_model = prepare_mask_generator(device)

        depth_anything_model_name = "LiheYoung/depth-anything-large-hf"
        depth_anything_decoder, _ = prepare_depth_decoder(depth_anything_model_name, device)

        # Populate the cache only after every load succeeded, so a failure
        # part-way through is retried in full on the next call.
        _THEIA_RESOURCES.update(
            theia_model=theia_model,
            feature_means=feature_means,
            feature_vars=feature_vars,
            mask_generator=mask_generator,
            sam_model=sam_model,
            depth_anything_decoder=depth_anything_decoder,
        )
    return _THEIA_RESOURCES


def run_theia(image):
    """Decode one image with Theia and return a single comparison image.

    Args:
        image: The image supplied by the Gradio ``"image"`` input; it is
            passed to ``decode_everything`` as a one-element list.

    Returns:
        The Theia decoding stacked vertically on top of the ground-truth
        decoding (``gt=True``) for the submitted image.
    """
    resources = _load_theia_resources()

    images = [image]
    theia_decode_results, gt_decode_results = decode_everything(
        theia_model=resources["theia_model"],
        feature_means=resources["feature_means"],
        feature_vars=resources["feature_vars"],
        images=images,
        mask_generator=resources["mask_generator"],
        sam_model=resources["sam_model"],
        depth_anything_decoder=resources["depth_anything_decoder"],
        pred_iou_thresh=0.5,
        stability_score_thresh=0.7,
        gt=True,
        device=device,
    )

    # Assumes each decoded result is an image array and that the Theia and
    # ground-truth results have matching widths -- TODO confirm against
    # theia.decoding.decode_everything.
    vis_video = np.stack(
        [np.vstack([tr, gtr]) for tr, gtr in zip(theia_decode_results, gt_decode_results, strict=False)]
    )

    # np.stack adds a leading per-input axis. Exactly one image was submitted
    # and the output component displays a single image, so return that one
    # frame instead of the whole batch.
    return vis_video[0]
# Wire run_theia into a Gradio interface with one image input and one image
# output, then start serving it.
demo = gr.Interface(
    fn=run_theia,
    inputs="image",
    outputs="image",
)
demo.launch()