# Page header captured along with this file (appears to be the hosting
# Space's status banner): "Spaces: Running".
import argparse
import datetime

import gradio as gr

from common.utils import (
    matcher_zoo,
    ransac_zoo,
    change_estimate_geom,
    run_matching,
    gen_examples,
    DEFAULT_RANSAC_METHOD,
    DEFAULT_SETTING_GEOMETRY,
    DEFAULT_RANSAC_REPROJ_THRESHOLD,
    DEFAULT_RANSAC_CONFIDENCE,
    DEFAULT_RANSAC_MAX_ITER,
    DEFAULT_MATCHING_THRESHOLD,
    DEFAULT_SETTING_MAX_FEATURES,
    DEFAULT_DEFAULT_KEYPOINT_THRESHOLD,
)
# Identifier string for the GIM project; it is not referenced by any other
# code visible in this file (presumably the model/repo id - confirm before
# removing).
model = "xuelunshen/gim"

# HTML header rendered at the top of the page through gr.Markdown.
# The markup is intentionally kept at column 0: Markdown renderers may treat
# indented lines as code blocks.
# Each badge's alt text names the badge it belongs to, so screen readers and
# broken-image fallbacks show the right label.
DESCRIPTION = """
<div style="display: flex; justify-content: center; align-items: center;">
<div style="width: 100%; text-align: center; font-size: 30px;">
<strong>GIM: Learning Generalizable Image Matcher From Internet Videos</strong>
</div>
</div>
<div style="display: flex; justify-content: center; padding: 10px; align-items: center;">
<div style="width: 75%; text-align: center; background-color: #FFFFE0;">
LICENSE: This repository is under the MIT License. This content/model is provided here for research purposes only. Any use beyond this is your sole responsibility and subject to your securing the necessary rights for your purpose.
</div>
</div>
<div style="display: flex; justify-content: center; align-items: flex-start; flex-wrap: wrap;">
<div><a href="https://xuelunshen.com/gim"><img src="https://img.shields.io/badge/Project_Page-3A464E?logo=gumtree" alt='Project Page'></a></div>
<div><a href="https://github.com/xuelunshen/gim"><img src="https://img.shields.io/badge/Source_Code-black?logo=Github" alt='Github Source Code'></a></div>
<div><a href="https://arxiv.org/abs/2402.11095"><img src="https://img.shields.io/badge/arXiv-2402.11095-b31b1b?logo=arxiv" alt='arxiv'></a></div>
<div><a href="https://www.youtube.com/watch?v=FU_MJLD8LeY"><img src="https://img.shields.io/badge/Video-E33122?logo=Youtube" alt='Overview Video'></a></div>
<div><a href="https://community.intel.com/t5/Blogs/Tech-Innovation/Artificial-Intelligence-AI/Intel-Labs-Research-Work-Receives-Spotlight-Award-at-Top-AI/post/1575985"><img src="https://img.shields.io/badge/Blog-0071C5?logo=googledocs&logoColor=white" alt='Blog'></a></div>
<div><a href="https://zhuanlan.zhihu.com/p/711361901"><img src="https://img.shields.io/badge/Zhihu-1767F5?logo=zhihu&logoColor=white" alt='Zhihu'></a></div>
</div>
<p></p>
<div style="display: flex; justify-content: center; align-items: flex-start; flex-wrap: wrap;">
<div><a href="https://en.xmu.edu.cn"><img src="https://img.shields.io/badge/Xiamen_University-183F9D?logo=Google%20Scholar&logoColor=white" alt='Xiamen University'></a></div>
<div><a href="https://www.intel.com"><img src="https://img.shields.io/badge/Labs-0071C5?logo=intel" alt='Intel'></a></div>
<div><a href="https://www.dji.com"><img src="https://img.shields.io/badge/DJI-131313?logo=DJI" alt='DJI'></a></div>
</div>
<div style="display: flex; justify-content: center; align-items: flex-start;">
<div style="width: 50%; padding: 10px;">
<p align="left">
<span style="color: black;">
You can click on the example images below or upload a pair of images. Running a match takes about 3.5 minutes (because the code is deployed on free CPU). Please wait patiently and keep the window in the foreground during operation
</span>
<br>
<span style="color: gray; font-size: smaller;">
Thanks to <a href="https://huggingface.co/spaces/Realcat/image-matching-webui" style="color: gray;">https://huggingface.co/spaces/Realcat/image-matching-webui</a> for providing the UI framework.
</span>
</p>
</div>
<div style="width: 50%; padding: 10px;">
<p align="left">
<span style="color: black;">
你可以点击下面的示例图片或者上传图片
</span>
<br>
<span style="color: black;">
运行一次匹配需要 3.5 分钟左右的时间 (因为代码部署在免费cpu上)
</span>
<br>
<span style="color: black;">
请你耐心等待, 运行期间保持窗口在最前面
</span>
<br>
<span style="color: gray; font-size: smaller;">
感谢 <a href="https://huggingface.co/spaces/Realcat/image-matching-webui" style="color: gray;">https://huggingface.co/spaces/Realcat/image-matching-webui</a> 提供的交互界面.
</span>
</p>
</div>
</div>
"""
def ui_change_imagebox(choice) -> dict:
    """
    Build an update payload that clears an image box and switches its source.

    Args:
        choice: The image source to apply. Callers in this file pass a single
            source name (the "Image Source" radio value, or the literal
            "upload" when resetting), not a list.

    Returns:
        dict: An update dictionary with the value cleared, ``sources`` set to
        ``choice``, and ``__type__`` set to ``"update"``.
    """
    return {
        "value": None,  # drop whatever image is currently shown
        "sources": choice,  # forwarded unchanged to the image component
        "__type__": "update",  # marks the dict as a component update
    }
def ui_reset_state(*args) -> tuple:
    """
    Produce the values that put every resettable UI component back to default.

    The positional arguments are accepted only because the reset button is
    wired with the regular input components; they are ignored.

    Returns:
        tuple: 21 values, one per component. The order must stay in step with
        the ``reset_outputs`` list built in ``run``.
    """
    # The matcher is reset to the first registered entry of matcher_zoo.
    # NOTE(review): the dropdown in ``run`` starts on "gim"; confirm that this
    # is also the first key, otherwise reset and initial state differ.
    key = list(matcher_zoo.keys())[0]
    return (
        None,  # image0
        None,  # image1
        DEFAULT_MATCHING_THRESHOLD,  # matching_threshold
        DEFAULT_SETTING_MAX_FEATURES,  # max_features
        DEFAULT_DEFAULT_KEYPOINT_THRESHOLD,  # keypoint_threshold
        key,  # matcher
        ui_change_imagebox("upload"),  # input image0
        ui_change_imagebox("upload"),  # input image1
        "upload",  # match_image_src
        None,  # keypoints
        None,  # raw matches
        None,  # ransac matches
        {},  # matches result info
        {},  # matcher config
        None,  # warped image
        {},  # geometry result
        DEFAULT_RANSAC_METHOD,  # ransac_method
        DEFAULT_RANSAC_REPROJ_THRESHOLD,  # ransac_reproj_threshold
        DEFAULT_RANSAC_CONFIDENCE,  # ransac_confidence
        DEFAULT_RANSAC_MAX_ITER,  # ransac_max_iter
        DEFAULT_SETTING_GEOMETRY,  # geometry
    )
# Candidate CSS rule (currently unused): "footer {visibility: hidden}"
def run(config):
    """
    Build the Gradio interface, wire its callbacks, and launch the app.

    The page has two columns: the left one holds the model/image inputs, the
    advanced settings and the examples; the right one holds the result images
    and the JSON statistics.

    NOTE(review): the nesting of the layout blocks below was reconstructed
    from a copy of this file whose indentation had been lost - confirm it
    against the deployed layout.

    Args:
        config: Configuration for the application. It is not read anywhere in
            this function; the script entry point passes ``None``.

    Returns:
        None
    """
    with gr.Blocks(css="style.css") as app:
        gr.Markdown(DESCRIPTION)
        with gr.Row(equal_height=False):
            # ---- left column: inputs, settings, examples ----
            with gr.Column():
                with gr.Row():
                    matcher_list = gr.Dropdown(
                        choices=list(matcher_zoo.keys()),
                        value="gim",
                        label="Matching Model",
                        interactive=True,
                    )
                    match_image_src = gr.Radio(
                        ["upload", "webcam"],
                        label="Image Source",
                        value="upload",
                    )
                with gr.Row():
                    input_image0 = gr.Image(
                        label="Image 0",
                        type="numpy",
                        image_mode="RGB",
                        height=300,
                        interactive=True,
                    )
                    input_image1 = gr.Image(
                        label="Image 1",
                        type="numpy",
                        image_mode="RGB",
                        height=300,
                        interactive=True,
                    )
                with gr.Row():
                    button_reset = gr.Button(value="Reset")
                    button_run = gr.Button(value="Run Match", variant="primary")
                with gr.Accordion("Advanced Setting", open=False):
                    with gr.Accordion("Matching Setting", open=True):
                        with gr.Row():
                            match_setting_threshold = gr.Slider(
                                minimum=0.0,
                                maximum=1,
                                step=0.001,
                                label="Match thres.",
                                value=0.1,
                            )
                            match_setting_max_features = gr.Slider(
                                minimum=10,
                                maximum=10000,
                                step=10,
                                label="Max features",
                                value=4096,
                            )
                        # TODO: add line settings
                        with gr.Row():
                            detect_keypoints_threshold = gr.Slider(
                                minimum=0,
                                maximum=1,
                                step=0.001,
                                label="Keypoint thres.",
                                value=0.01,
                            )
                            # Shown in the UI but not part of ``inputs``, so
                            # its value is never sent to run_matching.
                            detect_line_threshold = gr.Slider(
                                minimum=0.1,
                                maximum=1,
                                step=0.01,
                                label="Line thres.",
                                value=0.2,
                            )
                        # matcher_lists = gr.Radio(
                        #     ["NN-mutual", "Dual-Softmax"],
                        #     label="Matcher mode",
                        #     value="NN-mutual",
                        # )
                    with gr.Accordion("RANSAC Setting", open=True):
                        with gr.Row(equal_height=False):
                            # enable_ransac = gr.Checkbox(label="Enable RANSAC")
                            ransac_method = gr.Dropdown(
                                # A concrete list, matching how the matcher
                                # dropdown above passes its choices.
                                choices=list(ransac_zoo.keys()),
                                value=DEFAULT_RANSAC_METHOD,
                                label="RANSAC Method",
                                interactive=True,
                            )
                        ransac_reproj_threshold = gr.Slider(
                            minimum=0.0,
                            maximum=12,
                            step=0.01,
                            label="Ransac Reproj threshold",
                            value=8.0,
                        )
                        ransac_confidence = gr.Slider(
                            minimum=0.0,
                            maximum=1,
                            step=0.00001,
                            label="Ransac Confidence",
                            value=0.99999,
                        )
                        ransac_max_iter = gr.Slider(
                            minimum=0.0,
                            maximum=100000,
                            step=100,
                            label="Ransac Iterations",
                            value=10000,
                        )
                    with gr.Accordion("Geometry Setting", open=False):
                        with gr.Row(equal_height=False):
                            # show_geom = gr.Checkbox(label="Show Geometry")
                            choice_estimate_geom = gr.Radio(
                                ["Fundamental", "Homography"],
                                label="Reconstruct Geometry",
                                value=DEFAULT_SETTING_GEOMETRY,
                            )
                # Components handed to run_matching, in the order listed here.
                inputs = [
                    input_image0,
                    input_image1,
                    match_setting_threshold,
                    match_setting_max_features,
                    detect_keypoints_threshold,
                    matcher_list,
                    ransac_method,
                    ransac_reproj_threshold,
                    ransac_confidence,
                    ransac_max_iter,
                    choice_estimate_geom,
                ]
                # Add some examples
                with gr.Row():
                    # Example inputs
                    gr.Examples(
                        examples=gen_examples(),
                        inputs=inputs,
                        outputs=[],
                        fn=run_matching,
                        cache_examples=False,
                        label=(
                            "Examples (click one of the images below to Run"
                            " Match)"
                        ),
                    )
                with gr.Accordion("Open for More!", open=False):
                    # The markup stays at column 0 so the rendered text is
                    # exactly what the literal spells out.
                    gr.Markdown(
                        f"""
<h3>Supported Algorithms</h3>
{", ".join(matcher_zoo.keys())}
"""
                    )
            # ---- right column: results ----
            with gr.Column():
                output_keypoints = gr.Image(label="Keypoints", type="numpy")
                output_matches_raw = gr.Image(label="Raw Matches", type="numpy")
                output_matches_ransac = gr.Image(
                    label="Ransac Matches", type="numpy"
                )
                output_wrapped = gr.Image(
                    label="Wrapped Pair", type="numpy"
                )
                with gr.Accordion(
                    "Open for More: Matches Statistics", open=False
                ):
                    matches_result_info = gr.JSON(label="Matches Statistics")
                    matcher_info = gr.JSON(label="Match info")
                with gr.Accordion(
                    "Open for More: Geometry info", open=False
                ):
                    geometry_result = gr.JSON(
                        label="Reconstructed Geometry"
                    )

        # callbacks: switching the image source clears and re-sources both
        # image boxes (one listener per box).
        match_image_src.change(
            fn=ui_change_imagebox,
            inputs=match_image_src,
            outputs=input_image0,
        )
        match_image_src.change(
            fn=ui_change_imagebox,
            inputs=match_image_src,
            outputs=input_image1,
        )
        # collect outputs
        outputs = [
            output_keypoints,
            output_matches_raw,
            output_matches_ransac,
            matches_result_info,
            matcher_info,
            geometry_result,
            output_wrapped,
        ]
        # button callbacks
        button_run.click(fn=run_matching, inputs=inputs, outputs=outputs)
        # Reset images. The order here must mirror the tuple returned by
        # ui_reset_state; the two image boxes appear twice because they first
        # receive a cleared value and then an update that restores "upload".
        reset_outputs = [
            input_image0,
            input_image1,
            match_setting_threshold,
            match_setting_max_features,
            detect_keypoints_threshold,
            matcher_list,
            input_image0,
            input_image1,
            match_image_src,
            output_keypoints,
            output_matches_raw,
            output_matches_ransac,
            matches_result_info,
            matcher_info,
            output_wrapped,
            geometry_result,
            ransac_method,
            ransac_reproj_threshold,
            ransac_confidence,
            ransac_max_iter,
            choice_estimate_geom,
        ]
        button_reset.click(
            fn=ui_reset_state, inputs=inputs, outputs=reset_outputs
        )
        # estimate geo: changing the geometry type calls change_estimate_geom
        # with the current images and geometry result.
        choice_estimate_geom.change(
            fn=change_estimate_geom,
            inputs=[
                input_image0,
                input_image1,
                geometry_result,
                choice_estimate_geom,
            ],
            outputs=[output_wrapped, geometry_result],
        )

    # Timestamped marker printed just before launching.
    print(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), 'app.queue().launch start')
    app.queue().launch(share=False)
if __name__ == "__main__":
    # Script entry point: parse the command line, then start the UI.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--config_path",
        type=str,
        default="config.yaml",
        help="configuration file path",
    )
    # Parsing still validates the command line and serves --help.
    args = parser.parse_args()
    # NOTE(review): args.config_path is accepted but never loaded; the app is
    # always started with config=None, which run() does not read.
    config = None
    run(config)