Spaces:
Runtime error
Runtime error
import os | |
import gradio as gr | |
from sd_model_cfg import model_dict | |
from app import process, process0, process1, process2, get_frame_count, cfg_to_input | |
DESCRIPTION = ''' | |
## Rerender A Video | |
### This space provides the function of key frame translation. Full code for full video translation will be released upon the publication of the paper. | |
### To avoid overload, we set limitations to the maximum frame number (8) and the maximum frame resolution (512x768). | |
### The running time of a video of size 512x640 is about 1 minute per keyframe under T4 GPU. | |
### How to use: | |
1. **Run 1st Key Frame**: only translate the first frame, so you can adjust the prompts/models/parameters to find your ideal output appearance before run the whole video. | |
2. **Run Key Frames**: translate all the key frames based on the settings of the first frame | |
3. **Run All**: **Run 1st Key Frame** and **Run Key Frames** | |
4. **Run Propagation**: propogate the key frames to other frames for full video translation. This part will be released upon the publication of the paper. | |
### Tips: | |
1. This method cannot handle large or quick motions where the optical flow is hard to estimate. **Videos with stable motions are preferred**. | |
2. Pixel-aware fusion may not work for large or quick motions. | |
3. Try different color-aware AdaIN settings and even unuse it to avoid color jittering. | |
4. `revAnimated_v11` model for non-photorealstic style, `realisticVisionV20_v20` model for photorealstic style. | |
5. To use your own SD/LoRA model, you may clone the space and specify your model with [sd_model_cfg.py](https://huggingface.co/spaces/Anonymous-sub/Rerender/blob/main/sd_model_cfg.py). | |
6. This method is based on the original SD model. You may need to [convert](https://github.com/huggingface/diffusers/blob/main/scripts/convert_diffusers_to_original_stable_diffusion.py) Diffuser/Automatic1111 models to the original one. | |
**This code is for research purpose and non-commercial use only.** | |
<a href="https://huggingface.co/spaces/Anonymous-sub/Rerender?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank"> | |
<img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a> for no queue on your own hardware.</p> | |
''' | |
MAX_KEYFRAME = 100000000 | |
block = gr.Blocks().queue() | |
with block: | |
with gr.Row(): | |
gr.Markdown(DESCRIPTION) | |
with gr.Row(): | |
with gr.Column(): | |
input_path = gr.Video(label='Input Video', | |
source='upload', | |
format='mp4', | |
visible=True) | |
prompt = gr.Textbox(label='Prompt') | |
seed = gr.Slider(label='Seed', | |
minimum=0, | |
maximum=2147483647, | |
step=1, | |
value=0, | |
randomize=True) | |
run_button = gr.Button(value='Run All') | |
with gr.Row(): | |
run_button1 = gr.Button(value='Run 1st Key Frame') | |
run_button2 = gr.Button(value='Run Key Frames') | |
run_button3 = gr.Button(value='Run Propagation') | |
with gr.Accordion('Advanced options for the 1st frame translation', | |
open=False): | |
image_resolution = gr.Slider( | |
label='Frame rsolution', | |
minimum=256, | |
maximum=512, | |
value=512, | |
step=64, | |
info='To avoid overload, maximum 512') | |
control_strength = gr.Slider(label='ControNet strength', | |
minimum=0.0, | |
maximum=2.0, | |
value=1.0, | |
step=0.01) | |
x0_strength = gr.Slider( | |
label='Denoising strength', | |
minimum=0.00, | |
maximum=1.05, | |
value=0.75, | |
step=0.05, | |
info=('0: fully recover the input.' | |
'1.05: fully rerender the input.')) | |
color_preserve = gr.Checkbox( | |
label='Preserve color', | |
value=True, | |
info='Keep the color of the input video') | |
with gr.Row(): | |
left_crop = gr.Slider(label='Left crop length', | |
minimum=0, | |
maximum=512, | |
value=0, | |
step=1) | |
right_crop = gr.Slider(label='Right crop length', | |
minimum=0, | |
maximum=512, | |
value=0, | |
step=1) | |
with gr.Row(): | |
top_crop = gr.Slider(label='Top crop length', | |
minimum=0, | |
maximum=512, | |
value=0, | |
step=1) | |
bottom_crop = gr.Slider(label='Bottom crop length', | |
minimum=0, | |
maximum=512, | |
value=0, | |
step=1) | |
with gr.Row(): | |
control_type = gr.Dropdown(['HED', 'canny'], | |
label='Control type', | |
value='HED') | |
low_threshold = gr.Slider(label='Canny low threshold', | |
minimum=1, | |
maximum=255, | |
value=100, | |
step=1) | |
high_threshold = gr.Slider(label='Canny high threshold', | |
minimum=1, | |
maximum=255, | |
value=200, | |
step=1) | |
ddim_steps = gr.Slider(label='Steps', | |
minimum=1, | |
maximum=20, | |
value=20, | |
step=1, | |
info='To avoid overload, maximum 20') | |
scale = gr.Slider(label='CFG scale', | |
minimum=0.1, | |
maximum=30.0, | |
value=7.5, | |
step=0.1) | |
sd_model_list = list(model_dict.keys()) | |
sd_model = gr.Dropdown(sd_model_list, | |
label='Base model', | |
value='Stable Diffusion 1.5') | |
a_prompt = gr.Textbox(label='Added prompt', | |
value='best quality, extremely detailed') | |
n_prompt = gr.Textbox( | |
label='Negative prompt', | |
value=('longbody, lowres, bad anatomy, bad hands, ' | |
'missing fingers, extra digit, fewer digits, ' | |
'cropped, worst quality, low quality')) | |
with gr.Accordion('Advanced options for the key fame translation', | |
open=False): | |
interval = gr.Slider( | |
label='Key frame frequency (K)', | |
minimum=1, | |
maximum=1, | |
value=1, | |
step=1, | |
info='Uniformly sample the key frames every K frames') | |
keyframe_count = gr.Slider( | |
label='Number of key frames', | |
minimum=1, | |
maximum=1, | |
value=1, | |
step=1, | |
info='To avoid overload, maximum 8 key frames') | |
use_constraints = gr.CheckboxGroup( | |
[ | |
'shape-aware fusion', 'pixel-aware fusion', | |
'color-aware AdaIN' | |
], | |
label='Select the cross-frame contraints to be used', | |
value=[ | |
'shape-aware fusion', 'pixel-aware fusion', | |
'color-aware AdaIN' | |
]), | |
with gr.Row(): | |
cross_start = gr.Slider( | |
label='Cross-frame attention start', | |
minimum=0, | |
maximum=1, | |
value=0, | |
step=0.05) | |
cross_end = gr.Slider(label='Cross-frame attention end', | |
minimum=0, | |
maximum=1, | |
value=1, | |
step=0.05) | |
style_update_freq = gr.Slider( | |
label='Cross-frame attention update frequency', | |
minimum=1, | |
maximum=100, | |
value=1, | |
step=1, | |
info= | |
('Update the key and value for ' | |
'cross-frame attention every N key frames (recommend N*K>=10)' | |
)) | |
with gr.Row(): | |
warp_start = gr.Slider(label='Shape-aware fusion start', | |
minimum=0, | |
maximum=1, | |
value=0, | |
step=0.05) | |
warp_end = gr.Slider(label='Shape-aware fusion end', | |
minimum=0, | |
maximum=1, | |
value=0.1, | |
step=0.05) | |
with gr.Row(): | |
mask_start = gr.Slider(label='Pixel-aware fusion start', | |
minimum=0, | |
maximum=1, | |
value=0.5, | |
step=0.05) | |
mask_end = gr.Slider(label='Pixel-aware fusion end', | |
minimum=0, | |
maximum=1, | |
value=0.8, | |
step=0.05) | |
with gr.Row(): | |
ada_start = gr.Slider(label='Color-aware AdaIN start', | |
minimum=0, | |
maximum=1, | |
value=0.8, | |
step=0.05) | |
ada_end = gr.Slider(label='Color-aware AdaIN end', | |
minimum=0, | |
maximum=1, | |
value=1, | |
step=0.05) | |
mask_strength = gr.Slider(label='Pixel-aware fusion stength', | |
minimum=0, | |
maximum=1, | |
value=0.5, | |
step=0.01) | |
inner_strength = gr.Slider( | |
label='Pixel-aware fusion detail level', | |
minimum=0.5, | |
maximum=1, | |
value=0.9, | |
step=0.01, | |
info='Use a low value to prevent artifacts') | |
smooth_boundary = gr.Checkbox( | |
label='Smooth fusion boundary', | |
value=True, | |
info='Select to prevent artifacts at boundary') | |
with gr.Accordion('Example configs', open=True): | |
config_dir = 'config' | |
config_list = os.listdir(config_dir) | |
args_list = [] | |
for config in config_list: | |
try: | |
config_path = os.path.join(config_dir, config) | |
args = cfg_to_input(config_path) | |
args_list.append(args) | |
except FileNotFoundError: | |
# The video file does not exist, skipped | |
pass | |
ips = [ | |
prompt, image_resolution, control_strength, color_preserve, | |
left_crop, right_crop, top_crop, bottom_crop, control_type, | |
low_threshold, high_threshold, ddim_steps, scale, seed, | |
sd_model, a_prompt, n_prompt, interval, keyframe_count, | |
x0_strength, use_constraints[0], cross_start, cross_end, | |
style_update_freq, warp_start, warp_end, mask_start, | |
mask_end, ada_start, ada_end, mask_strength, | |
inner_strength, smooth_boundary | |
] | |
with gr.Column(): | |
result_image = gr.Image(label='Output first frame', | |
type='numpy', | |
interactive=False) | |
result_keyframe = gr.Video(label='Output key frame video', | |
format='mp4', | |
interactive=False) | |
with gr.Row(): | |
gr.Examples(examples=args_list, | |
inputs=[input_path, *ips], | |
fn=process0, | |
outputs=[result_image, result_keyframe], | |
cache_examples=True) | |
def input_uploaded(path): | |
frame_count = get_frame_count(path) | |
if frame_count <= 2: | |
raise gr.Error('The input video is too short!' | |
'Please input another video.') | |
default_interval = min(10, frame_count - 2) | |
max_keyframe = min((frame_count - 2) // default_interval, MAX_KEYFRAME) | |
global video_frame_count | |
video_frame_count = frame_count | |
global global_video_path | |
global_video_path = path | |
return gr.Slider.update(value=default_interval, | |
maximum=MAX_KEYFRAME), gr.Slider.update( | |
value=max_keyframe, maximum=max_keyframe) | |
def input_changed(path): | |
frame_count = get_frame_count(path) | |
if frame_count <= 2: | |
return gr.Slider.update(maximum=1), gr.Slider.update(maximum=1) | |
default_interval = min(10, frame_count - 2) | |
max_keyframe = min((frame_count - 2) // default_interval, MAX_KEYFRAME) | |
global video_frame_count | |
video_frame_count = frame_count | |
global global_video_path | |
global_video_path = path | |
return gr.Slider.update(maximum=max_keyframe), \ | |
gr.Slider.update(maximum=max_keyframe) | |
def interval_changed(interval): | |
global video_frame_count | |
if video_frame_count is None: | |
return gr.Slider.update() | |
max_keyframe = (video_frame_count - 2) // interval | |
return gr.Slider.update(value=max_keyframe, maximum=max_keyframe) | |
input_path.change(input_changed, input_path, [interval, keyframe_count]) | |
input_path.upload(input_uploaded, input_path, [interval, keyframe_count]) | |
interval.change(interval_changed, interval, keyframe_count) | |
run_button.click(fn=process, | |
inputs=ips, | |
outputs=[result_image, result_keyframe]) | |
run_button1.click(fn=process1, inputs=ips, outputs=[result_image]) | |
run_button2.click(fn=process2, inputs=ips, outputs=[result_keyframe]) | |
def process3(): | |
raise gr.Error( | |
"Coming Soon. Full code for full video translation will be " | |
"released upon the publication of the paper.") | |
run_button3.click(fn=process3, outputs=[result_keyframe]) | |
block.queue(concurrency_count=1, max_size=20) | |
block.launch(server_name='0.0.0.0') | |