# Diffutoon / app_new.py
# kevinwang676's picture
# Update app_new.py
# 88590f8 verified
import subprocess
import sys

# Install the pinned NumPy release at startup, before `import numpy` runs
# further down in this file.
# The command is passed as an argument list (no shell) and goes through
# `sys.executable -m pip`, so the package is installed into the interpreter
# that is actually running this script rather than whichever `pip` happens to
# be first on PATH.
# A failing install is not treated as fatal (check=False), matching the
# previous best-effort behaviour.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "numpy==1.26.4"],
    check=False,
)
import os
import gradio as gr
import torch
import spaces
import random
from PIL import Image
import numpy as np
from glob import glob
from pathlib import Path
from typing import Optional
#Core functions from https://github.com/modelscope/DiffSynth-Studio
from diffsynth import save_video, ModelManager, SVDVideoPipeline
from diffsynth import SDVideoPipeline, ControlNetConfigUnit, VideoData, save_frames
from diffsynth.extensions.RIFE import RIFESmoother
import cv2
# Constants
# Largest value of a signed 32-bit integer. Used as the upper bound of the
# seed slider and of the randomly drawn seed in generate().
MAX_SEED = np.iinfo(np.int32).max
# Stylesheet passed to gr.Blocks(css=CSS); hides the page's <footer> element.
CSS = """
footer {
visibility: hidden;
}
"""
# JavaScript passed to gr.Blocks(js=JS): when the current URL does not already
# end with '?__theme=dark', navigate to the same URL with that suffix appended.
# NOTE(review): the suffix is appended verbatim, so a URL that already carries
# a query string would end up with a second '?' -- confirm this is acceptable.
JS = """function () {
gradioURL = window.location.href
if (!gradioURL.endsWith('?__theme=dark')) {
window.location.replace(gradioURL + '?__theme=dark');
}
}"""
# Build the shared pipeline objects once, at import time, and only when torch
# reports a CUDA device. Without CUDA, `pipe2` and `smoother` stay undefined.
if torch.cuda.is_available():
    # One manager owns every checkpoint; weights are loaded in fp16 on the GPU.
    model_manager2 = ModelManager(device="cuda", torch_dtype=torch.float16)
    model_manager2.load_textual_inversions("models/textual_inversion")
    model_manager2.load_models(
        [
            "models/stable_diffusion/flat2DAnimerge_v45Sharp.safetensors",
            "models/AnimateDiff/mm_sd_v15_v2.ckpt",
            "models/ControlNet/control_v11p_sd15_lineart.pth",
            "models/ControlNet/control_v11f1e_sd15_tile.pth",
            "models/RIFE/flownet.pkl",
        ]
    )

    # Video pipeline with two ControlNet units (lineart, then tile), both at
    # scale 0.5.
    pipe2 = SDVideoPipeline.from_model_manager(
        model_manager2,
        [
            ControlNetConfigUnit(
                processor_id=processor,
                model_path=weights,
                scale=0.5,
            )
            for processor, weights in (
                ("lineart", "models/ControlNet/control_v11p_sd15_lineart.pth"),
                ("tile", "models/ControlNet/control_v11f1e_sd15_tile.pth"),
            )
        ],
    )

    # Frame smoother created from the same model manager; applied to the
    # rendered frames in generate().
    smoother = RIFESmoother.from_model_manager(model_manager2)
def update_frames(video_in):
    """Sync the parameter controls with a newly uploaded video.

    Args:
        video_in: Path of the uploaded video file. May be empty/``None`` when
            no video is present, in which case nothing is changed.

    Returns:
        A 4-tuple of ``gr.update`` objects, in the order expected by the
        ``video_in.upload`` wiring: new maximum for the frame-count slider,
        then new values for fps, width and height.
    """
    if not video_in:
        # Nothing to inspect: return no-op updates so the controls keep
        # their current state.
        return gr.update(), gr.update(), gr.update(), gr.update()

    # Frame count comes from the same loader generate() uses.
    frame_len = len(VideoData(video_file=video_in))

    cap = cv2.VideoCapture(video_in)
    try:
        fps_in = cap.get(cv2.CAP_PROP_FPS)
        width_in = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height_in = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    finally:
        # Always free the capture handle, even if a property read fails.
        cap.release()

    return (
        gr.update(maximum=frame_len),
        gr.update(value=fps_in),
        gr.update(value=width_in),
        gr.update(value=height_in),
    )
@spaces.GPU(duration=180)
def generate(
    video_in,
    image_in,
    prompt: str = "best quality",
    seed: int = -1,
    num_inference_steps: int = 10,
    num_frames: int = 30,
    height: int = 512,
    width: int = 512,
    animatediff_batch_size: int = 32,
    animatediff_stride: int = 16,
    fps_id: int = 25,
    output_folder: str = "outputs",
    progress=gr.Progress(track_tqdm=True),
):
    """Render a stylised version of the uploaded video and save it as an MP4.

    Args:
        video_in: Path of the source video.
        image_in: Unused; accepted because the UI wiring passes it.
        prompt: Positive text prompt forwarded to the pipeline.
        seed: RNG seed. ``-1`` draws a random seed in ``[0, MAX_SEED]``.
        num_inference_steps: Forwarded to the pipeline.
        num_frames: Number of frames to process, counted from the first frame
            of the source video and capped at the video's length.
        height: Frame height used both for loading and for the pipeline.
        width: Frame width used both for loading and for the pipeline.
        animatediff_batch_size: Forwarded to the pipeline.
        animatediff_stride: Forwarded to the pipeline.
        fps_id: Frame rate of the saved video.
        output_folder: Directory for the result; created if missing.
        progress: Gradio progress tracker (tracks tqdm bars).

    Returns:
        ``(video_path, seed)``: path of the written MP4 and the seed that was
        actually used.

    Raises:
        gr.Error: If no video was supplied or the video yields no frames.
    """
    if not video_in:
        raise gr.Error("Please upload a video first.")

    if seed == -1:
        seed = random.randint(0, MAX_SEED)
    # Slider values may arrive as floats; torch.manual_seed needs an int.
    seed = int(seed)
    torch.manual_seed(seed)

    # Output name is the zero-padded count of MP4 files already in the folder.
    os.makedirs(output_folder, exist_ok=True)
    base_count = len(glob(os.path.join(output_folder, "*.mp4")))
    video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")

    up_video = VideoData(video_file=video_in, height=height, width=width)

    # Take exactly `num_frames` frames starting at frame 0. The previous
    # range(1, num_frames) skipped the first frame and produced one frame
    # fewer than requested (none at all for num_frames == 1). The count is
    # also clamped so it can never index past the end of the clip.
    frame_count = min(int(num_frames), len(up_video))
    if frame_count < 1:
        raise gr.Error("The video has no frames to process.")
    input_video = [up_video[i] for i in range(frame_count)]

    video = pipe2(
        prompt=prompt,
        negative_prompt="verybadimagenegative_v1.3",
        cfg_scale=3,
        clip_skip=2,
        controlnet_frames=input_video,
        num_frames=len(input_video),
        num_inference_steps=num_inference_steps,
        height=height,
        width=width,
        animatediff_batch_size=animatediff_batch_size,
        animatediff_stride=animatediff_stride,
        unet_batch_size=8,
        controlnet_batch_size=8,
        vram_limit_level=0,
    )
    # Post-process the rendered frames with the module-level smoother.
    video = smoother(video)
    save_video(video, video_path, fps=fps_id)
    return video_path, seed
# Rows offered by gr.Examples; the columns line up with its inputs
# (video, image, prompt).
examples = [
    [
        "./dancing.mp4",
        None,
        "best quality, perfect anime illustration, light, a girl is dancing, smile, solo",
    ],
]
# Gradio Interface
# Layout: a row with the input video, a hidden image input, the output video
# and a column of parameter sliders; below it the prompt, the action buttons
# and the example gallery. Event wiring follows the layout.
# NOTE(review): the nesting below was reconstructed from a copy of the file
# whose indentation was lost -- in particular, confirm whether the fps slider
# belongs inside the "Diffutoon Options" accordion or directly in the column.
with gr.Blocks(css=CSS, js=JS, theme="soft") as demo:
    gr.HTML("<h1><center>Exvideo📽️Diffutoon</center></h1>")
    gr.HTML("""
<p><center>Exvideo and Diffutoon video generation
<br><b>Update</b>: Output resize, Frames length control.
<br><b>Note</b>: ZeroGPU limited, Set the parameters appropriately.</center></p>
""")
    with gr.Row():
        video_in = gr.Video(label='Upload Video', height=600, scale=2)
        # Kept (hidden) because generate() still takes an image argument.
        image_in = gr.Image(label='Upload Image', height=600, scale=2, image_mode="RGB", type="filepath", visible=False)
        video = gr.Video(label="Generated Video", height=600, scale=2)
        with gr.Column(scale=1):
            seed = gr.Slider(
                label="Seed (-1 Random)",
                minimum=-1,
                maximum=MAX_SEED,
                step=1,
                value=-1,
            )
            num_inference_steps = gr.Slider(
                label="Inference steps",
                info="Inference steps",
                step=1,
                value=10,
                minimum=1,
                maximum=50,
            )
            # The maximum is replaced by update_frames() on upload.
            num_frames = gr.Slider(
                label="Num frames",
                info="Output Frames",
                step=1,
                value=30,
                minimum=1,
                maximum=128,
            )
            with gr.Row():
                height = gr.Slider(
                    label="Height",
                    step=8,
                    value=512,
                    minimum=256,
                    maximum=2560,
                )
                width = gr.Slider(
                    label="Width",
                    step=8,
                    value=512,
                    minimum=256,
                    maximum=2560,
                )
            with gr.Accordion("Diffutoon Options", open=False):
                animatediff_batch_size = gr.Slider(
                    label="Animatediff batch size",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=32,
                )
                animatediff_stride = gr.Slider(
                    label="Animatediff stride",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=16,
                )
            # The info text used to claim a fixed "25/fps" duration; the
            # duration actually depends on how many frames are rendered.
            fps_id = gr.Slider(
                label="Frames per second",
                info="The length of your video in seconds will be (number of frames)/fps",
                value=6,
                step=1,
                minimum=5,
                maximum=30,
            )
    prompt = gr.Textbox(label="Prompt", value="best quality, perfect anime illustration, light, a girl is dancing, smile, solo")
    with gr.Row():
        submit_btn = gr.Button(value="Generate")
        #stop_btn = gr.Button(value="Stop", variant="stop")
        clear_btn = gr.ClearButton([video_in, image_in, seed, video])
    # Examples only supply video, image and prompt; every other generate()
    # parameter falls back to its default.
    gr.Examples(
        examples=examples,
        fn=generate,
        inputs=[video_in, image_in, prompt],
        outputs=[video, seed],
        cache_examples="lazy",
        examples_per_page=4,
    )
    # Uploading a video refreshes the frame-count maximum and the fps, width
    # and height values from the file.
    video_in.upload(update_frames, inputs=[video_in], outputs=[num_frames, fps_id, width, height])
    submit_event = submit_btn.click(fn=generate, inputs=[video_in, image_in, prompt, seed, num_inference_steps, num_frames, height, width, animatediff_batch_size, animatediff_stride, fps_id], outputs=[video, seed], api_name="video")
    #stop_btn.click(fn=None, inputs=None, outputs=None, cancels=[submit_event])

demo.queue().launch()