import gradio as gr
import torch
import spaces
from diffusers import StableVideoDiffusionPipeline, EulerDiscreteScheduler
from diffusers.utils import export_to_video

# Load the Stable Video Diffusion image-to-video model in fp16
model_id = "stabilityai/stable-video-diffusion-img2vid-xt"
pipe = StableVideoDiffusionPipeline.from_pretrained(
    model_id, torch_dtype=torch.float16, variant="fp16"
)
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
pipe.to("cuda")


@spaces.GPU
def generate_video(image, num_frames=25, height=576, width=1024):
    # SVD is conditioned on an image only; it does not accept a text prompt.
    # Resize the conditioning image to the requested output resolution.
    image = image.resize((int(width), int(height)))
    # Generate the video; .frames is a list of frame lists (one per batch item)
    frames = pipe(
        image, num_frames=int(num_frames), height=int(height), width=int(width)
    ).frames[0]
    # Write the frames to an MP4 file that gr.Video can display
    return export_to_video(frames, "generated.mp4", fps=7)


# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Image to Video with Stable Video Diffusion XT")
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload Image")
            # The XT checkpoint was trained to produce 25 frames
            num_frames_input = gr.Slider(1, 50, step=1, value=25, label="Number of Frames")
            height_input = gr.Number(label="Resolution Height", value=576)
            width_input = gr.Number(label="Resolution Width", value=1024)
            run_button = gr.Button("Generate Video")
        with gr.Column():
            video_output = gr.Video(label="Generated Video")

    run_button.click(
        generate_video,
        inputs=[image_input, num_frames_input, height_input, width_input],
        outputs=video_output,
    )

# Launch the interface
if __name__ == "__main__":
    demo.launch()
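
# --- Usage note (a sketch; assumes a CUDA GPU or a Hugging Face Space with ZeroGPU) ---
# The `spaces` import and the `@spaces.GPU` decorator are specific to Hugging Face
# Spaces ZeroGPU hardware; for a plain local run, both can be removed.
# Assumed dependencies: gradio, diffusers, transformers, accelerate, torch.
# Run locally with `python app.py`, then open the URL Gradio prints.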