import gradio as gr
import torch
import spaces
from diffusers import StableVideoDiffusionPipeline, EulerDiscreteScheduler
from diffusers.utils import export_to_video

# Load the Stable Video Diffusion image-to-video model in fp16
model_id = "stabilityai/stable-video-diffusion-img2vid-xt"
pipe = StableVideoDiffusionPipeline.from_pretrained(
    model_id, torch_dtype=torch.float16, variant="fp16"
)
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
pipe.to("cuda")


@spaces.GPU
def generate_video(image, num_frames=25, height=576, width=1024):
    # SVD is conditioned on an image only; it does not accept a text prompt.
    # Resize the conditioning image to the requested output resolution.
    image = image.resize((int(width), int(height)))
    # Generate the video; .frames is a list of frame lists (one per batch item)
    frames = pipe(
        image, num_frames=int(num_frames), height=int(height), width=int(width)
    ).frames[0]
    # Write the frames to an MP4 file that gr.Video can display
    return export_to_video(frames, "generated.mp4", fps=7)


# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Image to Video with Stable Video Diffusion XT")
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil", label="Upload Image")
            # The XT checkpoint was trained to produce 25 frames
            num_frames_input = gr.Slider(1, 50, step=1, value=25, label="Number of Frames")
            height_input = gr.Number(label="Resolution Height", value=576)
            width_input = gr.Number(label="Resolution Width", value=1024)
            run_button = gr.Button("Generate Video")
        with gr.Column():
            video_output = gr.Video(label="Generated Video")

    run_button.click(
        generate_video,
        inputs=[image_input, num_frames_input, height_input, width_input],
        outputs=video_output,
    )

# Launch the interface
if __name__ == "__main__":
    demo.launch()
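
# --- Usage note (a sketch; assumes a CUDA GPU or a Hugging Face Space with ZeroGPU) ---
# The `spaces` import and the `@spaces.GPU` decorator are specific to Hugging Face
# Spaces ZeroGPU hardware; for a plain local run, both can be removed.
# Assumed dependencies: gradio, diffusers, transformers, accelerate, torch.
# Run locally with `python app.py`, then open the URL Gradio prints.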