import gradio as gr
import cv2
import torch
import numpy as np
from diffusers import AutoPipelineForImage2Image, LCMScheduler
from PIL import Image

# Load a Stable Diffusion v1.5 base model and attach the LCM-LoRA adapter for few-step image-to-image.
# Note: "latent-consistency/lcm-lora-sdv1-5" is a LoRA adapter, not a full pipeline checkpoint.
device = "cuda" if torch.cuda.is_available() else "cpu"
lcm_pipe = AutoPipelineForImage2Image.from_pretrained(
    "Lykon/dreamshaper-7",  # any SD v1.5-compatible base checkpoint should work here
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
).to(device)
lcm_pipe.scheduler = LCMScheduler.from_config(lcm_pipe.scheduler.config)
lcm_pipe.load_lora_weights("latent-consistency/lcm-lora-sdv1-5")
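# Alternative (assumption, not part of the original setup): a fully distilled LCM checkpoint such as
# "SimianLuo/LCM_Dreamshaper_v7" could be loaded directly with AutoPipelineForImage2Image,
# skipping the LoRA step, if a single-purpose LCM model is preferred.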

def process_frame(image, prompt="A futuristic landscape", negative_prompt="blurry, low quality"):
    """Restyle the input image with the LCM-LoRA image-to-image pipeline."""
    image = image.convert("RGB").resize((512, 512))
    # LCM-LoRA is tuned for very few steps and low guidance (roughly 1-2), not the usual 7.5
    result = lcm_pipe(prompt=prompt, negative_prompt=negative_prompt, image=image,
                      strength=0.6, num_inference_steps=4, guidance_scale=1.5).images[0]
    return np.array(result)

def video_stream(prompt, negative_prompt):
    """Capture frames from the webcam and stream LCM-processed frames in real time."""
    cap = cv2.VideoCapture(0)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            # OpenCV returns BGR frames; convert to RGB before handing them to PIL/diffusers
            image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            yield process_frame(image, prompt, negative_prompt)
    finally:
        cap.release()  # release the camera even if the stream is cancelled mid-loop
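# NOTE (deployment assumption): cv2.VideoCapture(0) opens the webcam of the machine running this
# script, i.e. the Gradio server. For remote users in a browser, frames would instead come from
# the Gradio webcam component (see the sketch after the event wiring below).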

# Create Gradio App
with gr.Blocks() as demo:
    gr.Markdown("## 🎨 Real-Time AI-Enhanced Webcam using Latent Consistency Model (LCM)")
    
    with gr.Row():
        webcam_feed = gr.Image(sources=["webcam"], streaming=True, label="Live Webcam")
        processed_image = gr.Image(label="AI-Enhanced Webcam Feed")
    
    with gr.Row():
        image_input = gr.Image(type="pil", label="Upload Image for Processing")
        canvas_output = gr.Image(interactive=True, label="Canvas - Processed Image Output")
    
    prompt_input = gr.Textbox(label="Real-Time LCM Prompt", value="A futuristic landscape")
    negative_prompt_input = gr.Textbox(label="Negative Prompt", value="blurry, low quality")
    start_button = gr.Button("Start Real-Time AI Enhancement")
    process_button = gr.Button("Process Uploaded Image")
    
    start_button.click(fn=video_stream, inputs=[prompt_input, negative_prompt_input], outputs=processed_image)
    process_button.click(fn=process_frame, inputs=[image_input, prompt_input, negative_prompt_input], outputs=[canvas_output])
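    # Sketch (assumption, Gradio 4.x API): to process the browser's webcam feed directly instead of
    # the server camera, the streaming Image component's stream event could drive process_frame:
    #
    #     webcam_feed.stream(
    #         fn=lambda frame, p, n: process_frame(Image.fromarray(frame), p, n),
    #         inputs=[webcam_feed, prompt_input, negative_prompt_input],
    #         outputs=processed_image,
    #     )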

demo.queue().launch(share=True)  # queue() is required for generator (streaming) outputs on older Gradio versions