Spaces:
Sleeping
Sleeping
import gradio as gr | |
import cv2 | |
import torch | |
import numpy as np | |
from diffusers import StableDiffusionPipeline,AutoPipelineForImage2Image,AutoencoderTiny | |
from transformers import AutoProcessor, AutoModel, AutoTokenizer | |
from PIL import Image | |
# | |
# | |
# def predict(prompt, frame): | |
# generator = torch.manual_seed(params.seed) | |
# steps = params.steps | |
# strength = params.strength | |
# if int(steps * strength) < 1: | |
# steps = math.ceil(1 / max(0.10, strength)) | |
# | |
# prompt = params.prompt | |
# prompt_embeds = None | |
# | |
# results = self.pipe( | |
# image=frame, | |
# prompt_embeds=prompt_embeds, | |
# prompt=prompt, | |
# negative_prompt=params.negative_prompt, | |
# generator=generator, | |
# strength=strength, | |
# num_inference_steps=steps, | |
# guidance_scale=1.1, | |
# width=params.width, | |
# height=params.height, | |
# output_type="pil", | |
# ) | |
# | |
# nsfw_content_detected = ( | |
# results.nsfw_content_detected[0] | |
# if "nsfw_content_detected" in results | |
# else False | |
# ) | |
# if nsfw_content_detected: | |
# return None | |
# result_image = results.images[0] | |
# | |
# return result_image | |
# | |
# def process_frame(frame, prompt="A futuristic landscape"): | |
# """Process a single frame using the real-time latent consistency model.""" | |
# | |
# # Convert frame to PIL image | |
# image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).resize((512, 512)) | |
# | |
# # Apply Real-Time Latent Consistency Model | |
# result = realtime_pipe(prompt=prompt, image=image, strength=0.5, guidance_scale=7.5).images[0] | |
# return np.array(result) | |
def video_stream(prompt):
    """Yield AI-processed frames read from the default webcam.

    Opens capture device 0 with OpenCV and, for every frame that is read
    successfully, passes it together with ``prompt`` to ``process_frame`` and
    yields the result. Iteration ends when the device is no longer open or a
    read fails.

    Args:
        prompt: Text prompt forwarded unchanged to ``process_frame``.

    Yields:
        Whatever ``process_frame(frame, prompt)`` returns for each frame.

    NOTE(review): ``process_frame`` must exist at module scope; the only
    definition visible near this function is commented out -- confirm it is
    provided before wiring this generator to the UI.
    """
    cap = cv2.VideoCapture(0)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # A failed read means no more frames are available.
                break
            yield process_frame(frame, prompt)
    finally:
        # Release the device even when the consumer stops iterating early
        # (the generator is closed at the yield) or process_frame raises;
        # otherwise the webcam handle stays held.
        cap.release()
# Create Gradio App
#
# Layout: a heading, a row holding the live webcam input next to the processed
# output, a second row with an editable canvas image, then the prompt box and
# the start button.
with gr.Blocks() as demo:
    gr.Markdown("## 🎨 Real-Time AI-Enhanced Webcam using Latent Consistency Model (LCM)")

    with gr.Row():
        # Gradio has no ``gr.Camera`` component; a streaming webcam input is an
        # Image component restricted to the webcam source.
        # NOTE(review): ``sources=[...]`` is the Gradio >= 4 spelling; on
        # Gradio 3.x use ``source="webcam"`` instead -- confirm pinned version.
        webcam_feed = gr.Image(sources=["webcam"], streaming=True, label="Live Webcam")
        processed_image = gr.Image(label="AI-Enhanced Webcam Feed")

    with gr.Row():
        canvas_output = gr.Image(interactive=True, label="Canvas - Processed Image Output")

    prompt_input = gr.Textbox(label="Real-Time LCM Prompt", value="A futuristic landscape")
    start_button = gr.Button("Start Real-Time AI Enhancement")

    # Event wiring is currently disabled. Note that video_stream yields a
    # single value per step while two output components are listed here.
    # start_button.click(fn=video_stream, inputs=[prompt_input], outputs=[processed_image, canvas_output])

demo.launch(share=True)