Spaces:
Runtime error
Runtime error
File size: 2,194 Bytes
5b30a24 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import gradio as gr
import cv2
import torch
from pipelines.pipeline import InferencePipeline
import time
class ChaplinGradio:
    """Live visual speech recognition (lipreading) backed by InferencePipeline.

    Wraps model loading and per-frame inference for a streaming Gradio app:
    webcam frames are rate-limited, JPEG-compressed to grayscale, and fed to
    the VSR model, which returns predicted text.
    """

    def __init__(self):
        # Prefer GPU when available; InferencePipeline runs on this device.
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.vsr_model = None
        self.load_models()

        # Video params: process at most `fps` frames per second.
        self.fps = 16
        self.frame_interval = 1 / self.fps
        # JPEG quality (0-100) applied before inference to shrink the input.
        self.frame_compression = 25
        self.last_frame_time = time.time()

    def load_models(self):
        """Load models using the InferencePipeline with HF Space defaults."""
        config = {
            "model": {
                "name": "chaplin_vsr",
                "weights": "models/chaplin_vsr.pth",
                "detector": "mediapipe",
            }
        }
        self.vsr_model = InferencePipeline(
            config,
            device=self.device,
            detector="mediapipe",
            face_track=True,
        )
        print("Model loaded successfully!")

    def process_frame(self, frame):
        """Process a single webcam frame with rate limiting and compression.

        Args:
            frame: image array from the Gradio webcam stream, or None when
                no video input is available.

        Returns:
            The model's predicted text, the string "No video input detected"
            when `frame` is None, or None when the frame is dropped by the
            rate limiter.
        """
        # Fix: reject missing frames *before* touching the rate limiter, so a
        # None frame no longer consumes the frame budget or resets the timer.
        if frame is None:
            return "No video input detected"

        current_time = time.time()
        if current_time - self.last_frame_time < self.frame_interval:
            # Too soon since the last processed frame; drop this one.
            return None
        self.last_frame_time = current_time

        # Compress via a low-quality JPEG round-trip, decoding straight to
        # grayscale, to shrink the input before inference.
        encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), self.frame_compression]
        _, buffer = cv2.imencode('.jpg', frame, encode_param)
        compressed_frame = cv2.imdecode(buffer, cv2.IMREAD_GRAYSCALE)

        # Run inference using the VSR model.
        predicted_text = self.vsr_model.process_frame(compressed_frame)
        return predicted_text
# Create Gradio interface
chaplin = ChaplinGradio()

iface = gr.Interface(
    fn=chaplin.process_frame,
    # Fix: Gradio 4.x renamed Image(source="webcam") to sources=["webcam"];
    # the old keyword raises TypeError at startup (the Space's runtime error).
    inputs=gr.Image(sources=["webcam"], streaming=True),
    outputs=gr.Textbox(label="Predicted Text"),
    title="Chaplin - Live Visual Speech Recognition",
    description="Use your webcam to perform real-time visual speech recognition.",
    live=True,
)

if __name__ == "__main__":
    iface.launch()