import time

import cv2
import gradio as gr
import torch

from pipelines.pipeline import InferencePipeline


class ChaplinGradio:
    def __init__(self):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.vsr_model = None
        self.load_models()

        # Video params
        self.fps = 16
        self.frame_interval = 1 / self.fps  # minimum seconds between processed frames
        self.frame_compression = 25  # JPEG quality (0-100); lower = smaller, lossier frames
        self.last_frame_time = time.time()

    def load_models(self):
        """Load models using the InferencePipeline with HF Space defaults"""
        config = {
            "model": {
                "name": "chaplin_vsr",
                "weights": "models/chaplin_vsr.pth",
                "detector": "mediapipe"
            }
        }
        self.vsr_model = InferencePipeline(
            config,
            device=self.device,
            detector="mediapipe",
            face_track=True
        )
        print("Model loaded successfully!")

    def process_frame(self, frame):
        """Process a single frame with rate limiting and compression"""
        current_time = time.time()
        # Rate-limit: skip frames that arrive faster than the target fps
        if current_time - self.last_frame_time < self.frame_interval:
            return None
        self.last_frame_time = current_time

        if frame is None:
            return "No video input detected"

        # Compress the frame via a JPEG round-trip, then decode to grayscale
        encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), self.frame_compression]
        _, buffer = cv2.imencode('.jpg', frame, encode_param)
        compressed_frame = cv2.imdecode(buffer, cv2.IMREAD_GRAYSCALE)

        # Run inference using the VSR model
        predicted_text = self.vsr_model.process_frame(compressed_frame)
        return predicted_text
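
# A minimal local smoke test (a sketch, not part of the original Space; it
# assumes InferencePipeline.process_frame accepts a single image ndarray, as
# the call inside ChaplinGradio.process_frame above implies):
#
#     import numpy as np
#     chaplin = ChaplinGradio()
#     blank = np.zeros((480, 640, 3), dtype=np.uint8)  # hypothetical dummy frame
#     print(chaplin.process_frame(blank))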

# Create Gradio interface
chaplin = ChaplinGradio()

iface = gr.Interface(
    fn=chaplin.process_frame,
    # Gradio 4.x takes `sources` as a list; the pre-4.x `source="webcam"`
    # keyword was removed and raises a TypeError
    inputs=gr.Image(sources=["webcam"], streaming=True),
    outputs=gr.Textbox(label="Predicted Text"),
    title="Chaplin - Live Visual Speech Recognition",
    description="Use your webcam to perform real-time visual speech recognition.",
    live=True
)

if __name__ == "__main__":
    iface.launch()
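
# Optional hardening (an assumption, not from the original Space): Gradio's
# request queue smooths bursts of streamed frames when several clients
# connect at once, e.g.
#
#     iface.queue().launch()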