# video_processor.py
from io import BytesIO
import av
import base64
from PIL import Image
from typing import List
from dataclasses import dataclass


@dataclass
class VideoProcessor:
    """Decode a video into sampled frames and pack them into base64 strips.

    Attributes:
        frame_format: Image format name handed to ``Image.save`` when
            encoding images in ``to_base64_list``.
        frame_skip: Sampling stride used by ``decode``; only frames whose
            index is a multiple of this value are kept.
        group_size: Divisor used by ``group_and_concatenate`` to derive the
            number of frames per chunk (``len(frames) // group_size``).
            Expected to be a positive integer.
    """

    frame_format: str = "JPEG"
    frame_skip: int = 10
    group_size: int = 10

    def decode(self, video_path: str) -> List[Image.Image]:
        """Return every ``frame_skip``-th frame of the first video stream.

        Args:
            video_path: Path (or anything ``av.open`` accepts) of the video.

        Returns:
            The sampled frames, in decode order, starting with frame 0.
        """
        frames = []
        # The context manager closes the container even if decoding raises;
        # previously the container was left open.
        with av.open(video_path) as container:
            for index, frame in enumerate(container.decode(video=0)):
                if index % self.frame_skip:
                    continue
                frames.append(frame.to_image())
        return frames

    def concatenate(self, frames: List[Image.Image], direction: str = "horizontal") -> Image.Image:
        """Paste ``frames`` side by side (or stacked) onto one RGB canvas.

        Args:
            frames: Non-empty list of images to join.
            direction: ``"horizontal"`` lays frames left to right; any other
                value stacks them top to bottom.

        Returns:
            A new RGB image large enough to hold every frame. Frames are
            aligned to the top (horizontal) or left (vertical) edge; areas
            not covered by a frame keep the canvas fill colour.

        Raises:
            ValueError: If ``frames`` is empty.
        """
        if not frames:
            # Same exception type the tuple-unpacking used to raise, but with
            # a message that names the actual problem.
            raise ValueError("concatenate() requires at least one frame")

        widths, heights = zip(*(frame.size for frame in frames))
        horizontal = direction == "horizontal"
        if horizontal:
            canvas_size = (sum(widths), max(heights))
        else:
            canvas_size = (max(widths), sum(heights))

        canvas = Image.new('RGB', canvas_size)
        offset = 0
        for frame in frames:
            if horizontal:
                canvas.paste(frame, (offset, 0))
                offset += frame.width
            else:
                canvas.paste(frame, (0, offset))
                offset += frame.height
        return canvas

    def group_and_concatenate(self, frames: List[Image.Image], limit=10) -> List[Image.Image]:
        """Split ``frames`` into consecutive chunks and join each into a strip.

        Chunks hold ``len(frames) // group_size`` frames (at least one), so a
        frame count that is not a multiple of ``group_size`` yields extra
        trailing chunks. Each chunk is then evenly subsampled to at most
        ``limit`` frames and concatenated horizontally.

        Args:
            frames: Frames to group, in order.
            limit: Maximum number of frames kept from each chunk.

        Returns:
            One concatenated image per chunk; an empty list when ``frames``
            is empty.
        """
        if not frames:
            return []

        # Clamp to 1: with fewer frames than group_size the floor division
        # gives 0, which previously crashed range() with a zero step.
        chunk_len = max(1, len(frames) // self.group_size)

        strips = []
        for start in range(0, len(frames), chunk_len):
            chunk = frames[start:start + chunk_len]
            # Ceiling division so the stride is large enough to keep the
            # sample within `limit` (floor division let up to 2*limit - 1
            # frames through).
            stride = max(1, -(-len(chunk) // limit))
            strips.append(self.concatenate(chunk[::stride]))
        return strips

    def to_base64_list(self, images: List[Image.Image]) -> List[str]:
        """Encode each image as ``frame_format`` and return base64 text.

        Args:
            images: Images to serialise.

        Returns:
            One base64-encoded string per input image, in the same order.
        """
        encoded = []
        for image in images:
            buffer = BytesIO()
            image.save(buffer, format=self.frame_format)
            encoded.append(base64.b64encode(buffer.getvalue()).decode('utf-8'))
        return encoded