Spaces:

mgbam
/

yeye

Runtime error

App Files Files Community

mgbam commited on 25 days ago

Commit

c7eb758

verified ·

1 Parent(s): 0c8eb2b

Update media_processing.py

Browse files

Files changed (1) hide show

media_processing.py +310 -1109

media_processing.py CHANGED Viewed

@@ -1,1167 +1,368 @@
 import os
 import base64
-import cv2
-import numpy as np
-from PIL import Image
-import pytesseract
 import requests
-from urllib.parse import urlparse, urljoin
-from bs4 import BeautifulSoup
-import html2text
-import json
-import time
-import webbrowser
-import urllib.parse
-import copy
-import html
 import tempfile
-import uuid
-import datetime
-import threading
-import atexit
-from huggingface_hub import HfApi
 import gradio as gr
-import subprocess
-import re
-# ---------------------------------------------------------------------------
-# Video temp-file management (per-session tracking and cleanup)
-# ---------------------------------------------------------------------------
-VIDEO_TEMP_DIR = os.path.join(tempfile.gettempdir(), "anycoder_videos")
-VIDEO_FILE_TTL_SECONDS = 6 * 60 * 60  # 6 hours
-_SESSION_VIDEO_FILES: Dict[str, List[str]] = {}
-_VIDEO_FILES_LOCK = threading.Lock()
-def _ensure_video_dir_exists() -> None:
-    try:
-        os.makedirs(VIDEO_TEMP_DIR, exist_ok=True)
-    except Exception:
-        pass
-def _register_video_for_session(session_id: Optional[str], file_path: str) -> None:
-    if not session_id or not file_path:
-        return
-    with _VIDEO_FILES_LOCK:
-        if session_id not in _SESSION_VIDEO_FILES:
-            _SESSION_VIDEO_FILES[session_id] = []
-        _SESSION_VIDEO_FILES[session_id].append(file_path)
-def cleanup_session_videos(session_id: Optional[str]) -> None:
-    if not session_id:
-        return
-    with _VIDEO_FILES_LOCK:
-        file_list = _SESSION_VIDEO_FILES.pop(session_id, [])
-    for path in file_list:
-        try:
-            if path and os.path.exists(path):
-                os.unlink(path)
-        except Exception:
-            # Best-effort cleanup
-            pass
-def reap_old_videos(ttl_seconds: int = VIDEO_FILE_TTL_SECONDS) -> None:
-    """Delete old video files in the temp directory based on modification time."""
-    try:
-        _ensure_video_dir_exists()
-        now_ts = time.time()
-        for name in os.listdir(VIDEO_TEMP_DIR):
-            path = os.path.join(VIDEO_TEMP_DIR, name)
-            try:
-                if not os.path.isfile(path):
-                    continue
-                mtime = os.path.getmtime(path)
-                if now_ts - mtime > ttl_seconds:
-                    os.unlink(path)
-            except Exception:
-                pass
-    except Exception:
-        # Temp dir might not exist or be accessible; ignore
-        pass
-# ---------------------------------------------------------------------------
-# Audio temp-file management (per-session tracking and cleanup)
-# ---------------------------------------------------------------------------
-AUDIO_TEMP_DIR = os.path.join(tempfile.gettempdir(), "anycoder_audio")
-AUDIO_FILE_TTL_SECONDS = 6 * 60 * 60  # 6 hours
-_SESSION_AUDIO_FILES: Dict[str, List[str]] = {}
-_AUDIO_FILES_LOCK = threading.Lock()
-def _ensure_audio_dir_exists() -> None:
-    try:
-        os.makedirs(AUDIO_TEMP_DIR, exist_ok=True)
-    except Exception:
-        pass
-def _register_audio_for_session(session_id: Optional[str], file_path: str) -> None:
-    if not session_id or not file_path:
-        return
-    with _AUDIO_FILES_LOCK:
-        if session_id not in _SESSION_AUDIO_FILES:
-            _SESSION_AUDIO_FILES[session_id] = []
-        _SESSION_AUDIO_FILES[session_id].append(file_path)
-def cleanup_session_audio(session_id: Optional[str]) -> None:
-    if not session_id:
-        return
-    with _AUDIO_FILES_LOCK:
-        file_list = _SESSION_AUDIO_FILES.pop(session_id, [])
-    for path in file_list:
-        try:
-            if path and os.path.exists(path):
-                os.unlink(path)
-        except Exception:
-            pass
-def reap_old_audio(ttl_seconds: int = AUDIO_FILE_TTL_SECONDS) -> None:
-    try:
-        _ensure_audio_dir_exists()
-        now_ts = time.time()
-        for name in os.listdir(AUDIO_TEMP_DIR):
-            path = os.path.join(AUDIO_TEMP_DIR, name)
-            try:
-                if not os.path.isfile(path):
-                    continue
-                mtime = os.path.getmtime(path)
-                if now_ts - mtime > ttl_seconds:
-                    os.unlink(path)
-            except Exception:
-                pass
-    except Exception:
-        pass
-# ---------------------------------------------------------------------------
-# General temp media file management (per-session tracking and cleanup)
-# ---------------------------------------------------------------------------
-MEDIA_TEMP_DIR = os.path.join(tempfile.gettempdir(), "anycoder_media")
-MEDIA_FILE_TTL_SECONDS = 6 * 60 * 60  # 6 hours
-_SESSION_MEDIA_FILES: Dict[str, List[str]] = {}
-_MEDIA_FILES_LOCK = threading.Lock()
-# Global dictionary to store temporary media files for the session
-temp_media_files = {}
-def _ensure_media_dir_exists() -> None:
-    """Ensure the media temp directory exists."""
-    try:
-        os.makedirs(MEDIA_TEMP_DIR, exist_ok=True)
-    except Exception:
-        pass
-def track_session_media_file(session_id: Optional[str], file_path: str) -> None:
-    """Track a media file for session-based cleanup."""
-    if not session_id or not file_path:
-        return
-    with _MEDIA_FILES_LOCK:
-        if session_id not in _SESSION_MEDIA_FILES:
-            _SESSION_MEDIA_FILES[session_id] = []
-        _SESSION_MEDIA_FILES[session_id].append(file_path)
-def cleanup_session_media(session_id: Optional[str]) -> None:
-    """Clean up media files for a specific session."""
-    if not session_id:
-        return
-    with _MEDIA_FILES_LOCK:
-        files_to_clean = _SESSION_MEDIA_FILES.pop(session_id, [])
-    for path in files_to_clean:
         try:
-            if path and os.path.exists(path):
-                os.unlink(path)
-        except Exception:
-            # Best-effort cleanup
-            pass
-def reap_old_media(ttl_seconds: int = MEDIA_FILE_TTL_SECONDS) -> None:
-    """Delete old media files in the temp directory based on modification time."""
-    try:
-        _ensure_media_dir_exists()
-        now_ts = time.time()
-        for name in os.listdir(MEDIA_TEMP_DIR):
-            path = os.path.join(MEDIA_TEMP_DIR, name)
-            if os.path.isfile(path):
-                try:
-                    mtime = os.path.getmtime(path)
-                    if (now_ts - mtime) > ttl_seconds:
-                        os.unlink(path)
-                except Exception:
-                    pass
-    except Exception:
-        # Temp dir might not exist or be accessible; ignore
-        pass
-def cleanup_all_temp_media_on_startup() -> None:
-    """Clean up all temporary media files on app startup."""
-    try:
-        # Clean up temp_media_files registry
-        temp_media_files.clear()
-        # Clean up actual files from disk (assume all are orphaned on startup)
-        _ensure_media_dir_exists()
-        for name in os.listdir(MEDIA_TEMP_DIR):
-            path = os.path.join(MEDIA_TEMP_DIR, name)
-            if os.path.isfile(path):
-                try:
-                    os.unlink(path)
-                except Exception:
-                    pass
-        # Clear session tracking
-        with _MEDIA_FILES_LOCK:
-            _SESSION_MEDIA_FILES.clear()
-        print("[StartupCleanup] Cleaned up orphaned temporary media files")
-    except Exception as e:
-        print(f"[StartupCleanup] Error during media cleanup: {str(e)}")
-def cleanup_all_temp_media_on_shutdown() -> None:
-    """Clean up all temporary media files on app shutdown."""
-    try:
-        print("[ShutdownCleanup] Cleaning up temporary media files...")
-        # Clean up temp_media_files registry and remove files
-        for file_id, file_info in temp_media_files.items():
-            try:
-                if os.path.exists(file_info['path']):
-                    os.unlink(file_info['path'])
-            except Exception:
-                pass
-        temp_media_files.clear()
-        # Clean up all session files
-        with _MEDIA_FILES_LOCK:
-            for session_id, file_paths in _SESSION_MEDIA_FILES.items():
-                for path in file_paths:
-                    try:
-                        if path and os.path.exists(path):
-                            os.unlink(path)
-                    except Exception:
-                        pass
-            _SESSION_MEDIA_FILES.clear()
-        print("[ShutdownCleanup] Temporary media cleanup completed")
-    except Exception as e:
-        print(f"[ShutdownCleanup] Error during cleanup: {str(e)}")
-# Register shutdown cleanup handler
-atexit.register(cleanup_all_temp_media_on_shutdown)
-def create_temp_media_url(media_bytes: bytes, filename: str, media_type: str = "image", session_id: Optional[str] = None) -> str:
-    """Create a temporary file and return a local URL for preview."""
-    try:
-        # Create unique filename with timestamp and UUID
-        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-        unique_id = str(uuid.uuid4())[:8]
-        base_name, ext = os.path.splitext(filename)
-        unique_filename = f"{media_type}_{timestamp}_{unique_id}_{base_name}{ext}"
-        # Create temporary file in the dedicated directory
-        _ensure_media_dir_exists()
-        temp_path = os.path.join(MEDIA_TEMP_DIR, unique_filename)
-        # Write media bytes to temporary file
-        with open(temp_path, 'wb') as f:
-            f.write(media_bytes)
-        # Track file for session-based cleanup
-        if session_id:
-            track_session_media_file(session_id, temp_path)
-        # Store the file info for later upload
-        file_id = f"{media_type}_{unique_id}"
-        temp_media_files[file_id] = {
-            'path': temp_path,
-            'filename': filename,
-            'media_type': media_type,
-            'media_bytes': media_bytes
-        }
-        # Return file:// URL for preview
-        file_url = f"file://{temp_path}"
-        print(f"[TempMedia] Created temporary {media_type} file: {file_url}")
-        return file_url
-    except Exception as e:
-        print(f"[TempMedia] Failed to create temporary file: {str(e)}")
-        return f"Error creating temporary {media_type} file: {str(e)}"
-def upload_media_to_hf(media_bytes: bytes, filename: str, media_type: str = "image", token: gr.OAuthToken | None = None, use_temp: bool = True) -> str:
-    """Upload media file to user's Hugging Face account or create temporary file."""
-    try:
-        # If use_temp is True, create temporary file for preview
-        if use_temp:
-            return create_temp_media_url(media_bytes, filename, media_type)
-        # Otherwise, upload to Hugging Face for permanent URL
-        # Try to get token from OAuth first, then fall back to environment variable
-        hf_token = None
-        if token and token.token:
-            hf_token = token.token
-        else:
-            hf_token = os.getenv('HF_TOKEN')
-        if not hf_token:
-            return "Error: Please log in with your Hugging Face account to upload media, or set HF_TOKEN environment variable."
-        # Initialize HF API
-        api = HfApi(token=hf_token)
-        # Get current user info to determine username
         try:
-            user_info = api.whoami()
-            username = user_info.get('name', 'unknown-user')
         except Exception as e:
-            print(f"[HFUpload] Could not get user info: {e}")
-            username = 'anycoder-user'
-        # Create repository name for media storage
-        repo_name = f"{username}/anycoder-media"
-        # Try to create the repository if it doesn't exist
         try:
-            api.create_repo(
-                repo_id=repo_name,
-                repo_type="dataset",
-                private=False,
-                exist_ok=True
             )
-            print(f"[HFUpload] Repository {repo_name} ready")
         except Exception as e:
-            print(f"[HFUpload] Repository creation/access issue: {e}")
-            # Continue anyway, repo might already exist
-        # Create unique filename with timestamp and UUID
-        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-        unique_id = str(uuid.uuid4())[:8]
-        base_name, ext = os.path.splitext(filename)
-        unique_filename = f"{media_type}/{timestamp}_{unique_id}_{base_name}{ext}"
-        # Create temporary file for upload
-        with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
-            temp_file.write(media_bytes)
-            temp_path = temp_file.name
         try:
-            # Upload file to HF repository
-            api.upload_file(
-                path_or_fileobj=temp_path,
-                path_in_repo=unique_filename,
-                repo_id=repo_name,
-                repo_type="dataset",
-                commit_message=f"Upload {media_type} generated by AnyCoder"
             )
-            # Generate permanent URL
-            permanent_url = f"https://huggingface.co/datasets/{repo_name}/resolve/main/{unique_filename}"
-            print(f"[HFUpload] Successfully uploaded {media_type} to {permanent_url}")
-            return permanent_url
-        finally:
-            # Clean up temporary file
-            try:
-                os.unlink(temp_path)
-            except Exception:
-                pass
-    except Exception as e:
-        print(f"[HFUpload] Upload failed: {str(e)}")
-        return f"Error uploading {media_type} to Hugging Face: {str(e)}"
-def upload_temp_files_to_hf_and_replace_urls(html_content: str, token: gr.OAuthToken | None = None) -> str:
-    """Upload all temporary media files to HF and replace their URLs in HTML content."""
-    try:
-        if not temp_media_files:
-            print("[DeployUpload] No temporary media files to upload")
-            return html_content
-        print(f"[DeployUpload] Uploading {len(temp_media_files)} temporary media files to HF")
-        updated_content = html_content
-        for file_id, file_info in temp_media_files.items():
-            try:
-                # Upload to HF with permanent URL
-                permanent_url = upload_media_to_hf(
-                    file_info['media_bytes'],
-                    file_info['filename'],
-                    file_info['media_type'],
-                    token,
-                    use_temp=False  # Force permanent upload
-                )
-                if not permanent_url.startswith("Error"):
-                    # Replace the temporary file URL with permanent URL
-                    temp_url = f"file://{file_info['path']}"
-                    updated_content = updated_content.replace(temp_url, permanent_url)
-                    print(f"[DeployUpload] Replaced {temp_url} with {permanent_url}")
-                else:
-                    print(f"[DeployUpload] Failed to upload {file_id}: {permanent_url}")
-            except Exception as e:
-                print(f"[DeployUpload] Error uploading {file_id}: {str(e)}")
-                continue
-        # Clean up temporary files after upload
-        cleanup_temp_media_files()
-        return updated_content
-    except Exception as e:
-        print(f"[DeployUpload] Failed to upload temporary files: {str(e)}")
-        return html_content
-def cleanup_temp_media_files():
-    """Clean up temporary media files from disk and memory."""
-    try:
-        for file_id, file_info in temp_media_files.items():
             try:
-                if os.path.exists(file_info['path']):
-                    os.remove(file_info['path'])
-                    print(f"[TempCleanup] Removed {file_info['path']}")
             except Exception as e:
-                print(f"[TempCleanup] Failed to remove {file_info['path']}: {str(e)}")
-        # Clear the global dictionary
-        temp_media_files.clear()
-        print("[TempCleanup] Cleared temporary media files registry")
-    except Exception as e:
-        print(f"[TempCleanup] Error during cleanup: {str(e)}")
-def generate_image_with_qwen(prompt: str, image_index: int = 0, token: gr.OAuthToken | None = None) -> str:
-    """Generate image using Qwen image model via Hugging Face InferenceClient and upload to HF for permanent URL"""
-    try:
-        # Check if HF_TOKEN is available
-        if not os.getenv('HF_TOKEN'):
-            return "Error: HF_TOKEN environment variable is not set. Please set it to your Hugging Face API token."
-        # Create InferenceClient for Qwen image generation
-        client = InferenceClient(
-            provider="auto",
-            api_key=os.getenv('HF_TOKEN'),
-            bill_to="huggingface",
-        )
-        # Generate image using Qwen/Qwen-Image model
-        image = client.text_to_image(
-            prompt,
-            model="Qwen/Qwen-Image",
-        )
-        # Resize image to reduce size while maintaining quality
-        max_size = 1024  # Increased size since we're not using data URIs
-        if image.width > max_size or image.height > max_size:
-            image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
-        # Convert PIL Image to bytes for upload
-        import io
-        buffer = io.BytesIO()
-        # Save as JPEG with good quality since we're not embedding
-        image.convert('RGB').save(buffer, format='JPEG', quality=90, optimize=True)
-        image_bytes = buffer.getvalue()
-        # Create temporary URL for preview (will be uploaded to HF during deploy)
-        filename = f"generated_image_{image_index}.jpg"
-        temp_url = upload_media_to_hf(image_bytes, filename, "image", token, use_temp=True)
-        # Check if creation was successful
-        if temp_url.startswith("Error"):
-            return temp_url
-        # Return HTML img tag with temporary URL
-        return f'<img src="{temp_url}" alt="{prompt}" style="max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0;" loading="lazy" />'
-    except Exception as e:
-        print(f"Image generation error: {str(e)}")
-        return f"Error generating image: {str(e)}"
-def generate_image_to_image(input_image_data, prompt: str, token: gr.OAuthToken | None = None) -> str:
-    """Generate an image using image-to-image with Qwen-Image-Edit via Hugging Face InferenceClient."""
-    try:
-        # Check token
-        if not os.getenv('HF_TOKEN'):
-            return "Error: HF_TOKEN environment variable is not set. Please set it to your Hugging Face API token."
-        # Prepare client
-        client = InferenceClient(
-            provider="auto",
-            api_key=os.getenv('HF_TOKEN'),
-            bill_to="huggingface",
-        )
-        # Normalize input image to bytes
-        import io
-        from PIL import Image
-        try:
-            import numpy as np
-        except Exception:
-            np = None
         if hasattr(input_image_data, 'read'):
-            # File-like object
             raw = input_image_data.read()
             pil_image = Image.open(io.BytesIO(raw))
         elif hasattr(input_image_data, 'mode') and hasattr(input_image_data, 'size'):
-            # PIL Image
             pil_image = input_image_data
-        elif np is not None and isinstance(input_image_data, np.ndarray):
             pil_image = Image.fromarray(input_image_data)
         elif isinstance(input_image_data, (bytes, bytearray)):
             pil_image = Image.open(io.BytesIO(input_image_data))
         else:
-            # Fallback: try to convert via bytes
             pil_image = Image.open(io.BytesIO(bytes(input_image_data)))
         # Ensure RGB
         if pil_image.mode != 'RGB':
             pil_image = pil_image.convert('RGB')
-        # Resize input image to avoid request body size limits
-        max_input_size = 1024
-        if pil_image.width > max_input_size or pil_image.height > max_input_size:
-            pil_image.thumbnail((max_input_size, max_input_size), Image.Resampling.LANCZOS)
-        buf = io.BytesIO()
-        pil_image.save(buf, format='JPEG', quality=85, optimize=True)
-        input_bytes = buf.getvalue()
-        # Call image-to-image
-        image = client.image_to_image(
-            input_bytes,
-            prompt=prompt,
-            model="Qwen/Qwen-Image-Edit",
-        )
-        # Resize/optimize (larger since not using data URIs)
-        max_size = 1024
-        if image.width > max_size or image.height > max_size:
-            image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
-        out_buf = io.BytesIO()
-        image.convert('RGB').save(out_buf, format='JPEG', quality=90, optimize=True)
-        image_bytes = out_buf.getvalue()
-        # Create temporary URL for preview (will be uploaded to HF during deploy)
-        filename = "image_to_image_result.jpg"
-        temp_url = upload_media_to_hf(image_bytes, filename, "image", token, use_temp=True)
-        # Check if creation was successful
-        if temp_url.startswith("Error"):
-            return temp_url
-        return f"<img src=\"{temp_url}\" alt=\"{prompt}\" style=\"max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0;\" loading=\"lazy\" />"
-    except Exception as e:
-        print(f"Image-to-image generation error: {str(e)}")
-        return f"Error generating image (image-to-image): {str(e)}"
-def generate_video_from_image(input_image_data, prompt: str, session_id: Optional[str] = None, token: gr.OAuthToken | None = None) -> str:
-    """Generate a video from an input image and prompt using Hugging Face InferenceClient."""
-    try:
-        print("[Image2Video] Starting video generation")
-        if not os.getenv('HF_TOKEN'):
-            print("[Image2Video] Missing HF_TOKEN")
-            return "Error: HF_TOKEN environment variable is not set. Please set it to your Hugging Face API token."
-        # Prepare client
-        client = InferenceClient(
-            provider="auto",
-            api_key=os.getenv('HF_TOKEN'),
-            bill_to="huggingface",
-        )
-        print(f"[Image2Video] InferenceClient initialized (provider=auto)")
-        # Normalize input image to bytes, with downscale/compress to cap request size
-        import io
-        from PIL import Image
-        try:
-            import numpy as np
-        except Exception:
-            np = None
-        def _load_pil(img_like) -> Image.Image:
-            if hasattr(img_like, 'read'):
-                return Image.open(io.BytesIO(img_like.read()))
-            if hasattr(img_like, 'mode') and hasattr(img_like, 'size'):
-                return img_like
-            if np is not None and isinstance(img_like, np.ndarray):
-                return Image.fromarray(img_like)
-            if isinstance(img_like, (bytes, bytearray)):
-                return Image.open(io.BytesIO(img_like))
-            return Image.open(io.BytesIO(bytes(img_like)))
-        pil_image = _load_pil(input_image_data)
-        if pil_image.mode != 'RGB':
-            pil_image = pil_image.convert('RGB')
-        try:
-            print(f"[Image2Video] Input PIL image size={pil_image.size} mode={pil_image.mode}")
-        except Exception:
-            pass
-        # Progressive encode to keep payload under ~3.9MB (below 4MB limit)
-        MAX_BYTES = 3_900_000
-        max_dim = 1024  # initial cap on longest edge
         quality = 90
         def encode_current(pil: Image.Image, q: int) -> bytes:
             tmp = io.BytesIO()
             pil.save(tmp, format='JPEG', quality=q, optimize=True)
             return tmp.getvalue()
-        # Downscale while the longest edge exceeds max_dim
         while max(pil_image.size) > max_dim:
             ratio = max_dim / float(max(pil_image.size))
             new_size = (max(1, int(pil_image.size[0] * ratio)), max(1, int(pil_image.size[1] * ratio)))
             pil_image = pil_image.resize(new_size, Image.Resampling.LANCZOS)
         encoded = encode_current(pil_image, quality)
-        # If still too big, iteratively reduce quality, then dimensions
         while len(encoded) > MAX_BYTES and (quality > 40 or max(pil_image.size) > 640):
             if quality > 40:
                 quality -= 10
             else:
-                # reduce dims by 15% if already at low quality
                 new_w = max(1, int(pil_image.size[0] * 0.85))
                 new_h = max(1, int(pil_image.size[1] * 0.85))
                 pil_image = pil_image.resize((new_w, new_h), Image.Resampling.LANCZOS)
             encoded = encode_current(pil_image, quality)
-        input_bytes = encoded
-        # Call image-to-video; require method support
-        model_id = "Lightricks/LTX-Video-0.9.8-13B-distilled"
-        image_to_video_method = getattr(client, "image_to_video", None)
-        if not callable(image_to_video_method):
-            print("[Image2Video] InferenceClient.image_to_video not available in this huggingface_hub version")
-            return (
-                "Error generating video (image-to-video): Your installed huggingface_hub version "
-                "does not expose InferenceClient.image_to_video. Please upgrade with "
-                "`pip install -U huggingface_hub` and try again."
-            )
-        print(f"[Image2Video] Calling image_to_video with model={model_id}, prompt length={len(prompt or '')}")
-        video_bytes = image_to_video_method(
-            input_bytes,
-            prompt=prompt,
-            model=model_id,
-        )
-        print(f"[Image2Video] Received video bytes: {len(video_bytes) if hasattr(video_bytes, '__len__') else 'unknown length'}")
-        # Create temporary URL for preview (will be uploaded to HF during deploy)
-        filename = "image_to_video_result.mp4"
-        temp_url = upload_media_to_hf(video_bytes, filename, "video", token, use_temp=True)
-        # Check if creation was successful
-        if temp_url.startswith("Error"):
-            return temp_url
-        video_html = (
-            f'<video controls autoplay muted loop playsinline '
-            f'style="max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0; display: block;" '
-            f'onloadstart="this.style.backgroundColor=\'#f0f0f0\'" '
-            f'onerror="this.style.display=\'none\'; console.error(\'Video failed to load\')">'
-            f'<source src="{temp_url}" type="video/mp4" />'
-            f'<p style="text-align: center; color: #666;">Your browser does not support the video tag.</p>'
-            f'</video>'
-        )
-        print(f"[Image2Video] Successfully generated video HTML tag with temporary URL: {temp_url}")
-        # Validate the generated video HTML
-        if not validate_video_html(video_html):
-            print("[Image2Video] Generated video HTML failed validation")
-            return "Error: Generated video HTML is malformed"
-        return video_html
-    except Exception as e:
-        import traceback
-        print("[Image2Video] Exception during generation:")
-        traceback.print_exc()
-        print(f"Image-to-video generation error: {str(e)}")
-        return f"Error generating video (image-to-video): {str(e)}"
-def generate_video_from_text(prompt: str, session_id: Optional[str] = None, token: gr.OAuthToken | None = None) -> str:
-    """Generate a video from a text prompt using Hugging Face InferenceClient."""
-    try:
-        print("[Text2Video] Starting video generation from text")
-        if not os.getenv('HF_TOKEN'):
-            print("[Text2Video] Missing HF_TOKEN")
-            return "Error: HF_TOKEN environment variable is not set. Please set it to your Hugging Face API token."
-        client = InferenceClient(
-            provider="auto",
-            api_key=os.getenv('HF_TOKEN'),
-            bill_to="huggingface",
-        )
-        print("[Text2Video] InferenceClient initialized (provider=auto)")
-        # Ensure the client has text_to_video (newer huggingface_hub)
-        text_to_video_method = getattr(client, "text_to_video", None)
-        if not callable(text_to_video_method):
-            print("[Text2Video] InferenceClient.text_to_video not available in this huggingface_hub version")
-            return (
-                "Error generating video (text-to-video): Your installed huggingface_hub version "
-                "does not expose InferenceClient.text_to_video. Please upgrade with "
-                "`pip install -U huggingface_hub` and try again."
-            )
-        model_id = "Wan-AI/Wan2.2-T2V-A14B"
-        prompt_str = (prompt or "").strip()
-        print(f"[Text2Video] Calling text_to_video with model={model_id}, prompt length={len(prompt_str)}")
-        video_bytes = text_to_video_method(
-            prompt_str,
-            model=model_id,
-        )
-        print(f"[Text2Video] Received video bytes: {len(video_bytes) if hasattr(video_bytes, '__len__') else 'unknown length'}")
-        # Create temporary URL for preview (will be uploaded to HF during deploy)
-        filename = "text_to_video_result.mp4"
-        temp_url = upload_media_to_hf(video_bytes, filename, "video", token, use_temp=True)
-        # Check if creation was successful
-        if temp_url.startswith("Error"):
-            return temp_url
-        video_html = (
-            f'<video controls autoplay muted loop playsinline '
-            f'style="max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0; display: block;" '
-            f'onloadstart="this.style.backgroundColor=\'#f0f0f0\'" '
-            f'onerror="this.style.display=\'none\'; console.error(\'Video failed to load\')">'
-            f'<source src="{temp_url}" type="video/mp4" />'
-            f'<p style="text-align: center; color: #666;">Your browser does not support the video tag.</p>'
-            f'</video>'
-        )
-        print(f"[Text2Video] Successfully generated video HTML tag with temporary URL: {temp_url}")
-        # Validate the generated video HTML
-        if not validate_video_html(video_html):
-            print("[Text2Video] Generated video HTML failed validation")
-            return "Error: Generated video HTML is malformed"
-        return video_html
-    except Exception as e:
-        import traceback
-        print("[Text2Video] Exception during generation:")
-        traceback.print_exc()
-        print(f"Text-to-video generation error: {str(e)}")
-        return f"Error generating video (text-to-video): {str(e)}"
-def generate_music_from_text(prompt: str, music_length_ms: int = 30000, session_id: Optional[str] = None, token: gr.OAuthToken | None = None) -> str:
-    """Generate music from a text prompt using ElevenLabs Music API and return an HTML <audio> tag.
-
-    Args:
-        prompt: Text description of the music. A default orchestral prompt is
-            sent when this is empty.
-        music_length_ms: Requested length in milliseconds; 30000 is used when
-            the value is falsy.
-        session_id: Accepted for signature parity; not referenced here.
-        token: Forwarded unchanged to ``upload_media_to_hf``.
-
-    Returns:
-        An HTML snippet containing an ``<audio>`` player on success, otherwise
-        a string that starts with ``"Error"``. No exception derived from
-        ``Exception`` escapes this function.
-    """
-    try:
-        api_key = os.getenv('ELEVENLABS_API_KEY')
-        if not api_key:
-            return "Error: ELEVENLABS_API_KEY environment variable is not set."
-        headers = {
-            'Content-Type': 'application/json',
-            'xi-api-key': api_key,
-        }
-        payload = {
-            'prompt': (prompt or 'Epic orchestral theme with soaring strings and powerful brass'),
-            'music_length_ms': int(music_length_ms) if music_length_ms else 30000,
-        }
-        # Bound the request: without a timeout a stalled connection would block
-        # the caller indefinitely. A timeout surfaces through the outer handler.
-        resp = requests.post(
-            'https://api.elevenlabs.io/v1/music/compose',
-            headers=headers,
-            json=payload,
-            timeout=60,
-        )
-        try:
-            resp.raise_for_status()
-        except Exception as e:
-            # Prefer the response attached to the exception, else the one we hold.
-            return f"Error generating music: {getattr(e, 'response', resp).text if hasattr(e, 'response') else resp.text}"
-        # Create temporary URL for preview (will be uploaded to HF during deploy)
-        filename = "generated_music.mp3"
-        temp_url = upload_media_to_hf(resp.content, filename, "audio", token, use_temp=True)
-        # Check if creation was successful
-        if temp_url.startswith("Error"):
-            return temp_url
-        # The header row style uses "display:flex;align-items:center"; the former
-        # "display:flex:align-items:center" was an invalid declaration.
-        audio_html = (
-            "<div class=\"anycoder-music\" style=\"max-width:420px;margin:16px auto;padding:12px 16px;border:1px solid #e5e7eb;border-radius:12px;background:linear-gradient(180deg,#fafafa,#f3f4f6);box-shadow:0 2px 8px rgba(0,0,0,0.06)\">"
-            "  <div style=\"font-size:13px;color:#374151;margin-bottom:8px;display:flex;align-items:center;gap:6px\">"
-            "    <span>🎵 Generated music</span>"
-            "  </div>"
-            f"  <audio controls autoplay loop style=\"width:100%;outline:none;\">"
-            f"    <source src=\"{temp_url}\" type=\"audio/mpeg\" />"
-            "    Your browser does not support the audio element."
-            "  </audio>"
-            "</div>"
-        )
-        print(f"[Music] Successfully generated music HTML tag with temporary URL: {temp_url}")
-        return audio_html
-    except Exception as e:
-        return f"Error generating music: {str(e)}"
-def extract_image_prompts_from_text(text: str, num_images_needed: int = 1) -> list:
-    """Build ``num_images_needed`` image prompts derived from ``text``.
-
-    The stripped text is used verbatim for the first prompt, then two fixed
-    phrasings follow, and any further prompts cycle through eight style
-    variations. Returns an empty list when the stripped text is empty.
-    """
-    base = text.strip()
-    if not base:
-        return []
-    # Prompts for positions 0, 1 and 2.
-    leading = [
-        base,
-        f"Visual representation of {base}",
-        f"Illustration of {base}",
-    ]
-    # Positions 3 and up rotate through these in order.
-    rotating = [
-        f"Digital art of {base}",
-        f"Modern design of {base}",
-        f"Professional illustration of {base}",
-        f"Clean design of {base}",
-        f"Beautiful visualization of {base}",
-        f"Stylish representation of {base}",
-        f"Contemporary design of {base}",
-        f"Elegant illustration of {base}"
-    ]
-    return [
-        leading[pos] if pos < len(leading) else rotating[(pos - 3) % len(rotating)]
-        for pos in range(num_images_needed)
-    ]
-def create_image_replacement_blocks(html_content: str, user_prompt: str) -> str:
-    """Create search/replace blocks to replace placeholder images with generated Qwen images
-
-    Scans ``html_content`` for placeholder ``<img>`` tags (falling back to any
-    ``<img>`` tag) plus image-like ``<div>`` elements, generates one image per
-    match via ``generate_image_with_qwen``, and emits one search/replace block
-    per placeholder variation.
-
-    Returns the blocks joined by blank lines, or "" when ``user_prompt`` is
-    empty, nothing matched, or every generation returned an error string.
-    """
-    if not user_prompt:
-        return ""
-    # Find existing image placeholders in the HTML first
-    import re
-    # Common patterns for placeholder images
-    placeholder_patterns = [
-        r'<img[^>]*src=["\'](?:placeholder|dummy|sample|example)[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://via\.placeholder\.com[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://picsum\.photos[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://dummyimage\.com[^"\']*["\'][^>]*>',
-        r'<img[^>]*alt=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*class=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*id=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']data:image[^"\']*["\'][^>]*>',  # Base64 images
-        r'<img[^>]*src=["\']#["\'][^>]*>',  # Empty src
-        r'<img[^>]*src=["\']about:blank["\'][^>]*>',  # About blank
-    ]
-    # Find all placeholder images
-    # A tag matching several patterns is collected once per matching pattern.
-    placeholder_images = []
-    for pattern in placeholder_patterns:
-        matches = re.findall(pattern, html_content, re.IGNORECASE)
-        placeholder_images.extend(matches)
-    # Filter out HF URLs from placeholders (they are real generated content)
-    placeholder_images = [img for img in placeholder_images if 'huggingface.co/datasets/' not in img]
-    # If no placeholder images found, look for any img tags
-    if not placeholder_images:
-        img_pattern = r'<img[^>]*>'
-        # Case-insensitive to catch <IMG> or mixed-case tags
-        placeholder_images = re.findall(img_pattern, html_content, re.IGNORECASE)
-    # Also look for div elements that might be image placeholders
-    # The lazy ".*?</div>" stops at the first closing tag, so nested divs are cut short.
-    div_placeholder_patterns = [
-        r'<div[^>]*class=["\'][^"\']*(?:image|img|photo|picture)[^"\']*["\'][^>]*>.*?</div>',
-        r'<div[^>]*id=["\'][^"\']*(?:image|img|photo|picture)[^"\']*["\'][^>]*>.*?</div>',
-    ]
-    for pattern in div_placeholder_patterns:
-        matches = re.findall(pattern, html_content, re.IGNORECASE | re.DOTALL)
-        placeholder_images.extend(matches)
-    # Count how many images we need to generate
-    num_images_needed = len(placeholder_images)
-    if num_images_needed == 0:
-        return ""
-    # Generate image prompts based on the number of images found
-    image_prompts = extract_image_prompts_from_text(user_prompt, num_images_needed)
-    # Generate images for each prompt
-    # Results starting with "Error" are dropped, so fewer images than
-    # placeholders may remain.
-    generated_images = []
-    for i, prompt in enumerate(image_prompts):
-        image_html = generate_image_with_qwen(prompt, i, token=None)  # TODO: Pass token from parent context
-        if not image_html.startswith("Error"):
-            generated_images.append((i, image_html))
-    if not generated_images:
-        return ""
-    # Create search/replace blocks
-    # Pairing is by position in generated_images, not by prompt_index: the
-    # k-th successful image replaces the k-th placeholder.
-    replacement_blocks = []
-    for i, (prompt_index, generated_image) in enumerate(generated_images):
-        if i < len(placeholder_images):
-            # Replace existing placeholder
-            placeholder = placeholder_images[i]
-            # Clean up the placeholder for better matching
-            placeholder_clean = re.sub(r'\s+', ' ', placeholder.strip())
-            # Try multiple variations of the placeholder for better matching
-            # The last two entries equal placeholder_clean, because whitespace
-            # is already collapsed, so duplicate blocks are emitted.
-            placeholder_variations = [
-                placeholder_clean,
-                placeholder_clean.replace('"', "'"),
-                placeholder_clean.replace("'", '"'),
-                re.sub(r'\s+', ' ', placeholder_clean),
-                placeholder_clean.replace('  ', ' '),
-            ]
-            # Create a replacement block for each variation
-            for variation in placeholder_variations:
-                replacement_blocks.append(f"""{SEARCH_START}
-{variation}
-{DIVIDER}
-{generated_image}
-{REPLACE_END}""")
-        else:
-            # NOTE(review): this branch looks unreachable, since at most one
-            # image is generated per placeholder; confirm before relying on it.
-            # Add new image if we have more generated images than placeholders
-            # Find a good insertion point (after body tag or main content)
-            if '<body' in html_content:
-                body_end = html_content.find('>', html_content.find('<body')) + 1
-                insertion_point = html_content[:body_end] + '\n    '
-                replacement_blocks.append(f"""{SEARCH_START}
-{insertion_point}
-{DIVIDER}
-{insertion_point}
-    {generated_image}
-{REPLACE_END}""")
-    return '\n\n'.join(replacement_blocks)
-def create_image_replacement_blocks_text_to_image_single(html_content: str, prompt: str) -> str:
-    """Create search/replace blocks that generate and insert ONLY ONE text-to-image result.
-
-    Args:
-        html_content: The HTML document to scan for an image slot.
-        prompt: Text prompt for the single image to generate.
-
-    Returns:
-        Search/replace blocks (joined by blank lines) that swap the first
-        placeholder ``<img>`` for the generated image, or a block inserting the
-        image right after the opening ``<body>`` tag, or an append-style block
-        when there is no ``<body>``. Returns "" when the prompt is blank or the
-        generation result starts with "Error".
-    """
-    if not prompt or not prompt.strip():
-        return ""
-    import re
-    # Detect placeholders similarly to the multi-image version
-    placeholder_patterns = [
-        r'<img[^>]*src=["\'](?:placeholder|dummy|sample|example)[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://via\.placeholder\.com[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://picsum\.photos[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://dummyimage\.com[^"\']*["\'][^>]*>',
-        r'<img[^>]*alt=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*class=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*id=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']data:image[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']#["\'][^>]*>',
-        r'<img[^>]*src=["\']about:blank["\'][^>]*>',
-    ]
-    placeholder_images = []
-    for pattern in placeholder_patterns:
-        placeholder_images.extend(re.findall(pattern, html_content, re.IGNORECASE))
-    # Filter out HF URLs from placeholders (they are real generated content)
-    placeholder_images = [img for img in placeholder_images if 'huggingface.co/datasets/' not in img]
-    # Fallback to any <img> if no placeholders; case-insensitive so <IMG> and
-    # mixed-case tags are found, matching the multi-image version.
-    if not placeholder_images:
-        placeholder_images = re.findall(r'<img[^>]*>', html_content, re.IGNORECASE)
-    # Generate a single image
-    image_html = generate_image_with_qwen(prompt, 0, token=None)  # TODO: Pass token from parent context
-    if image_html.startswith("Error"):
-        return ""
-    # Replace first placeholder if present
-    if placeholder_images:
-        placeholder = placeholder_images[0]
-        placeholder_clean = re.sub(r'\s+', ' ', placeholder.strip())
-        # Quote-style variants improve the chance that one block matches.
-        placeholder_variations = [
-            placeholder_clean,
-            placeholder_clean.replace('"', "'"),
-            placeholder_clean.replace("'", '"'),
-            re.sub(r'\s+', ' ', placeholder_clean),
-            placeholder_clean.replace('  ', ' '),
-        ]
-        blocks = []
-        for variation in placeholder_variations:
-            blocks.append(f"""{SEARCH_START}
-{variation}
-{DIVIDER}
-{image_html}
-{REPLACE_END}""")
-        return '\n\n'.join(blocks)
-    # Otherwise insert after <body>
-    if '<body' in html_content:
-        body_end = html_content.find('>', html_content.find('<body')) + 1
-        # The search text is everything up to and including the <body ...> tag.
-        insertion_point = html_content[:body_end] + '\n    '
-        return f"""{SEARCH_START}
-{insertion_point}
-{DIVIDER}
-{insertion_point}
-    {image_html}
-{REPLACE_END}"""
-    # If no <body>, just append
-    return f"{SEARCH_START}\n\n{DIVIDER}\n{image_html}\n{REPLACE_END}"
-def create_video_replacement_blocks_text_to_video(html_content: str, prompt: str, session_id: Optional[str] = None) -> str:
-    """Create search/replace blocks that generate and insert ONLY ONE text-to-video result.
-
-    Args:
-        html_content: The HTML document to scan for a slot for the video.
-        prompt: Text prompt for the video to generate.
-        session_id: Forwarded to ``generate_video_from_text``.
-
-    Returns:
-        Search/replace blocks that either swap the first placeholder ``<img>``
-        for the generated video, insert it just inside the first
-        ``<main>``/hero ``<section>``/container ``<div>``/``<header>`` after
-        ``<body>``, insert a wrapped video right after ``<body>``, or append it
-        when there is no ``<body>``. Returns "" when the prompt is blank or the
-        generation result starts with "Error".
-    """
-    if not prompt or not prompt.strip():
-        return ""
-    import re
-    # Detect the same placeholders as image counterparts, to replace the first image slot with a video
-    placeholder_patterns = [
-        r'<img[^>]*src=["\'](?:placeholder|dummy|sample|example)[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://via\.placeholder\.com[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://picsum\.photos[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']https?://dummyimage\.com[^"\']*["\'][^>]*>',
-        r'<img[^>]*alt=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*class=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*id=["\'][^"\']*placeholder[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']data:image[^"\']*["\'][^>]*>',
-        r'<img[^>]*src=["\']#["\'][^>]*>',
-        r'<img[^>]*src=["\']about:blank["\'][^>]*>',
-    ]
-    placeholder_images = []
-    for pattern in placeholder_patterns:
-        placeholder_images.extend(re.findall(pattern, html_content, re.IGNORECASE))
-    # Filter out HF URLs from placeholders (they are real generated content)
-    placeholder_images = [img for img in placeholder_images if 'huggingface.co/datasets/' not in img]
-    # Fallback to any <img>; case-insensitive so <IMG> and mixed-case tags are found.
-    if not placeholder_images:
-        placeholder_images = re.findall(r'<img[^>]*>', html_content, re.IGNORECASE)
-    video_html = generate_video_from_text(prompt, session_id=session_id, token=None)  # TODO: Pass token from parent context
-    if video_html.startswith("Error"):
-        return ""
-    # Replace first placeholder if present
-    if placeholder_images:
-        placeholder = placeholder_images[0]
-        placeholder_clean = re.sub(r'\s+', ' ', placeholder.strip())
-        # The raw match comes first so an exact-text search is tried too.
-        placeholder_variations = [
-            placeholder,
-            placeholder_clean,
-            placeholder_clean.replace('"', "'"),
-            placeholder_clean.replace("'", '"'),
-            re.sub(r'\s+', ' ', placeholder_clean),
-            placeholder_clean.replace('  ', ' '),
-        ]
-        blocks = []
-        for variation in placeholder_variations:
-            blocks.append(f"""{SEARCH_START}
-{variation}
-{DIVIDER}
-{video_html}
-{REPLACE_END}""")
-        return '\n\n'.join(blocks)
-    # Otherwise insert after <body> with proper container
-    if '<body' in html_content:
-        body_start = html_content.find('<body')
-        body_end = html_content.find('>', body_start) + 1
-        # Try to find a good insertion point within existing content structure
-        patterns_to_try = [
-            r'<main[^>]*>',
-            r'<section[^>]*class="[^"]*hero[^"]*"[^>]*>',
-            r'<div[^>]*class="[^"]*container[^"]*"[^>]*>',
-            r'<header[^>]*>',
-        ]
-        insertion_point = None
-        for pattern in patterns_to_try:
-            # Search only the part of the document after the opening <body> tag.
-            match = re.search(pattern, html_content[body_end:], re.IGNORECASE)
-            if match:
-                match_end = body_end + match.end()
-                insertion_point = html_content[:match_end] + '\n        '
-                break
-        if not insertion_point:
-            # Fallback to right after body tag with container div
-            insertion_point = html_content[:body_end] + '\n    '
-            video_with_container = f'<div class="video-container" style="margin: 20px 0; text-align: center;">\n        {video_html}\n    </div>'
-            return f"""{SEARCH_START}
-{insertion_point}
-{DIVIDER}
-{insertion_point}
-    {video_with_container}
-{REPLACE_END}"""
-        else:
-            return f"""{SEARCH_START}
-{insertion_point}
-{DIVIDER}
-{insertion_point}
-        {video_html}
-{REPLACE_END}"""
-    # If no <body>, just append
-    return f"{SEARCH_START}\n\n{DIVIDER}\n{video_html}\n{REPLACE_END}"
-def create_music_replacement_blocks_text_to_music(html_content: str, prompt: str, session_id: Optional[str] = None) -> str:
-    """Build search/replace blocks that insert one generated <audio> player.
-
-    The player goes after the first <section>...</section> when one exists,
-    otherwise right after the opening <body> tag, otherwise it is appended.
-    Returns "" for a blank prompt or when generation reports an error.
-    """
-    import re
-    if not (prompt and prompt.strip()):
-        return ""
-    audio_html = generate_music_from_text(prompt, session_id=session_id, token=None)  # TODO: Pass token from parent context
-    if audio_html.startswith("Error"):
-        return ""
-    first_section = re.search(r"<section\b[\s\S]*?</section>", html_content, flags=re.IGNORECASE)
-    if first_section:
-        raw_section = first_section.group(0)
-        collapsed = re.sub(r"\s+", " ", raw_section.strip())
-        # Raw text first, then whitespace-collapsed and quote-swapped forms.
-        candidates = [
-            raw_section,
-            collapsed,
-            collapsed.replace('"', "'"),
-            collapsed.replace("'", '"'),
-            re.sub(r"\s+", " ", collapsed),
-        ]
-        return "\n\n".join([f"""{SEARCH_START}
-{v}
-{DIVIDER}
-{v}\n    {audio_html}
-{REPLACE_END}""" for v in candidates])
-    if '<body' in html_content:
-        body_open = html_content.find('<body')
-        body_end = html_content.find('>', body_open) + 1
-        insertion_point = html_content[:body_end] + '\n    '
-        return f"""{SEARCH_START}
-{insertion_point}
-{DIVIDER}
-{insertion_point}
-    {audio_html}
-{REPLACE_END}"""
-    # No <body> tag at all: emit an append-style block.
-    return f"{SEARCH_START}\n\n{DIVIDER}\n{audio_html}\n{REPLACE_END}"

+"""
+Media generation functions for images, videos, and music using various AI models.
+"""
 import os
+import io
 import base64
 import requests
 import tempfile
+from typing import Optional, Dict, Any
+from PIL import Image
+import numpy as np
 import gradio as gr
+from huggingface_hub import InferenceClient
+from utils import create_temp_media_url, compress_media_for_data_uri, validate_video_html
+from config import HF_TOKEN
+class MediaGenerator:
+    """Handles generation of images, videos, and music
+
+    Image and video methods go through a Hugging Face ``InferenceClient``;
+    music generation posts to the ElevenLabs HTTP API. Each public
+    ``generate_*`` method catches ``Exception`` and returns either an HTML
+    snippet or a string starting with ``"Error"``.
+    """
+    def __init__(self):
+        # The client stays None when HF_TOKEN is falsy; the image and video
+        # methods check for that and return an error string.
+        self.hf_client = None
+        if HF_TOKEN:
+            self.hf_client = InferenceClient(
+                provider="auto",
+                api_key=HF_TOKEN,
+                bill_to="huggingface"
+            )
+    def generate_image_with_qwen(self, prompt: str, image_index: int = 0,
+                                token: Optional[gr.OAuthToken] = None) -> str:
+        """Generate image using Qwen image model"""
         try:
+            if not self.hf_client:
+                return "Error: HF_TOKEN environment variable is not set."
+            print(f"[ImageGen] Generating image with prompt: {prompt}")
+            # Generate image using Qwen/Qwen-Image model
+            image = self.hf_client.text_to_image(
+                prompt,
+                model="Qwen/Qwen-Image",
+            )
+            # Resize image to reduce size while maintaining quality
+            max_size = 1024
+            if image.width > max_size or image.height > max_size:
+                image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
+            # Convert to bytes
+            buffer = io.BytesIO()
+            image.convert('RGB').save(buffer, format='JPEG', quality=90, optimize=True)
+            image_bytes = buffer.getvalue()
+            # Create temporary URL
+            filename = f"generated_image_{image_index}.jpg"
+            temp_url = self._upload_media_to_hf(image_bytes, filename, "image", token, use_temp=True)
+            if temp_url.startswith("Error"):
+                return temp_url
+            # NOTE(review): prompt is placed in the alt attribute unescaped; a
+            # prompt containing a double quote would break the markup.
+            return f'<img src="{temp_url}" alt="{prompt}" style="max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0;" loading="lazy" />'
+        except Exception as e:
+            print(f"Image generation error: {str(e)}")
+            return f"Error generating image: {str(e)}"
+    def generate_image_to_image(self, input_image_data, prompt: str,
+                              token: Optional[gr.OAuthToken] = None) -> str:
+        """Generate image using image-to-image with Qwen-Image-Edit"""
         try:
+            if not self.hf_client:
+                return "Error: HF_TOKEN environment variable is not set."
+            print(f"[Image2Image] Processing with prompt: {prompt}")
+            # Normalize input image to bytes
+            pil_image = self._process_input_image(input_image_data)
+            # Resize input image to avoid request body size limits
+            max_input_size = 1024
+            if pil_image.width > max_input_size or pil_image.height > max_input_size:
+                pil_image.thumbnail((max_input_size, max_input_size), Image.Resampling.LANCZOS)
+            # Convert to bytes
+            buf = io.BytesIO()
+            pil_image.save(buf, format='JPEG', quality=85, optimize=True)
+            input_bytes = buf.getvalue()
+            # Call image-to-image
+            image = self.hf_client.image_to_image(
+                input_bytes,
+                prompt=prompt,
+                model="Qwen/Qwen-Image-Edit",
+            )
+            # Resize and optimize output
+            max_size = 1024
+            if image.width > max_size or image.height > max_size:
+                image.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
+            out_buf = io.BytesIO()
+            image.convert('RGB').save(out_buf, format='JPEG', quality=90, optimize=True)
+            image_bytes = out_buf.getvalue()
+            # Create temporary URL
+            filename = "image_to_image_result.jpg"
+            temp_url = self._upload_media_to_hf(image_bytes, filename, "image", token, use_temp=True)
+            if temp_url.startswith("Error"):
+                return temp_url
+            # NOTE(review): prompt is placed in the alt attribute unescaped.
+            return f'<img src="{temp_url}" alt="{prompt}" style="max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0;" loading="lazy" />'
         except Exception as e:
+            print(f"Image-to-image generation error: {str(e)}")
+            return f"Error generating image (image-to-image): {str(e)}"
+    def generate_video_from_image(self, input_image_data, prompt: str,
+                                session_id: Optional[str] = None,
+                                token: Optional[gr.OAuthToken] = None) -> str:
+        """Generate video from input image using Lightricks LTX-Video"""
+        # session_id is accepted but not referenced in this method.
         try:
+            print("[Image2Video] Starting video generation")
+            if not self.hf_client:
+                return "Error: HF_TOKEN environment variable is not set."
+            # Process input image
+            pil_image = self._process_input_image(input_image_data)
+            print(f"[Image2Video] Input image size: {pil_image.size}")
+            # Compress image for API limits
+            input_bytes = self._compress_image_for_video(pil_image, max_size_mb=3.9)
+            # Check for image-to-video method
+            # Looked up dynamically so a client without the method yields an
+            # error string rather than an AttributeError.
+            image_to_video_method = getattr(self.hf_client, "image_to_video", None)
+            if not callable(image_to_video_method):
+                return ("Error: Your huggingface_hub version does not support image_to_video. "
+                       "Please upgrade with `pip install -U huggingface_hub`")
+            model_id = "Lightricks/LTX-Video-0.9.8-13B-distilled"
+            print(f"[Image2Video] Calling API with model: {model_id}")
+            video_bytes = image_to_video_method(
+                input_bytes,
+                prompt=prompt,
+                model=model_id,
             )
+            print(f"[Image2Video] Received video bytes: {len(video_bytes) if hasattr(video_bytes, '__len__') else 'unknown'}")
+            # Create temporary URL
+            filename = "image_to_video_result.mp4"
+            temp_url = self._upload_media_to_hf(video_bytes, filename, "video", token, use_temp=True)
+            if temp_url.startswith("Error"):
+                return temp_url
+            video_html = self._create_video_html(temp_url)
+            if not validate_video_html(video_html):
+                return "Error: Generated video HTML is malformed"
+            print(f"[Image2Video] Successfully generated video: {temp_url}")
+            return video_html
         except Exception as e:
+            print(f"[Image2Video] Error: {str(e)}")
+            return f"Error generating video (image-to-video): {str(e)}"
+    def generate_video_from_text(self, prompt: str, session_id: Optional[str] = None,
+                               token: Optional[gr.OAuthToken] = None) -> str:
+        """Generate video from text prompt using Wan-AI text-to-video model"""
+        # session_id is accepted but not referenced in this method.
         try:
+            print("[Text2Video] Starting video generation")
+            if not self.hf_client:
+                return "Error: HF_TOKEN environment variable is not set."
+            # Check for text-to-video method
+            text_to_video_method = getattr(self.hf_client, "text_to_video", None)
+            if not callable(text_to_video_method):
+                return ("Error: Your huggingface_hub version does not support text_to_video. "
+                       "Please upgrade with `pip install -U huggingface_hub`")
+            model_id = "Wan-AI/Wan2.2-T2V-A14B"
+            # A None prompt becomes "" here; an empty prompt is still sent.
+            prompt_str = (prompt or "").strip()
+            print(f"[Text2Video] Using model: {model_id}, prompt length: {len(prompt_str)}")
+            video_bytes = text_to_video_method(
+                prompt_str,
+                model=model_id,
             )
+            print(f"[Text2Video] Received video bytes: {len(video_bytes) if hasattr(video_bytes, '__len__') else 'unknown'}")
+            # Create temporary URL
+            filename = "text_to_video_result.mp4"
+            temp_url = self._upload_media_to_hf(video_bytes, filename, "video", token, use_temp=True)
+            if temp_url.startswith("Error"):
+                return temp_url
+            video_html = self._create_video_html(temp_url)
+            if not validate_video_html(video_html):
+                return "Error: Generated video HTML is malformed"
+            print(f"[Text2Video] Successfully generated video: {temp_url}")
+            return video_html
+        except Exception as e:
+            print(f"[Text2Video] Error: {str(e)}")
+            return f"Error generating video (text-to-video): {str(e)}"
+    def generate_music_from_text(self, prompt: str, music_length_ms: int = 30000,
+                               session_id: Optional[str] = None,
+                               token: Optional[gr.OAuthToken] = None) -> str:
+        """Generate music using ElevenLabs Music API"""
+        # Does not use self.hf_client; only ELEVENLABS_API_KEY is required.
+        try:
+            api_key = os.getenv('ELEVENLABS_API_KEY')
+            if not api_key:
+                return "Error: ELEVENLABS_API_KEY environment variable is not set."
+            print(f"[MusicGen] Generating music: {prompt}")
+            headers = {
+                'Content-Type': 'application/json',
+                'xi-api-key': api_key,
+            }
+            payload = {
+                'prompt': prompt or 'Epic orchestral theme with soaring strings and powerful brass',
+                'music_length_ms': int(music_length_ms) if music_length_ms else 30000,
+            }
+            resp = requests.post(
+                'https://api.elevenlabs.io/v1/music/compose',
+                headers=headers,
+                json=payload,
+                timeout=60
+            )
             try:
+                resp.raise_for_status()
             except Exception as e:
+                # Prefer the response attached to the exception, else resp.
+                error_text = getattr(e, 'response', resp).text if hasattr(e, 'response') else resp.text
+                return f"Error generating music: {error_text}"
+            # Create temporary URL
+            filename = "generated_music.mp3"
+            temp_url = self._upload_media_to_hf(resp.content, filename, "audio", token, use_temp=True)
+            if temp_url.startswith("Error"):
+                return temp_url
+            audio_html = self._create_audio_html(temp_url)
+            print(f"[MusicGen] Successfully generated music: {temp_url}")
+            return audio_html
+        except Exception as e:
+            print(f"[MusicGen] Error: {str(e)}")
+            return f"Error generating music: {str(e)}"
+    def _process_input_image(self, input_image_data) -> Image.Image:
+        """Convert various image formats to PIL Image"""
+        # Dispatch order: file-like object, PIL-like object (has mode and
+        # size), numpy array, raw bytes, then anything bytes() can convert.
         if hasattr(input_image_data, 'read'):
             raw = input_image_data.read()
             pil_image = Image.open(io.BytesIO(raw))
         elif hasattr(input_image_data, 'mode') and hasattr(input_image_data, 'size'):
             pil_image = input_image_data
+        elif isinstance(input_image_data, np.ndarray):
             pil_image = Image.fromarray(input_image_data)
         elif isinstance(input_image_data, (bytes, bytearray)):
             pil_image = Image.open(io.BytesIO(input_image_data))
         else:
             pil_image = Image.open(io.BytesIO(bytes(input_image_data)))
         # Ensure RGB
         if pil_image.mode != 'RGB':
             pil_image = pil_image.convert('RGB')
+        return pil_image
+    def _compress_image_for_video(self, pil_image: Image.Image, max_size_mb: float = 3.9) -> bytes:
+        """Compress image for video generation API limits"""
+        MAX_BYTES = int(max_size_mb * 1024 * 1024)
+        max_dim = 1024
         quality = 90
         def encode_current(pil: Image.Image, q: int) -> bytes:
             tmp = io.BytesIO()
             pil.save(tmp, format='JPEG', quality=q, optimize=True)
             return tmp.getvalue()
+        # Downscale while too large
         while max(pil_image.size) > max_dim:
             ratio = max_dim / float(max(pil_image.size))
             new_size = (max(1, int(pil_image.size[0] * ratio)), max(1, int(pil_image.size[1] * ratio)))
             pil_image = pil_image.resize(new_size, Image.Resampling.LANCZOS)
         encoded = encode_current(pil_image, quality)
+        # Reduce quality or dimensions if still too large
+        # Quality drops in steps of 10 down to 40; after that each pass shrinks
+        # both sides by 15% until the longest side is at most 640. The result
+        # can still exceed MAX_BYTES once both limits are reached.
         while len(encoded) > MAX_BYTES and (quality > 40 or max(pil_image.size) > 640):
             if quality > 40:
                 quality -= 10
             else:
                 new_w = max(1, int(pil_image.size[0] * 0.85))
                 new_h = max(1, int(pil_image.size[1] * 0.85))
                 pil_image = pil_image.resize((new_w, new_h), Image.Resampling.LANCZOS)
             encoded = encode_current(pil_image, quality)
+        return encoded
+    def _upload_media_to_hf(self, media_bytes: bytes, filename: str, media_type: str,
+                          token: Optional[gr.OAuthToken] = None, use_temp: bool = True) -> str:
+        """Upload media to HF or create temporary file"""
+        # Both paths currently return create_temp_media_url(...); token and
+        # use_temp do not change the result.
+        if use_temp:
+            return create_temp_media_url(media_bytes, filename, media_type)
+        # HF upload logic would go here for permanent URLs
+        # For now, always use temp files
+        return create_temp_media_url(media_bytes, filename, media_type)
+    def _create_video_html(self, video_url: str) -> str:
+        """Create HTML video element"""
+        return f'''<video controls autoplay muted loop playsinline
+                   style="max-width: 100%; height: auto; border-radius: 8px; margin: 10px 0; display: block;"
+                   onloadstart="this.style.backgroundColor='#f0f0f0'"
+                   onerror="this.style.display='none'; console.error('Video failed to load')">
+                   <source src="{video_url}" type="video/mp4" />
+                   <p style="text-align: center; color: #666;">Your browser does not support the video tag.</p>
+                   </video>'''
+    def _create_audio_html(self, audio_url: str) -> str:
+        """Create HTML audio player"""
+        return f'''<div class="anycoder-music" style="max-width:420px;margin:16px auto;padding:12px 16px;border:1px solid #e5e7eb;border-radius:12px;background:linear-gradient(180deg,#fafafa,#f3f4f6);box-shadow:0 2px 8px rgba(0,0,0,0.06)">
+                     <div style="font-size:13px;color:#374151;margin-bottom:8px;display:flex;align-items:center;gap:6px">
+                       <span>🎵 Generated music</span>
+                     </div>
+                     <audio controls autoplay loop style="width:100%;outline:none;">
+                       <source src="{audio_url}" type="audio/mpeg" />
+                       Your browser does not support the audio element.
+                     </audio>
+                   </div>'''
+# Global media generator instance, created once when this module is imported.
+# Its constructor builds the InferenceClient only when HF_TOKEN is set.
+media_generator = MediaGenerator()
+# Export main functions
+def generate_image_with_qwen(prompt: str, image_index: int = 0, token: Optional[gr.OAuthToken] = None) -> str:
+    """Delegate text-to-image generation to the shared ``media_generator``."""
+    return media_generator.generate_image_with_qwen(
+        prompt=prompt,
+        image_index=image_index,
+        token=token,
+    )
+def generate_image_to_image(input_image_data, prompt: str, token: Optional[gr.OAuthToken] = None) -> str:
+    """Delegate image-to-image generation to the shared ``media_generator``."""
+    return media_generator.generate_image_to_image(
+        input_image_data=input_image_data,
+        prompt=prompt,
+        token=token,
+    )
+def generate_video_from_image(input_image_data, prompt: str, session_id: Optional[str] = None,
+                            token: Optional[gr.OAuthToken] = None) -> str:
+    """Delegate image-to-video generation to the shared ``media_generator``."""
+    return media_generator.generate_video_from_image(
+        input_image_data=input_image_data,
+        prompt=prompt,
+        session_id=session_id,
+        token=token,
+    )
+def generate_video_from_text(prompt: str, session_id: Optional[str] = None,
+                           token: Optional[gr.OAuthToken] = None) -> str:
+    """Delegate text-to-video generation to the shared ``media_generator``."""
+    return media_generator.generate_video_from_text(
+        prompt=prompt,
+        session_id=session_id,
+        token=token,
+    )
+def generate_music_from_text(prompt: str, music_length_ms: int = 30000, session_id: Optional[str] = None,
+                           token: Optional[gr.OAuthToken] = None) -> str:
+    """Delegate music generation to the shared ``media_generator``."""
+    return media_generator.generate_music_from_text(
+        prompt=prompt,
+        music_length_ms=music_length_ms,
+        session_id=session_id,
+        token=token,
+    )