mgbam committed on
Commit
0c8eb2b
·
verified ·
1 Parent(s): 42482a9

Rename models.py to utils.py

Browse files
Files changed (2) hide show
  1. models.py +0 -273
  2. utils.py +539 -0
models.py DELETED
@@ -1,273 +0,0 @@
1
- import os
2
- from huggingface_hub import InferenceClient
3
- from openai import OpenAI
4
- from mistralai import Mistral
5
-
6
# Model catalogue as (display name, model id, description) rows, in display order.
_MODEL_ROWS = (
    ("Moonshot Kimi-K2", "moonshotai/Kimi-K2-Instruct",
     "Moonshot AI Kimi-K2-Instruct model for code generation and general tasks"),
    ("Kimi K2 Turbo (Preview)", "kimi-k2-turbo-preview",
     "Moonshot AI Kimi K2 Turbo via OpenAI-compatible API"),
    ("DeepSeek V3", "deepseek-ai/DeepSeek-V3-0324",
     "DeepSeek V3 model for code generation"),
    ("DeepSeek V3.1", "deepseek-ai/DeepSeek-V3.1",
     "DeepSeek V3.1 model for code generation and general tasks"),
    ("DeepSeek R1", "deepseek-ai/DeepSeek-R1-0528",
     "DeepSeek R1 model for code generation"),
    ("ERNIE-4.5-VL", "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT",
     "ERNIE-4.5-VL model for multimodal code generation with image support"),
    ("MiniMax M1", "MiniMaxAI/MiniMax-M1-80k",
     "MiniMax M1 model for code generation and general tasks"),
    ("Qwen3-235B-A22B", "Qwen/Qwen3-235B-A22B",
     "Qwen3-235B-A22B model for code generation and general tasks"),
    ("SmolLM3-3B", "HuggingFaceTB/SmolLM3-3B",
     "SmolLM3-3B model for code generation and general tasks"),
    ("GLM-4.5", "zai-org/GLM-4.5",
     "GLM-4.5 model with thinking capabilities for advanced code generation"),
    ("GLM-4.5V", "zai-org/GLM-4.5V",
     "GLM-4.5V multimodal model with image understanding for code generation"),
    ("GLM-4.1V-9B-Thinking", "THUDM/GLM-4.1V-9B-Thinking",
     "GLM-4.1V-9B-Thinking model for multimodal code generation with image support"),
    ("Qwen3-235B-A22B-Instruct-2507", "Qwen/Qwen3-235B-A22B-Instruct-2507",
     "Qwen3-235B-A22B-Instruct-2507 model for code generation and general tasks"),
    ("Qwen3-Coder-480B-A35B-Instruct", "Qwen/Qwen3-Coder-480B-A35B-Instruct",
     "Qwen3-Coder-480B-A35B-Instruct model for advanced code generation and programming tasks"),
    ("Qwen3-32B", "Qwen/Qwen3-32B",
     "Qwen3-32B model for code generation and general tasks"),
    ("Qwen3-4B-Instruct-2507", "Qwen/Qwen3-4B-Instruct-2507",
     "Qwen3-4B-Instruct-2507 model for code generation and general tasks"),
    ("Qwen3-4B-Thinking-2507", "Qwen/Qwen3-4B-Thinking-2507",
     "Qwen3-4B-Thinking-2507 model with advanced reasoning capabilities for code generation and general tasks"),
    ("Qwen3-235B-A22B-Thinking", "Qwen/Qwen3-235B-A22B-Thinking-2507",
     "Qwen3-235B-A22B-Thinking model with advanced reasoning capabilities"),
    ("Qwen3-30B-A3B-Instruct-2507", "qwen3-30b-a3b-instruct-2507",
     "Qwen3-30B-A3B-Instruct model via Alibaba Cloud DashScope API"),
    ("Qwen3-30B-A3B-Thinking-2507", "qwen3-30b-a3b-thinking-2507",
     "Qwen3-30B-A3B-Thinking model with advanced reasoning via Alibaba Cloud DashScope API"),
    ("Qwen3-Coder-30B-A3B-Instruct", "qwen3-coder-30b-a3b-instruct",
     "Qwen3-Coder-30B-A3B-Instruct model for advanced code generation via Alibaba Cloud DashScope API"),
    ("Cohere Command-A Reasoning 08-2025", "CohereLabs/command-a-reasoning-08-2025",
     "Cohere Labs Command-A Reasoning (Aug 2025) via Hugging Face InferenceClient"),
    ("StepFun Step-3", "step-3",
     "StepFun Step-3 model - AI chat assistant by 阶跃星辰 with multilingual capabilities"),
    ("Codestral 2508", "codestral-2508",
     "Mistral Codestral model - specialized for code generation and programming tasks"),
    ("Mistral Medium 2508", "mistral-medium-2508",
     "Mistral Medium 2508 model via Mistral API for general tasks and coding"),
    ("Gemini 2.5 Flash", "gemini-2.5-flash",
     "Google Gemini 2.5 Flash via OpenAI-compatible API"),
    ("Gemini 2.5 Pro", "gemini-2.5-pro",
     "Google Gemini 2.5 Pro via OpenAI-compatible API"),
    ("GPT-OSS-120B", "openai/gpt-oss-120b",
     "OpenAI GPT-OSS-120B model for advanced code generation and general tasks"),
    ("GPT-OSS-20B", "openai/gpt-oss-20b",
     "OpenAI GPT-OSS-20B model for code generation and general tasks"),
    ("GPT-5", "gpt-5",
     "OpenAI GPT-5 model for advanced code generation and general tasks"),
    ("Grok-4", "grok-4",
     "Grok-4 model via Poe (OpenAI-compatible) for advanced tasks"),
    ("Claude-Opus-4.1", "claude-opus-4.1",
     "Anthropic Claude Opus 4.1 via Poe (OpenAI-compatible)"),
)

# Public catalogue: one dict per model with "name", "id" and "description" keys.
AVAILABLE_MODELS = [
    {"name": display_name, "id": model_id, "description": description}
    for display_name, model_id, description in _MODEL_ROWS
]
168
-
169
# Default model: the catalogue entry whose display name matches
# DEFAULT_MODEL_NAME, otherwise the first catalogue entry (None if the
# catalogue is empty).
DEFAULT_MODEL_NAME = "Qwen3-Coder-480B-A35B-Instruct"
DEFAULT_MODEL = next(
    (entry for entry in AVAILABLE_MODELS if entry.get("name") == DEFAULT_MODEL_NAME),
    None,
)
if DEFAULT_MODEL is None and AVAILABLE_MODELS:
    DEFAULT_MODEL = AVAILABLE_MODELS[0]

# Hugging Face token; importing this module fails fast when it is missing.
HF_TOKEN = os.getenv('HF_TOKEN')
if not HF_TOKEN:
    raise RuntimeError("HF_TOKEN environment variable is not set. Please set it to your Hugging Face API token.")
183
-
184
def get_inference_client(model_id, provider="auto"):
    """Build the API client that serves ``model_id``.

    Three kinds of client can come back:

    * an ``OpenAI`` client pointed at a vendor's OpenAI-compatible endpoint
      (DashScope, Poe, StepFun, Gemini, Moonshot), keyed by that vendor's
      environment variable;
    * a ``Mistral`` client for the two Mistral-hosted model ids;
    * otherwise a Hugging Face ``InferenceClient`` authenticated with
      ``HF_TOKEN`` and billed to "huggingface".

    ``provider`` is only used for the Hugging Face client, and is overridden
    for model ids that are pinned to a specific provider.
    """
    dashscope = ("DASHSCOPE_API_KEY", "https://dashscope.aliyuncs.com/compatible-mode/v1")
    poe = ("POE_API_KEY", "https://api.poe.com/v1")
    gemini = ("GEMINI_API_KEY", "https://generativelanguage.googleapis.com/v1beta/openai/")

    # model id -> (environment variable holding the key, endpoint base URL)
    openai_compatible = {
        "qwen3-30b-a3b-instruct-2507": dashscope,
        "qwen3-30b-a3b-thinking-2507": dashscope,
        "qwen3-coder-30b-a3b-instruct": dashscope,
        "gpt-5": poe,
        "grok-4": poe,
        "claude-opus-4.1": poe,
        "step-3": ("STEP_API_KEY", "https://api.stepfun.com/v1"),
        "gemini-2.5-flash": gemini,
        "gemini-2.5-pro": gemini,
        "kimi-k2-turbo-preview": ("MOONSHOT_API_KEY", "https://api.moonshot.ai/v1"),
    }
    endpoint = openai_compatible.get(model_id)
    if endpoint is not None:
        key_variable, base_url = endpoint
        return OpenAI(api_key=os.getenv(key_variable), base_url=base_url)

    if model_id in ("codestral-2508", "mistral-medium-2508"):
        return Mistral(api_key=os.getenv("MISTRAL_API_KEY"))

    # Hugging Face models that are pinned to one inference provider.
    pinned_provider = {
        "openai/gpt-oss-120b": "groq",
        "openai/gpt-oss-20b": "groq",
        "moonshotai/Kimi-K2-Instruct": "groq",
        "Qwen/Qwen3-235B-A22B": "cerebras",
        "Qwen/Qwen3-235B-A22B-Instruct-2507": "cerebras",
        "Qwen/Qwen3-32B": "cerebras",
        "Qwen/Qwen3-235B-A22B-Thinking-2507": "cerebras",
        "Qwen/Qwen3-Coder-480B-A35B-Instruct": "cerebras",
        "deepseek-ai/DeepSeek-V3.1": "novita",
        "zai-org/GLM-4.5": "fireworks-ai",
    }
    return InferenceClient(
        provider=pinned_provider.get(model_id, provider),
        api_key=HF_TOKEN,
        bill_to="huggingface"
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
utils.py ADDED
@@ -0,0 +1,539 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Utility functions for file handling, text processing, OCR, and general operations.
3
+ """
4
+
5
+ import os
6
+ import re
7
+ import mimetypes
8
+ import tempfile
9
+ import uuid
10
+ import datetime
11
+ import base64
12
+ import time
13
+ import threading
14
+ import atexit
15
+ from typing import Dict, List, Optional, Tuple, Union
16
+ from pathlib import Path
17
+
18
+ import PyPDF2
19
+ import docx
20
+ import cv2
21
+ import numpy as np
22
+ from PIL import Image
23
+ import pytesseract
24
+ from huggingface_hub import InferenceClient, HfApi
25
+ import gradio as gr
26
+
27
+ from config import HF_TOKEN, SEARCH_START, DIVIDER, REPLACE_END, TEMP_DIR_TTL_SECONDS
28
+
29
# Scratch directories for generated media, all under the system temp dir.
MEDIA_TEMP_DIR, VIDEO_TEMP_DIR, AUDIO_TEMP_DIR = (
    os.path.join(tempfile.gettempdir(), leaf)
    for leaf in ("anycoder_media", "anycoder_videos", "anycoder_audio")
)

# Per-session registries: session id -> paths of files created for that session.
_SESSION_MEDIA_FILES: Dict[str, List[str]] = dict()
_SESSION_VIDEO_FILES: Dict[str, List[str]] = dict()
_SESSION_AUDIO_FILES: Dict[str, List[str]] = dict()

# One lock per registry above.
_MEDIA_FILES_LOCK, _VIDEO_FILES_LOCK, _AUDIO_FILES_LOCK = (
    threading.Lock() for _ in range(3)
)

# Registry of temp media files, keyed by file id.
temp_media_files = dict()
42
+
43
def ensure_temp_dirs():
    """Create the media, video and audio temp directories if they are missing.

    Best-effort: a directory that cannot be created is reported on stdout and
    skipped, so this function never raises. The failure then surfaces where a
    caller tries to write into that directory.
    """
    for temp_dir in (MEDIA_TEMP_DIR, VIDEO_TEMP_DIR, AUDIO_TEMP_DIR):
        try:
            os.makedirs(temp_dir, exist_ok=True)
        except Exception as e:
            # Stay non-fatal, but leave a trace instead of failing silently.
            print(f"[TempMedia] Could not create temp directory {temp_dir}: {e}")
50
+
51
def get_inference_client(model_id: str, provider: str = "auto"):
    """Return the API client that serves ``model_id``.

    Routing, in order:

    1. Model ids hosted behind an OpenAI-compatible endpoint (DashScope, Poe,
       StepFun, Moonshot, Gemini) get an ``OpenAI`` client using that vendor's
       API-key environment variable and base URL.
    2. The Mistral-hosted ids get a ``Mistral`` client.
    3. Everything else gets a Hugging Face ``InferenceClient`` billed to
       "huggingface"; ``provider`` is used as given unless the model id is
       pinned to a specific provider.

    Args:
        model_id: Identifier of the model to talk to.
        provider: Hugging Face inference provider; ignored for routes 1 and 2.

    Raises:
        RuntimeError: If ``HF_TOKEN`` is not configured (checked up front for
            every route).
    """
    if not HF_TOKEN:
        raise RuntimeError("HF_TOKEN environment variable is not set")

    dashscope = ("DASHSCOPE_API_KEY", "https://dashscope.aliyuncs.com/compatible-mode/v1")
    poe = ("POE_API_KEY", "https://api.poe.com/v1")
    gemini = ("GEMINI_API_KEY", "https://generativelanguage.googleapis.com/v1beta/openai/")

    # model id -> (environment variable holding the key, endpoint base URL).
    # The key is read only for the selected model, at call time.
    openai_models = {
        "qwen3-30b-a3b-instruct-2507": dashscope,
        "qwen3-30b-a3b-thinking-2507": dashscope,
        "qwen3-coder-30b-a3b-instruct": dashscope,
        "gpt-5": poe,
        "grok-4": poe,
        "claude-opus-4.1": poe,
        "step-3": ("STEP_API_KEY", "https://api.stepfun.com/v1"),
        "kimi-k2-turbo-preview": ("MOONSHOT_API_KEY", "https://api.moonshot.ai/v1"),
        "gemini-2.5-flash": gemini,
        "gemini-2.5-pro": gemini,
    }

    if model_id in openai_models:
        from openai import OpenAI
        env_var, base_url = openai_models[model_id]
        return OpenAI(api_key=os.getenv(env_var), base_url=base_url)

    # Mistral models
    if model_id in ("codestral-2508", "mistral-medium-2508"):
        from mistralai import Mistral
        return Mistral(api_key=os.getenv("MISTRAL_API_KEY"))

    # Hugging Face models pinned to one inference provider.
    provider_map = {
        "openai/gpt-oss-120b": "groq",
        "openai/gpt-oss-20b": "groq",
        "moonshotai/Kimi-K2-Instruct": "groq",
        "Qwen/Qwen3-235B-A22B": "cerebras",
        "Qwen/Qwen3-235B-A22B-Instruct-2507": "cerebras",
        "Qwen/Qwen3-32B": "cerebras",
        "Qwen/Qwen3-235B-A22B-Thinking-2507": "cerebras",
        "Qwen/Qwen3-Coder-480B-A35B-Instruct": "cerebras",
        "deepseek-ai/DeepSeek-V3.1": "novita",
        "zai-org/GLM-4.5": "fireworks-ai",
    }

    return InferenceClient(
        provider=provider_map.get(model_id, provider),
        api_key=HF_TOKEN,
        bill_to="huggingface"
    )
104
+
105
def remove_code_block(text: str) -> str:
    """Strip Markdown code-fence wrapping from ``text`` and return the payload.

    The first fenced block found is used (``html`` fences are tried first, then
    bare fences, then any fence). A leading language-marker line is dropped,
    and anything before the start of an HTML document inside the payload is
    cut. Unfenced input is returned stripped, with the same HTML-prefix cut
    when it starts with ``<``. Falsy input is returned unchanged.
    """
    if not text:
        return text

    def _cut_to_html_start(candidate):
        # Return candidate from its first HTML document marker onward, or None
        # when no marker sits strictly after position 0.
        for marker in ['<!DOCTYPE html', '<html']:
            position = candidate.find(marker)
            if position > 0:
                return candidate[position:].strip()
        return None

    fence_patterns = (
        r'```(?:html|HTML)\n([\s\S]+?)\n```',
        r'```\n([\s\S]+?)\n```',
        r'```([\s\S]+?)```',
    )
    language_markers = ['python', 'html', 'css', 'javascript', 'json']

    for fence in fence_patterns:
        found = re.search(fence, text, re.DOTALL)
        if not found:
            continue

        payload = found.group(1).strip()

        # A first line that is just a language name is not part of the code.
        first_line, _, remainder = payload.partition('\n')
        if first_line.strip().lower() in language_markers:
            return remainder

        trimmed = _cut_to_html_start(payload)
        return payload if trimmed is None else trimmed

    bare = text.strip()
    if bare.startswith(('<!DOCTYPE html>', '<html', '<')):
        trimmed = _cut_to_html_start(bare)
        if trimmed is not None:
            return trimmed
    return bare
144
+
145
def extract_text_from_image(image_path: str) -> str:
    """Run Tesseract OCR on an image file and return the recognised text.

    The image is converted to grayscale and binarised with Otsu's threshold
    before OCR. When OpenCV cannot decode the file (``cv2.imread`` returns
    ``None``), Pillow is tried as a fallback so that formats the installed
    OpenCV build does not handle can still be read.

    Returns:
        The stripped OCR text, "No text found in image" when OCR yields
        nothing, or a human-readable "Error..." string on failure. This
        function does not raise.
    """
    try:
        # Check if tesseract is available
        try:
            pytesseract.get_tesseract_version()
        except Exception:
            return "Error: Tesseract OCR is not installed. Please install Tesseract to extract text from images."

        image = cv2.imread(image_path)
        if image is not None:
            # imread yields BGR; go straight to grayscale.
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        else:
            # NOTE(review): GIF is assumed to be the main case here; whether
            # cv2 decodes it depends on the OpenCV build — confirm.
            try:
                with Image.open(image_path) as pil_image:
                    gray = np.array(pil_image.convert("L"))
            except Exception:
                return "Error: Could not read image file"

        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # Extract text
        text = pytesseract.image_to_string(binary, config='--psm 6').strip()
        return text if text else "No text found in image"

    except Exception as e:
        return f"Error extracting text from image: {e}"
170
+
171
def extract_text_from_file(file_path: str) -> str:
    """Extract plain text from a file, choosing the reader by extension.

    Supported: ``.pdf`` (PyPDF2), ``.txt``/``.md``/``.csv`` (read as UTF-8),
    ``.docx`` (python-docx paragraphs) and common image formats (OCR via
    ``extract_text_from_image``).

    Text files are decoded as UTF-8 with undecodable bytes replaced by
    U+FFFD, so a file in another encoding still yields its readable content
    instead of an error string.

    Returns:
        The extracted text; "" when the path is empty, missing, or has an
        unsupported extension; an "Error extracting text: ..." string when
        the reader fails. This function does not raise.
    """
    if not file_path or not os.path.exists(file_path):
        return ""

    ext = os.path.splitext(file_path)[1].lower()

    try:
        if ext == ".pdf":
            with open(file_path, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                # extract_text() may give None for a page; treat it as empty.
                return "\n".join(page.extract_text() or "" for page in reader.pages)

        if ext in (".txt", ".md", ".csv"):
            with open(file_path, "r", encoding="utf-8", errors="replace") as f:
                return f.read()

        if ext == ".docx":
            doc = docx.Document(file_path)
            return "\n".join(para.text for para in doc.paragraphs)

        if ext in (".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".tif", ".gif", ".webp"):
            return extract_text_from_image(file_path)

        return ""

    except Exception as e:
        return f"Error extracting text: {e}"
200
+
201
def compress_media_for_data_uri(media_bytes: bytes, media_type: str = "video", max_size_mb: int = 8) -> bytes:
    """Shrink media with ffmpeg when it is too large to embed as a data URI.

    Data at or below ``max_size_mb`` is returned untouched. Larger data is
    written to a temp file and re-encoded: ``media_type == "video"`` becomes
    an H.264 MP4 (480 px wide, 15 fps, audio dropped); any other
    ``media_type`` is treated as audio and becomes a 64 kbit/s MP3.

    Args:
        media_bytes: Raw media content.
        media_type: "video" selects the video pipeline; anything else audio.
        max_size_mb: Size threshold in MiB above which compression is tried.

    Returns:
        The re-encoded bytes, or the original ``media_bytes`` when no
        compression was needed or ffmpeg is missing or fails. This function
        does not raise.
    """
    max_size = max_size_mb * 1024 * 1024

    if len(media_bytes) <= max_size:
        return media_bytes

    print(f"[MediaCompress] {media_type} size {len(media_bytes)} bytes exceeds {max_size_mb}MB limit, attempting compression")

    import subprocess

    is_video = media_type == "video"
    temp_input_path = None
    temp_output_path = None

    try:
        with tempfile.NamedTemporaryFile(suffix=f'.{media_type[:3]}', delete=False) as temp_input:
            temp_input.write(media_bytes)
            temp_input_path = temp_input.name

        # The output needs a real container extension so ffmpeg can pick a muxer.
        temp_output_path = os.path.splitext(temp_input_path)[0] + (
            '_compressed.mp4' if is_video else '_compressed.mp3'
        )

        if is_video:
            codec_args = [
                '-vcodec', 'libx264', '-crf', '30', '-preset', 'fast',
                # -2 keeps the aspect ratio and rounds the height to an even
                # number, which libx264 requires.
                '-vf', 'scale=480:-2', '-r', '15',
                '-an',  # Remove audio
            ]
        else:
            codec_args = ['-codec:a', 'libmp3lame', '-b:a', '64k']

        # capture_output=True cannot be combined with an explicit stderr=, so
        # discard both streams explicitly instead.
        subprocess.run(
            ['ffmpeg', '-i', temp_input_path, *codec_args, '-y', temp_output_path],
            check=True,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        with open(temp_output_path, 'rb') as f:
            compressed_bytes = f.read()

        if not compressed_bytes:
            print(f"[MediaCompress] ffmpeg compression failed, using original {media_type}")
            return media_bytes

        print(f"[MediaCompress] Compressed from {len(media_bytes)} to {len(compressed_bytes)} bytes")
        return compressed_bytes

    except (subprocess.CalledProcessError, FileNotFoundError):
        # ffmpeg returned an error or is not installed.
        print(f"[MediaCompress] ffmpeg compression failed, using original {media_type}")
        return media_bytes
    except Exception as e:
        print(f"[MediaCompress] Compression failed: {e}, using original {media_type}")
        return media_bytes
    finally:
        # Clean up temp files
        for path in (temp_input_path, temp_output_path):
            try:
                if path and os.path.exists(path):
                    os.remove(path)
            except Exception:
                pass
259
+
260
def create_temp_media_url(media_bytes: bytes, filename: str, media_type: str = "image",
                          session_id: Optional[str] = None) -> str:
    """Write ``media_bytes`` to a uniquely named temp file and return its URL.

    The file is created in ``MEDIA_TEMP_DIR`` as
    ``<media_type>_<timestamp>_<8-char id>_<filename>``. Only the final path
    component of ``filename`` is used for the on-disk name, so a name that
    contains directory parts cannot place the file outside the temp dir.

    Args:
        media_bytes: Content to write.
        filename: Suggested file name; recorded as given in the registry.
        media_type: Prefix for the file name and registry id.
        session_id: When given, the file is tracked for per-session cleanup.

    Returns:
        A ``file://`` URL for the new file, or an "Error creating temporary
        ..." string when anything fails. This function does not raise.
    """
    try:
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        unique_id = str(uuid.uuid4())[:8]

        # Drop any directory components (either separator style) first.
        safe_name = os.path.basename(filename.replace("\\", "/"))
        base_name, ext = os.path.splitext(safe_name)
        unique_filename = f"{media_type}_{timestamp}_{unique_id}_{base_name}{ext}"

        ensure_temp_dirs()
        temp_path = os.path.join(MEDIA_TEMP_DIR, unique_filename)

        with open(temp_path, 'wb') as f:
            f.write(media_bytes)

        # Track file for cleanup
        if session_id:
            track_session_media_file(session_id, temp_path)

        # Store file info
        file_id = f"{media_type}_{unique_id}"
        temp_media_files[file_id] = {
            'path': temp_path,
            'filename': filename,
            'media_type': media_type,
            'media_bytes': media_bytes
        }

        file_url = f"file://{temp_path}"
        print(f"[TempMedia] Created temporary {media_type} file: {file_url}")
        return file_url

    except Exception as e:
        print(f"[TempMedia] Failed to create temporary file: {str(e)}")
        return f"Error creating temporary {media_type} file: {str(e)}"
297
+
298
def track_session_media_file(session_id: Optional[str], file_path: str) -> None:
    """Record ``file_path`` as belonging to ``session_id`` for later cleanup.

    Does nothing when either argument is empty. The registry is updated under
    its lock.
    """
    if session_id and file_path:
        with _MEDIA_FILES_LOCK:
            _SESSION_MEDIA_FILES.setdefault(session_id, []).append(file_path)
307
+
308
def cleanup_session_media(session_id: Optional[str]) -> None:
    """Delete every media file tracked for ``session_id`` and forget the session.

    The session's entry is removed from the registry under the lock; the files
    are then deleted outside it. Deletion errors are ignored.
    """
    if not session_id:
        return

    with _MEDIA_FILES_LOCK:
        tracked_paths = _SESSION_MEDIA_FILES.pop(session_id, [])

    for tracked in tracked_paths:
        try:
            if not tracked:
                continue
            if os.path.exists(tracked):
                os.unlink(tracked)
        except Exception:
            pass
322
+
323
def reap_old_media(ttl_seconds: int = TEMP_DIR_TTL_SECONDS) -> None:
    """Remove files in the temp directories older than ``ttl_seconds``.

    Age is measured from each file's modification time. Only regular files
    directly inside the media, video and audio temp directories are
    considered. All errors are swallowed.
    """
    try:
        ensure_temp_dirs()
        reference_time = time.time()

        for directory in (MEDIA_TEMP_DIR, VIDEO_TEMP_DIR, AUDIO_TEMP_DIR):
            if not os.path.exists(directory):
                continue

            for entry in os.listdir(directory):
                candidate = os.path.join(directory, entry)
                if not os.path.isfile(candidate):
                    continue
                try:
                    age_seconds = reference_time - os.path.getmtime(candidate)
                    if age_seconds > ttl_seconds:
                        os.unlink(candidate)
                except Exception:
                    pass
    except Exception:
        pass
344
+
345
def cleanup_all_temp_media():
    """Delete every tracked temporary media file and reset both registries.

    Covers the files in ``temp_media_files`` and the files recorded per
    session in ``_SESSION_MEDIA_FILES``. Individual deletion failures are
    ignored; an unexpected failure of the whole routine is printed.
    """
    try:
        print("[Cleanup] Cleaning up temporary media files...")

        # Files known through the temp_media_files registry
        for info in temp_media_files.values():
            try:
                registered_path = info['path']
                if os.path.exists(registered_path):
                    os.unlink(registered_path)
            except Exception:
                pass
        temp_media_files.clear()

        # Files known through the per-session registry
        with _MEDIA_FILES_LOCK:
            for tracked_paths in _SESSION_MEDIA_FILES.values():
                for tracked in tracked_paths:
                    try:
                        if tracked and os.path.exists(tracked):
                            os.unlink(tracked)
                    except Exception:
                        pass
            _SESSION_MEDIA_FILES.clear()

        print("[Cleanup] Temporary media cleanup completed")
    except Exception as e:
        print(f"[Cleanup] Error during cleanup: {str(e)}")
373
+
374
def process_image_for_model(image) -> Optional[str]:
    """Encode ``image`` as a PNG ``data:`` URI, or return None when no image is given.

    A NumPy array is first turned into a PIL image; any other object is
    assumed to already provide a PIL-style ``save`` method.
    """
    if image is None:
        return None

    import io
    import base64
    import numpy as np
    from PIL import Image as PILImage

    pil_image = PILImage.fromarray(image) if isinstance(image, np.ndarray) else image

    png_stream = io.BytesIO()
    pil_image.save(png_stream, format='PNG')
    encoded = base64.b64encode(png_stream.getvalue()).decode('utf-8')
    return f"data:image/png;base64,{encoded}"
392
+
393
def create_multimodal_message(text: str, image=None) -> Dict:
    """Build a user chat message for ``text``, noting when an image accompanies it.

    The image itself is not embedded: the content stays a plain string so
    every provider accepts it, with a short note appended when ``image`` is
    not None.
    """
    content = text if image is None else f"{text}\n\n[An image was provided as reference.]"
    return {"role": "user", "content": content}
400
+
401
def apply_search_replace_changes(original_content: str, changes_text: str) -> str:
    """Apply model-produced edits in ``changes_text`` to ``original_content``.

    Two input shapes are understood:

    * CSS-style rules (``selector { body }``) when none of the SEARCH /
      DIVIDER / REPLACE markers occur in ``changes_text``. For each rule the
      first matching ``selector { ... }`` in the content gets its body
      replaced, re-indented to match the existing body. If at least one rule
      matched, that result is returned.
    * Search/replace blocks delimited by ``SEARCH_START``, ``DIVIDER`` and
      ``REPLACE_END`` marker lines. Each block's search text (stripped) is
      replaced everywhere it occurs by the block's replace text (stripped). A
      search text that is not found is reported on stdout and skipped.

    A block whose search section is only whitespace is skipped: an empty
    search string would match between every character of the content.

    Returns:
        The updated content, or ``original_content`` unchanged when
        ``changes_text`` is blank or nothing applied.
    """
    if not changes_text.strip():
        return original_content

    # CSS rule fallback for non-block formats
    if not any(marker in changes_text for marker in (SEARCH_START, DIVIDER, REPLACE_END)):
        try:
            updated_content = original_content
            replaced_any_rule = False

            # Find CSS-like rule blocks
            css_blocks = re.findall(r"([^{]+)\{([\s\S]*?)\}", changes_text, flags=re.MULTILINE)

            for selector_raw, body_raw in css_blocks:
                selector = selector_raw.strip()
                body = body_raw.strip()
                if not selector:
                    continue

                pattern = re.compile(rf"({re.escape(selector)}\s*\{{)([\s\S]*?)(\}})")

                def _replace_rule(match):
                    nonlocal replaced_any_rule
                    replaced_any_rule = True
                    prefix, existing_body, suffix = match.groups()

                    # Reuse the indentation of the first non-blank existing line.
                    first_line_indent = ""
                    for line in existing_body.splitlines():
                        stripped = line.lstrip(" \t")
                        if stripped:
                            first_line_indent = line[: len(line) - len(stripped)]
                            break

                    if body:
                        new_body_lines = [
                            first_line_indent + line if line.strip() else line
                            for line in body.splitlines()
                        ]
                        new_body_text = "\n" + "\n".join(new_body_lines) + "\n"
                    else:
                        # An empty replacement body leaves the rule as it was.
                        new_body_text = existing_body

                    return f"{prefix}{new_body_text}{suffix}"

                # _replace_rule runs right here, so it sees this iteration's body.
                updated_content = pattern.sub(_replace_rule, updated_content, count=1)

            if replaced_any_rule:
                return updated_content
        except Exception:
            # Fall through to the block parser below.
            pass

    # Split the text into blocks: one starts at SEARCH_START, ends at REPLACE_END.
    blocks = []
    current_block = ""

    for line in changes_text.split('\n'):
        if line.strip() == SEARCH_START:
            if current_block.strip():
                blocks.append(current_block.strip())
            current_block = line + '\n'
        elif line.strip() == REPLACE_END:
            current_block += line + '\n'
            blocks.append(current_block.strip())
            current_block = ""
        else:
            current_block += line + '\n'

    if current_block.strip():
        blocks.append(current_block.strip())

    modified_content = original_content

    for block in blocks:
        if not block.strip():
            continue

        search_lines = []
        replace_lines = []
        in_search = False
        in_replace = False

        for line in block.split('\n'):
            marker = line.strip()
            if marker == SEARCH_START:
                in_search = True
                in_replace = False
            elif marker == DIVIDER:
                in_search = False
                in_replace = True
            elif marker == REPLACE_END:
                in_replace = False
            elif in_search:
                search_lines.append(line)
            elif in_replace:
                replace_lines.append(line)

        if not search_lines:
            continue

        search_text = '\n'.join(search_lines).strip()
        replace_text = '\n'.join(replace_lines).strip()

        if not search_text:
            # str.replace("", x) would insert x between every character.
            print("Warning: Skipping block with empty search text")
            continue

        if search_text in modified_content:
            modified_content = modified_content.replace(search_text, replace_text)
        else:
            print(f"Warning: Search text not found: {search_text[:100]}...")

    return modified_content
507
+
508
def validate_video_html(video_html: str) -> bool:
    """Check that ``video_html`` looks like a usable ``<video>`` snippet.

    All of the following must hold:

    * the text is non-empty;
    * it contains a ``<video`` opening tag followed later by ``</video>``;
    * it contains a ``<source`` tag;
    * it references an accepted source: an MP4 base64 data URI, a Hugging
      Face dataset ``/resolve/main/`` URL, or a ``file://`` URL.

    These are substring checks, not an HTML parse. Any exception (for example
    a non-string argument) results in ``False``.
    """
    try:
        if not video_html or not video_html.strip():
            return False

        # The closing tag must come after the opening tag, not merely exist.
        video_start = video_html.find('<video')
        if video_start == -1:
            return False
        if video_html.find('</video>', video_start) == -1:
            return False

        if '<source' not in video_html:
            return False

        # Check for valid video sources
        has_data_uri = 'data:video/mp4;base64,' in video_html
        has_hf_url = 'https://huggingface.co/datasets/' in video_html and '/resolve/main/' in video_html
        has_file_url = 'file://' in video_html

        return has_data_uri or has_hf_url or has_file_url
    except Exception:
        return False
537
+
538
+ # Register cleanup handler
539
+ atexit.register(cleanup_all_temp_media)