mgbam committed
Commit 8341729 · verified · 1 Parent(s): 69fe504

Create code_processing.py

Files changed (1)
  1. code_processing.py +700 -0
code_processing.py ADDED
@@ -0,0 +1,700 @@
+"""
+Code processing utilities for parsing, transforming, and managing different code formats.
+"""
+
+import re
+import base64
+import json
+from typing import Dict, List, Optional, Tuple, Union
+from pathlib import Path
+
+from utils import apply_search_replace_changes, validate_video_html
+from media_generation import generate_image_with_qwen, generate_image_to_image, generate_video_from_image, generate_video_from_text, generate_music_from_text
+from config import SEARCH_START, DIVIDER, REPLACE_END
+
+class CodeProcessor:
+    """Handles processing and transformation of various code formats"""
+
+    @staticmethod
+    def is_streamlit_code(code: str) -> bool:
+        """Check if Python code is a Streamlit app"""
+        if not code:
+            return False
+        lowered = code.lower()
+        return ("import streamlit" in lowered) or ("from streamlit" in lowered) or ("st." in code and "streamlit" in lowered)
+
+    @staticmethod
+    def is_gradio_code(code: str) -> bool:
+        """Check if Python code is a Gradio app"""
+        if not code:
+            return False
+        lowered = code.lower()
+        return (
+            "import gradio" in lowered or
+            "from gradio" in lowered or
+            "gr.Interface(" in code or
+            "gr.Blocks(" in code
+        )
+
+    @staticmethod
+    def extract_html_document(text: str) -> str:
+        """Extract HTML document from text, ignoring planning notes"""
+        if not text:
+            return text
+        lower = text.lower()
+        idx = lower.find("<!doctype html")
+        if idx == -1:
+            idx = lower.find("<html")
+        return text[idx:] if idx != -1 else text
+
+class TransformersJSProcessor:
+    """Handles Transformers.js specific code processing"""
+
+    @staticmethod
+    def parse_transformers_js_output(text: str) -> Dict[str, str]:
+        """Parse transformers.js output and extract the three files"""
+        files = {
+            'index.html': '',
+            'index.js': '',
+            'style.css': ''
+        }
+
+        if not text:
+            return files
+
+        # Multiple patterns for different code block variations
+        html_patterns = [
+            r'```html\s*\n([\s\S]*?)(?:```|\Z)',
+            r'```htm\s*\n([\s\S]*?)(?:```|\Z)',
+            r'```\s*(?:index\.html|html)\s*\n([\s\S]*?)(?:```|\Z)'
+        ]
+
+        js_patterns = [
+            r'```javascript\s*\n([\s\S]*?)(?:```|\Z)',
+            r'```js\s*\n([\s\S]*?)(?:```|\Z)',
+            r'```\s*(?:index\.js|javascript|js)\s*\n([\s\S]*?)(?:```|\Z)'
+        ]
+
+        css_patterns = [
+            r'```css\s*\n([\s\S]*?)(?:```|\Z)',
+            r'```\s*(?:style\.css|css)\s*\n([\s\S]*?)(?:```|\Z)'
+        ]
+
+        # Extract content using patterns
+        for pattern in html_patterns:
+            html_match = re.search(pattern, text, re.IGNORECASE)
+            if html_match:
+                files['index.html'] = html_match.group(1).strip()
+                break
+
+        for pattern in js_patterns:
+            js_match = re.search(pattern, text, re.IGNORECASE)
+            if js_match:
+                files['index.js'] = js_match.group(1).strip()
+                break
+
+        for pattern in css_patterns:
+            css_match = re.search(pattern, text, re.IGNORECASE)
+            if css_match:
+                files['style.css'] = css_match.group(1).strip()
+                break
+
+        # Fallback: support === filename === format
+        if not (files['index.html'] and files['index.js'] and files['style.css']):
+            fallback_files = MultipageProcessor.parse_multipage_html_output(text)
+            for key in files.keys():
+                if key in fallback_files:
+                    files[key] = fallback_files[key]
+
+        return files
+
+    @staticmethod
+    def format_transformers_js_output(files: Dict[str, str]) -> str:
+        """Format the three files into a single display string"""
+        output = []
+        output.append("=== index.html ===")
+        output.append(files.get('index.html', ''))
+        output.append("\n=== index.js ===")
+        output.append(files.get('index.js', ''))
+        output.append("\n=== style.css ===")
+        output.append(files.get('style.css', ''))
+        return '\n'.join(output)
+
+    @staticmethod
+    def build_transformers_inline_html(files: Dict[str, str]) -> str:
+        """Merge transformers.js files into a single HTML document"""
+        html = files.get('index.html') or ''
+        js = files.get('index.js') or ''
+        css = files.get('style.css') or ''
+
+        # Normalize JS imports to stable CDN
+        cdn_url = "https://cdn.jsdelivr.net/npm/@huggingface/[email protected]"
+
+        def _normalize_imports(_code: str) -> str:
+            if not _code:
+                return _code or ""
+            _code = re.sub(r"from\s+['\"]@huggingface/transformers['\"]", f"from '{cdn_url}'", _code)
+            _code = re.sub(r"from\s+['\"]@xenova/transformers['\"]", f"from '{cdn_url}'", _code)
+            _code = re.sub(r"from\s+['\"]https://cdn.jsdelivr.net/npm/@huggingface/transformers@[^'\"]+['\"]", f"from '{cdn_url}'", _code)
+            _code = re.sub(r"from\s+['\"]https://cdn.jsdelivr.net/npm/@xenova/transformers@[^'\"]+['\"]", f"from '{cdn_url}'", _code)
+            return _code
+
+        # Extract and merge inline module scripts
+        inline_modules = []
+        try:
+            for _m in re.finditer(r"<script\b[^>]*type=[\"']module[\"'][^>]*>([\s\S]*?)</script>", html, flags=re.IGNORECASE):
+                inline_modules.append(_m.group(1))
+            if inline_modules:
+                html = re.sub(r"<script\b[^>]*type=[\"']module[\"'][^>]*>[\s\S]*?</script>\s*", "", html, flags=re.IGNORECASE)
+        except Exception:
+            pass
+
+        # Combine JS code
+        combined_js_parts = []
+        if inline_modules:
+            combined_js_parts.append("\n\n".join(inline_modules))
+        if js:
+            combined_js_parts.append(js)
+        js = "\n\n".join([p for p in combined_js_parts if (p and p.strip())])
+        js = _normalize_imports(js)
+
+        # Add prelude for better compatibility
+        if js.strip():
+            prelude = (
+                f"import {{ env }} from '{cdn_url}';\n"
+                "try { env.useBrowserCache = false; } catch (e) {}\n"
+                "try { if (env && env.backends && env.backends.onnx && env.backends.onnx.wasm) { env.backends.onnx.wasm.numThreads = 1; env.backends.onnx.wasm.proxy = false; } } catch (e) {}\n"
+                f"(async () => {{ try {{ if (typeof globalThis.transformers === 'undefined') {{ const m = await import('{cdn_url}'); globalThis.transformers = m; }} }} catch (e) {{}} }})();\n"
+            )
+            js = prelude + js
+
+        # Create minimal shell if needed
+        doc = html.strip()
+        if not doc or ('<html' not in doc.lower()):
+            doc = (
+                "<!DOCTYPE html>\n"
+                "<html>\n<head>\n<meta charset=\"UTF-8\">\n<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n<title>Transformers.js App</title>\n</head>\n"
+                "<body>\n<div id=\"app\"></div>\n</body>\n</html>"
+            )
+
+        # Remove local file references
+        doc = re.sub(r"<link[^>]+href=\"[^\"]*style\.css\"[^>]*>\s*", "", doc, flags=re.IGNORECASE)
+        doc = re.sub(r"<script[^>]+src=\"[^\"]*index\.js\"[^>]*>\s*</script>\s*", "", doc, flags=re.IGNORECASE)
+
+        # Inline CSS
+        if css:
+            style_tag = f"<style>\n{css}\n</style>"
+            if '</head>' in doc.lower():
+                match = re.search(r"</head>", doc, flags=re.IGNORECASE)
+                if match:
+                    idx = match.start()
+                    doc = doc[:idx] + style_tag + doc[idx:]
+            else:
+                match = re.search(r"<body[^>]*>", doc, flags=re.IGNORECASE)
+                if match:
+                    idx = match.end()
+                    doc = doc[:idx] + "\n" + style_tag + doc[idx:]
+                else:
+                    doc = style_tag + doc
+
+        # Inline JS with debugging and cleanup
+        if js:
+            script_tag = f"<script type=\"module\">\n{js}\n</script>"
+            debug_overlay = TransformersJSProcessor._create_debug_overlay()
+            cleanup_tag = TransformersJSProcessor._create_cleanup_script()
+
+            match = re.search(r"</body>", doc, flags=re.IGNORECASE)
+            if match:
+                idx = match.start()
+                doc = doc[:idx] + debug_overlay + script_tag + cleanup_tag + doc[idx:]
+            else:
+                doc = doc + debug_overlay + script_tag + cleanup_tag
+
+        return doc
+
+    @staticmethod
+    def _create_debug_overlay() -> str:
+        """Create debug overlay for transformers.js apps"""
+        return (
+            "<style>\n"
+            "#anycoder-debug{position:fixed;left:0;right:0;bottom:0;max-height:45%;overflow:auto;"
+            "background:rgba(0,0,0,.85);color:#9eff9e;padding:.5em;font:12px/1.4 monospace;z-index:2147483647;display:none}"
+            "#anycoder-debug pre{margin:0;white-space:pre-wrap;word-break:break-word}"
+            "</style>\n"
+            "<div id=\"anycoder-debug\"></div>\n"
+            "<script>\n"
+            "(function(){\n"
+            " const el = document.getElementById('anycoder-debug');\n"
+            " function show(){ if(el && el.style.display!=='block'){ el.style.display='block'; } }\n"
+            " function log(msg){ try{ show(); const pre=document.createElement('pre'); pre.textContent=msg; el.appendChild(pre);}catch(e){} }\n"
+            " const origError = console.error.bind(console);\n"
+            " console.error = function(){ origError.apply(console, arguments); try{ log('console.error: ' + Array.from(arguments).map(a=>{try{return (typeof a==='string')?a:JSON.stringify(a);}catch(e){return String(a);}}).join(' ')); }catch(e){} };\n"
+            " window.addEventListener('error', e => { log('window.onerror: ' + (e && e.message ? e.message : 'Unknown error')); });\n"
+            " window.addEventListener('unhandledrejection', e => { try{ const r=e && e.reason; log('unhandledrejection: ' + (r && (r.message || JSON.stringify(r)))); }catch(err){ log('unhandledrejection'); } });\n"
+            "})();\n"
+            "</script>"
+        )
+
+    @staticmethod
+    def _create_cleanup_script() -> str:
+        """Create cleanup script for transformers.js apps"""
+        return (
+            "<script>\n"
+            "(function(){\n"
+            " function cleanup(){\n"
+            " try { if (window.caches && caches.keys) { caches.keys().then(keys => keys.forEach(k => caches.delete(k))); } } catch(e){}\n"
+            " try { if (window.indexedDB && indexedDB.databases) { indexedDB.databases().then(dbs => dbs.forEach(db => db && db.name && indexedDB.deleteDatabase(db.name))); } } catch(e){}\n"
+            " }\n"
+            " window.addEventListener('pagehide', cleanup, { once: true });\n"
+            " window.addEventListener('beforeunload', cleanup, { once: true });\n"
+            "})();\n"
+            "</script>"
+        )
+
+class SvelteProcessor:
+    """Handles Svelte specific code processing"""
+
+    @staticmethod
+    def parse_svelte_output(text: str) -> Dict[str, str]:
+        """Parse Svelte output to extract individual files"""
+        files = {
+            'src/App.svelte': '',
+            'src/app.css': ''
+        }
+
+        if not text:
+            return files
+
+        # Extract using code block patterns
+        svelte_pattern = r'```svelte\s*\n([\s\S]+?)\n```'
+        css_pattern = r'```css\s*\n([\s\S]+?)\n```'
+
+        svelte_match = re.search(svelte_pattern, text, re.IGNORECASE)
+        css_match = re.search(css_pattern, text, re.IGNORECASE)
+
+        if svelte_match:
+            files['src/App.svelte'] = svelte_match.group(1).strip()
+        if css_match:
+            files['src/app.css'] = css_match.group(1).strip()
+
+        # Fallback: support === filename === format
+        if not (files['src/App.svelte'] and files['src/app.css']):
+            fallback_files = MultipageProcessor.parse_multipage_html_output(text)
+            for key in files.keys():
+                if key in fallback_files:
+                    files[key] = fallback_files[key]
+
+        return files
+
+    @staticmethod
+    def format_svelte_output(files: Dict[str, str]) -> str:
+        """Format Svelte files into a single display string"""
+        output = []
+        output.append("=== src/App.svelte ===")
+        output.append(files.get('src/App.svelte', ''))
+        output.append("\n=== src/app.css ===")
+        output.append(files.get('src/app.css', ''))
+        return '\n'.join(output)
+
+class MultipageProcessor:
+    """Handles multi-page HTML projects"""
+
+    @staticmethod
+    def parse_multipage_html_output(text: str) -> Dict[str, str]:
+        """Parse multi-page HTML output formatted as === filename === sections"""
+        if not text:
+            return {}
+
+        from utils import remove_code_block
+        cleaned = remove_code_block(text)
+        files: Dict[str, str] = {}
+
+        pattern = re.compile(r"^===\s*([^=\n]+?)\s*===\s*\n([\s\S]*?)(?=\n===\s*[^=\n]+?\s*===|\Z)", re.MULTILINE)
+
+        for m in pattern.finditer(cleaned):
+            name = m.group(1).strip()
+            content = m.group(2).strip()
+            # Remove accidental trailing fences
+            content = re.sub(r"^```\w*\s*\n|\n```\s*$", "", content)
+            files[name] = content
+
+        return files
+
+    @staticmethod
+    def format_multipage_output(files: Dict[str, str]) -> str:
+        """Format files back into === filename === sections"""
+        if not isinstance(files, dict) or not files:
+            return ""
+
+        # Order with index.html first
+        ordered_paths = []
+        if 'index.html' in files:
+            ordered_paths.append('index.html')
+        for path in sorted(files.keys()):
+            if path == 'index.html':
+                continue
+            ordered_paths.append(path)
+
+        parts: List[str] = []
+        for path in ordered_paths:
+            parts.append(f"=== {path} ===")
+            parts.append((files.get(path) or '').rstrip())
+
+        return "\n".join(parts)
+
+    @staticmethod
+    def validate_and_autofix_files(files: Dict[str, str]) -> Dict[str, str]:
+        """Ensure minimal contract for multi-file sites"""
+        if not isinstance(files, dict) or not files:
+            return files or {}
+
+        normalized: Dict[str, str] = {}
+        for k, v in files.items():
+            safe_key = k.strip().lstrip('/')
+            normalized[safe_key] = v
+
+        html_files = [p for p in normalized.keys() if p.lower().endswith('.html')]
+        has_index = 'index.html' in normalized
+
+        # Create index.html if missing but other HTML files exist
+        if not has_index and html_files:
+            links = '\n'.join([f"<li><a href=\"{p}\">{p}</a></li>" for p in html_files])
+            normalized['index.html'] = (
+                "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n<meta charset=\"utf-8\"/>\n"
+                "<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\"/>\n"
+                "<title>Site Index</title>\n</head>\n<body>\n<h1>Site</h1>\n<ul>\n"
+                + links + "\n</ul>\n</body>\n</html>"
+            )
+
+        # Collect asset references
+        asset_refs: set[str] = set()
+        patterns = [
+            re.compile(r"<link[^>]+href=\"([^\"]+)\""),
+            re.compile(r"<script[^>]+src=\"([^\"]+)\""),
+            re.compile(r"<img[^>]+src=\"([^\"]+)\""),
+            re.compile(r"<a[^>]+href=\"([^\"]+)\"")
+        ]
+
+        for path, content in list(normalized.items()):
+            if not path.lower().endswith('.html'):
+                continue
+            for patt in patterns:
+                for m in patt.finditer(content or ""):
+                    ref = (m.group(1) or "").strip()
+                    if not ref or ref.startswith(('http://', 'https://', 'data:', '#')):
+                        continue
+                    asset_refs.add(ref.lstrip('/'))
+
+        # Add minimal stubs for missing references
+        for ref in list(asset_refs):
+            if ref not in normalized:
+                if ref.lower().endswith('.css'):
+                    normalized[ref] = "/* generated stub */\n"
+                elif ref.lower().endswith('.js'):
+                    normalized[ref] = "// generated stub\n"
+                elif ref.lower().endswith('.html'):
+                    normalized[ref] = (
+                        "<!DOCTYPE html>\n<html lang=\"en\">\n<head><meta charset=\"utf-8\"/><meta name=\"viewport\" content=\"width=device-width, initial-scale=1\"/><title>Page</title></head>\n"
+                        "<body><main><h1>Placeholder page</h1><p>This page was auto-created to satisfy an internal link.</p></main></body>\n</html>"
+                    )
+
+        return normalized
+
+    @staticmethod
+    def inline_multipage_into_single_preview(files: Dict[str, str]) -> str:
+        """Inline local CSS/JS for iframe preview"""
+        html = files.get('index.html', '')
+        if not html:
+            return ""
+
+        doc = html
+
+        # Inline CSS links
+        def _inline_css(match):
+            href = match.group(1)
+            if href in files:
+                return f"<style>\n{files[href]}\n</style>"
+            return match.group(0)
+
+        doc = re.sub(r"<link[^>]+href=\"([^\"]+)\"[^>]*/?>", _inline_css, doc, flags=re.IGNORECASE)
+
+        # Inline JS scripts
+        def _inline_js(match):
+            src = match.group(1)
+            if src in files:
+                return f"<script>\n{files[src]}\n</script>"
+            return match.group(0)
+
+        doc = re.sub(r"<script[^>]+src=\"([^\"]+)\"[^>]*>\s*</script>", _inline_js, doc, flags=re.IGNORECASE)
+
+        # Add client-side navigation for other pages
+        doc = MultipageProcessor._add_client_side_navigation(doc, files)
+
+        return doc
+
+    @staticmethod
+    def _add_client_side_navigation(doc: str, files: Dict[str, str]) -> str:
+        """Add client-side navigation for multi-page preview"""
+        try:
+            html_pages = {k: v for k, v in files.items() if k.lower().endswith('.html')}
+
+            # Extract body content for each page
+            _index_body = re.search(r"<body[^>]*>([\s\S]*?)</body>", doc, flags=re.IGNORECASE)
+            html_pages['index.html'] = _index_body.group(1) if _index_body else doc
+
+            encoded = base64.b64encode(json.dumps(html_pages).encode('utf-8')).decode('ascii')
+
+            nav_script = (
+                "<script>\n"
+                "(function(){\n"
+                f" const MP_FILES = JSON.parse(atob('{encoded}'));\n"
+                " function extractBody(html){\n"
+                " try {\n"
+                " const doc = new DOMParser().parseFromString(html, 'text/html');\n"
+                " const title = doc.querySelector('title'); if (title) document.title = title.textContent || document.title;\n"
+                " return doc.body ? doc.body.innerHTML : html;\n"
+                " } catch(e){ return html; }\n"
+                " }\n"
+                " function loadPage(path){\n"
+                " if (!MP_FILES[path]) return false;\n"
+                " const bodyHTML = extractBody(MP_FILES[path]);\n"
+                " document.body.innerHTML = bodyHTML;\n"
+                " attach();\n"
+                " try { history.replaceState({}, '', '#'+path); } catch(e){}\n"
+                " return true;\n"
+                " }\n"
+                " function clickHandler(e){\n"
+                " const a = e.target && e.target.closest ? e.target.closest('a') : null;\n"
+                " if (!a) return;\n"
+                " const href = a.getAttribute('href') || '';\n"
+                " if (!href || href.startsWith('#') || /^https?:/i.test(href) || href.startsWith('mailto:') || href.startsWith('tel:')) return;\n"
+                " const clean = href.split('#')[0].split('?')[0];\n"
+                " if (MP_FILES[clean]) { e.preventDefault(); loadPage(clean); }\n"
+                " }\n"
+                " function attach(){ document.removeEventListener('click', clickHandler, true); document.addEventListener('click', clickHandler, true); }\n"
+                " document.addEventListener('DOMContentLoaded', function(){ attach(); const initial = (location.hash||'').slice(1); if (initial && MP_FILES[initial]) loadPage(initial); }, { once:true });\n"
+                "})();\n"
+                "</script>"
+            )
+
+            m = re.search(r"</body>", doc, flags=re.IGNORECASE)
+            if m:
+                i = m.start()
+                doc = doc[:i] + nav_script + doc[i:]
+            else:
+                doc = doc + nav_script
+
+        except Exception:
+            pass  # Non-fatal in preview
+
+        return doc
+
+class MediaIntegrator:
+    """Handles integration of generated media into code"""
+
+    @staticmethod
+    def apply_generated_media_to_html(html_content: str, user_prompt: str,
+                                      enable_text_to_image: bool = False,
+                                      enable_image_to_image: bool = False,
+                                      input_image_data=None,
+                                      image_to_image_prompt: Optional[str] = None,
+                                      text_to_image_prompt: Optional[str] = None,
+                                      enable_image_to_video: bool = False,
+                                      image_to_video_prompt: Optional[str] = None,
+                                      session_id: Optional[str] = None,
+                                      enable_text_to_video: bool = False,
+                                      text_to_video_prompt: Optional[str] = None,
+                                      enable_text_to_music: bool = False,
+                                      text_to_music_prompt: Optional[str] = None,
+                                      token=None) -> str:
+        """Apply media generation to HTML content"""
+
+        # Detect multi-page structure
+        is_multipage = False
+        multipage_files = {}
+        entry_html_path = None
+
+        try:
+            multipage_files = MultipageProcessor.parse_multipage_html_output(html_content) or {}
+            if multipage_files:
+                is_multipage = True
+                entry_html_path = 'index.html' if 'index.html' in multipage_files else next((p for p in multipage_files.keys() if p.lower().endswith('.html')), None)
+        except Exception:
+            pass
+
+        result = multipage_files.get(entry_html_path, html_content) if is_multipage and entry_html_path else html_content
+
+        try:
+            # Process media generation based on priority
+            if enable_image_to_video and input_image_data is not None:
+                result = MediaIntegrator._apply_image_to_video(result, user_prompt, image_to_video_prompt, input_image_data, session_id, token)
+            elif enable_text_to_video:
+                result = MediaIntegrator._apply_text_to_video(result, user_prompt, text_to_video_prompt, session_id, token)
+            elif enable_text_to_music:
+                result = MediaIntegrator._apply_text_to_music(result, user_prompt, text_to_music_prompt, session_id, token)
+            elif enable_image_to_image and input_image_data is not None:
+                result = MediaIntegrator._apply_image_to_image(result, user_prompt, image_to_image_prompt, input_image_data, token)
+            elif enable_text_to_image:
+                result = MediaIntegrator._apply_text_to_image(result, user_prompt, text_to_image_prompt, token)
+        except Exception as e:
+            print(f"[MediaApply] Error during media generation: {str(e)}")
+
+        # Return updated content
+        if is_multipage and entry_html_path:
+            multipage_files[entry_html_path] = result
+            return MultipageProcessor.format_multipage_output(multipage_files)
+
+        return result
+
+    @staticmethod
+    def _apply_image_to_video(html_content: str, user_prompt: str, prompt: Optional[str],
+                              input_image_data, session_id: Optional[str], token) -> str:
+        """Apply image-to-video generation"""
+        i2v_prompt = (prompt or user_prompt or "").strip()
+        print(f"[MediaApply] Applying image-to-video with prompt: {i2v_prompt}")
+
+        try:
+            video_html_tag = generate_video_from_image(input_image_data, i2v_prompt, session_id=session_id, token=token)
+            if not video_html_tag.startswith("Error") and validate_video_html(video_html_tag):
+                return MediaIntegrator._place_media_in_html(html_content, video_html_tag, "video")
+        except Exception as e:
+            print(f"[MediaApply] Image-to-video generation failed: {str(e)}")
+
+        return html_content
+
+    @staticmethod
+    def _apply_text_to_video(html_content: str, user_prompt: str, prompt: Optional[str],
+                             session_id: Optional[str], token) -> str:
+        """Apply text-to-video generation"""
+        t2v_prompt = (prompt or user_prompt or "").strip()
+        print(f"[MediaApply] Applying text-to-video with prompt: {t2v_prompt}")
+
+        try:
+            video_html_tag = generate_video_from_text(t2v_prompt, session_id=session_id, token=token)
+            if not video_html_tag.startswith("Error") and validate_video_html(video_html_tag):
+                return MediaIntegrator._place_media_in_html(html_content, video_html_tag, "video")
+        except Exception as e:
+            print(f"[MediaApply] Text-to-video generation failed: {str(e)}")
+
+        return html_content
+
+    @staticmethod
+    def _apply_text_to_music(html_content: str, user_prompt: str, prompt: Optional[str],
+                             session_id: Optional[str], token) -> str:
+        """Apply text-to-music generation"""
+        t2m_prompt = (prompt or user_prompt or "").strip()
+        print(f"[MediaApply] Applying text-to-music with prompt: {t2m_prompt}")
+
+        try:
+            audio_html_tag = generate_music_from_text(t2m_prompt, session_id=session_id, token=token)
+            if not audio_html_tag.startswith("Error"):
+                return MediaIntegrator._place_media_in_html(html_content, audio_html_tag, "audio")
+        except Exception as e:
+            print(f"[MediaApply] Text-to-music generation failed: {str(e)}")
+
+        return html_content
+
+    @staticmethod
+    def _apply_image_to_image(html_content: str, user_prompt: str, prompt: Optional[str],
+                              input_image_data, token) -> str:
+        """Apply image-to-image generation"""
+        i2i_prompt = (prompt or user_prompt or "").strip()
+        print(f"[MediaApply] Applying image-to-image with prompt: {i2i_prompt}")
+
+        try:
+            image_html_tag = generate_image_to_image(input_image_data, i2i_prompt, token=token)
+            if not image_html_tag.startswith("Error"):
+                return MediaIntegrator._place_media_in_html(html_content, image_html_tag, "image")
+        except Exception as e:
+            print(f"[MediaApply] Image-to-image generation failed: {str(e)}")
+
+        return html_content
+
+    @staticmethod
+    def _apply_text_to_image(html_content: str, user_prompt: str, prompt: Optional[str], token) -> str:
+        """Apply text-to-image generation"""
+        t2i_prompt = (prompt or user_prompt or "").strip()
+        print(f"[MediaApply] Applying text-to-image with prompt: {t2i_prompt}")
+
+        try:
+            image_html_tag = generate_image_with_qwen(t2i_prompt, 0, token=token)
+            if not image_html_tag.startswith("Error"):
+                return MediaIntegrator._place_media_in_html(html_content, image_html_tag, "image")
+        except Exception as e:
+            print(f"[MediaApply] Text-to-image generation failed: {str(e)}")
+
+        return html_content
+
+    @staticmethod
+    def _place_media_in_html(html_content: str, media_html: str, media_type: str) -> str:
+        """Place generated media in appropriate location in HTML"""
+        # Find good insertion points
+        insertion_patterns = [
+            r'(<main[^>]*>)',
+            r'(<section[^>]*class="[^"]*hero[^"]*"[^>]*>)',
+            r'(<div[^>]*class="[^"]*container[^"]*"[^>]*>)',
+            r'(<body[^>]*>)'
+        ]
+
+        for pattern in insertion_patterns:
+            match = re.search(pattern, html_content, re.IGNORECASE)
+            if match:
+                insertion_point = match.end()
+                container_class = "video-container" if media_type == "video" else f"{media_type}-container"
+                media_with_container = f'\n <div class="{container_class}" style="margin: 20px 0; text-align: center;">\n {media_html}\n </div>'
+                return html_content[:insertion_point] + media_with_container + html_content[insertion_point:]
+
+        # Fallback: append before closing body
+        body_close = html_content.rfind('</body>')
+        if body_close != -1:
+            return html_content[:body_close] + f'\n {media_html}\n' + html_content[body_close:]
+
+        # Last resort: append at end
+        return html_content + f'\n{media_html}'
+
+# Export main functions and classes
+code_processor = CodeProcessor()
+transformers_processor = TransformersJSProcessor()
+svelte_processor = SvelteProcessor()
+multipage_processor = MultipageProcessor()
+media_integrator = MediaIntegrator()
+
+# Main exports
+def is_streamlit_code(code: str) -> bool:
+    return code_processor.is_streamlit_code(code)
+
+def is_gradio_code(code: str) -> bool:
+    return code_processor.is_gradio_code(code)
+
+def extract_html_document(text: str) -> str:
+    return code_processor.extract_html_document(text)
+
+def parse_transformers_js_output(text: str) -> Dict[str, str]:
+    return transformers_processor.parse_transformers_js_output(text)
+
+def format_transformers_js_output(files: Dict[str, str]) -> str:
+    return transformers_processor.format_transformers_js_output(files)
+
+def build_transformers_inline_html(files: Dict[str, str]) -> str:
+    return transformers_processor.build_transformers_inline_html(files)
+
+def parse_svelte_output(text: str) -> Dict[str, str]:
+    return svelte_processor.parse_svelte_output(text)
+
+def format_svelte_output(files: Dict[str, str]) -> str:
+    return svelte_processor.format_svelte_output(files)
+
+def parse_multipage_html_output(text: str) -> Dict[str, str]:
+    return multipage_processor.parse_multipage_html_output(text)
+
+def format_multipage_output(files: Dict[str, str]) -> str:
+    return multipage_processor.format_multipage_output(files)
+
+def validate_and_autofix_files(files: Dict[str, str]) -> Dict[str, str]:
+    return multipage_processor.validate_and_autofix_files(files)
+
+def inline_multipage_into_single_preview(files: Dict[str, str]) -> str:
+    return multipage_processor.inline_multipage_into_single_preview(files)
+
+def apply_generated_media_to_html(html_content: str, user_prompt: str, **kwargs) -> str:
+    return media_integrator.apply_generated_media_to_html(html_content, user_prompt, **kwargs)
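
For context, a minimal usage sketch of the helpers this file exports, assuming the module is importable as code_processing and that the utils, media_generation, and config dependencies are on the path; the sample model output below is illustrative only and is not taken from the repository:

# Hypothetical usage sketch (not part of the commit) exercising the multi-page helpers.
from code_processing import (
    parse_multipage_html_output,
    validate_and_autofix_files,
    inline_multipage_into_single_preview,
    is_gradio_code,
)

# An example model response in the `=== filename ===` format that parse_multipage_html_output expects.
model_output = """=== index.html ===
<!DOCTYPE html>
<html><head><link rel="stylesheet" href="style.css"></head>
<body><h1>Home</h1><a href="about.html">About</a></body></html>
=== style.css ===
body { font-family: sans-serif; }
"""

files = parse_multipage_html_output(model_output)      # {'index.html': ..., 'style.css': ...}
files = validate_and_autofix_files(files)              # adds a placeholder about.html for the dangling link
preview = inline_multipage_into_single_preview(files)  # single HTML document suitable for an iframe preview

print(is_gradio_code("import gradio as gr"))           # True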