Create code _processing.py
Browse files- code _processing.py +700 -0
code _processing.py
ADDED
@@ -0,0 +1,700 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Code processing utilities for parsing, transforming, and managing different code formats.
|
3 |
+
"""
|
4 |
+
|
5 |
+
import re
|
6 |
+
import base64
|
7 |
+
import json
|
8 |
+
from typing import Dict, List, Optional, Tuple, Union
|
9 |
+
from pathlib import Path
|
10 |
+
|
11 |
+
from utils import apply_search_replace_changes, validate_video_html
|
12 |
+
from media_generation import generate_image_with_qwen, generate_image_to_image, generate_video_from_image, generate_video_from_text, generate_music_from_text
|
13 |
+
from config import SEARCH_START, DIVIDER, REPLACE_END
|
14 |
+
|
15 |
+
class CodeProcessor:
    """Heuristics for classifying and extracting code in various formats."""

    @staticmethod
    def is_streamlit_code(code: str) -> bool:
        """Return True when the given Python source looks like a Streamlit app."""
        if not code:
            return False
        folded = code.lower()
        if "import streamlit" in folded or "from streamlit" in folded:
            return True
        # A bare "st." prefix is ambiguous, so also require the word
        # "streamlit" to appear somewhere in the source.
        return "st." in code and "streamlit" in folded

    @staticmethod
    def is_gradio_code(code: str) -> bool:
        """Return True when the given Python source looks like a Gradio app."""
        if not code:
            return False
        folded = code.lower()
        # Import statements are matched case-insensitively; API calls are
        # matched case-sensitively (gr.Interface / gr.Blocks).
        insensitive_markers = ("import gradio", "from gradio")
        sensitive_markers = ("gr.Interface(", "gr.Blocks(")
        return (
            any(marker in folded for marker in insensitive_markers)
            or any(marker in code for marker in sensitive_markers)
        )

    @staticmethod
    def extract_html_document(text: str) -> str:
        """Return *text* starting at the first HTML document marker.

        Any planning notes preceding ``<!doctype html`` (or, failing that,
        ``<html``) are dropped; when neither marker is present the text is
        returned unchanged.
        """
        if not text:
            return text
        folded = text.lower()
        start = folded.find("<!doctype html")
        if start == -1:
            start = folded.find("<html")
        if start == -1:
            return text
        return text[start:]
|
49 |
+
|
50 |
+
class TransformersJSProcessor:
    """Handles Transformers.js specific code processing.

    A transformers.js project is modeled as exactly three files
    (index.html, index.js, style.css).  This class parses them out of
    model output, formats them for display, and can merge them into a
    single self-contained HTML document suitable for iframe preview.
    """

    @staticmethod
    def parse_transformers_js_output(text: str) -> Dict[str, str]:
        """Parse transformers.js output and extract the three files.

        Returns a dict with keys 'index.html', 'index.js', 'style.css';
        any file not found in *text* maps to ''.  Fenced code blocks are
        tried first; a ``=== filename ===`` sectioned format is used as a
        fallback for whichever files are still missing.
        """
        files = {
            'index.html': '',
            'index.js': '',
            'style.css': ''
        }

        if not text:
            return files

        # Multiple patterns for different code block variations.
        # Each pattern tolerates a missing closing fence (\Z) so that
        # truncated model output still yields usable content.
        html_patterns = [
            r'```html\s*\n([\s\S]*?)(?:```|\Z)',
            r'```htm\s*\n([\s\S]*?)(?:```|\Z)',
            r'```\s*(?:index\.html|html)\s*\n([\s\S]*?)(?:```|\Z)'
        ]

        js_patterns = [
            r'```javascript\s*\n([\s\S]*?)(?:```|\Z)',
            r'```js\s*\n([\s\S]*?)(?:```|\Z)',
            r'```\s*(?:index\.js|javascript|js)\s*\n([\s\S]*?)(?:```|\Z)'
        ]

        css_patterns = [
            r'```css\s*\n([\s\S]*?)(?:```|\Z)',
            r'```\s*(?:style\.css|css)\s*\n([\s\S]*?)(?:```|\Z)'
        ]

        # Extract content using patterns; first matching pattern wins.
        for pattern in html_patterns:
            html_match = re.search(pattern, text, re.IGNORECASE)
            if html_match:
                files['index.html'] = html_match.group(1).strip()
                break

        for pattern in js_patterns:
            js_match = re.search(pattern, text, re.IGNORECASE)
            if js_match:
                files['index.js'] = js_match.group(1).strip()
                break

        for pattern in css_patterns:
            css_match = re.search(pattern, text, re.IGNORECASE)
            if css_match:
                files['style.css'] = css_match.group(1).strip()
                break

        # Fallback: support === filename === format for any missing file
        if not (files['index.html'] and files['index.js'] and files['style.css']):
            fallback_files = MultipageProcessor.parse_multipage_html_output(text)
            for key in files.keys():
                if key in fallback_files:
                    files[key] = fallback_files[key]

        return files

    @staticmethod
    def format_transformers_js_output(files: Dict[str, str]) -> str:
        """Format the three files into a single display string.

        Output uses the same ``=== filename ===`` section headers that the
        multipage parser understands, so the round trip is lossless.
        """
        output = []
        output.append("=== index.html ===")
        output.append(files.get('index.html', ''))
        output.append("\n=== index.js ===")
        output.append(files.get('index.js', ''))
        output.append("\n=== style.css ===")
        output.append(files.get('style.css', ''))
        return '\n'.join(output)

    @staticmethod
    def build_transformers_inline_html(files: Dict[str, str]) -> str:
        """Merge transformers.js files into a single HTML document.

        Steps: normalize transformers imports to a pinned CDN build, lift
        any inline ``<script type="module">`` blocks out of the HTML and
        merge them with index.js, strip local file references, then inline
        the CSS into <head> and the JS (with a compatibility prelude,
        debug overlay, and cache-cleanup script) before </body>.
        """
        html = files.get('index.html') or ''
        js = files.get('index.js') or ''
        css = files.get('style.css') or ''

        # Normalize JS imports to stable CDN (version pinned deliberately)
        cdn_url = "https://cdn.jsdelivr.net/npm/@huggingface/[email protected]"

        def _normalize_imports(_code: str) -> str:
            # Rewrite bare-specifier and versioned-CDN imports of both the
            # @huggingface and legacy @xenova packages to the pinned URL.
            if not _code:
                return _code or ""
            _code = re.sub(r"from\s+['\"]@huggingface/transformers['\"]", f"from '{cdn_url}'", _code)
            _code = re.sub(r"from\s+['\"]@xenova/transformers['\"]", f"from '{cdn_url}'", _code)
            _code = re.sub(r"from\s+['\"]https://cdn.jsdelivr.net/npm/@huggingface/transformers@[^'\"]+['\"]", f"from '{cdn_url}'", _code)
            _code = re.sub(r"from\s+['\"]https://cdn.jsdelivr.net/npm/@xenova/transformers@[^'\"]+['\"]", f"from '{cdn_url}'", _code)
            return _code

        # Extract and merge inline module scripts (best effort; a regex
        # failure must not abort the whole merge)
        inline_modules = []
        try:
            for _m in re.finditer(r"<script\b[^>]*type=[\"']module[\"'][^>]*>([\s\S]*?)</script>", html, flags=re.IGNORECASE):
                inline_modules.append(_m.group(1))
            if inline_modules:
                html = re.sub(r"<script\b[^>]*type=[\"']module[\"'][^>]*>[\s\S]*?</script>\s*", "", html, flags=re.IGNORECASE)
        except Exception:
            pass

        # Combine JS code: inline modules first, then index.js
        combined_js_parts = []
        if inline_modules:
            combined_js_parts.append("\n\n".join(inline_modules))
        if js:
            combined_js_parts.append(js)
        js = "\n\n".join([p for p in combined_js_parts if (p and p.strip())])
        js = _normalize_imports(js)

        # Add prelude for better compatibility: disable the browser cache,
        # force single-threaded non-proxied WASM, and expose the library as
        # globalThis.transformers for code that expects a global.
        if js.strip():
            prelude = (
                f"import {{ env }} from '{cdn_url}';\n"
                "try { env.useBrowserCache = false; } catch (e) {}\n"
                "try { if (env && env.backends && env.backends.onnx && env.backends.onnx.wasm) { env.backends.onnx.wasm.numThreads = 1; env.backends.onnx.wasm.proxy = false; } } catch (e) {}\n"
                f"(async () => {{ try {{ if (typeof globalThis.transformers === 'undefined') {{ const m = await import('{cdn_url}'); globalThis.transformers = m; }} }} catch (e) {{}} }})();\n"
            )
            js = prelude + js

        # Create minimal shell if the model emitted only a fragment
        doc = html.strip()
        if not doc or ('<html' not in doc.lower()):
            doc = (
                "<!DOCTYPE html>\n"
                "<html>\n<head>\n<meta charset=\"UTF-8\">\n<meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n<title>Transformers.js App</title>\n</head>\n"
                "<body>\n<div id=\"app\"></div>\n</body>\n</html>"
            )

        # Remove local file references (they will be inlined below)
        doc = re.sub(r"<link[^>]+href=\"[^\"]*style\.css\"[^>]*>\s*", "", doc, flags=re.IGNORECASE)
        doc = re.sub(r"<script[^>]+src=\"[^\"]*index\.js\"[^>]*>\s*</script>\s*", "", doc, flags=re.IGNORECASE)

        # Inline CSS: prefer end of <head>, else start of <body>, else prepend
        if css:
            style_tag = f"<style>\n{css}\n</style>"
            if '</head>' in doc.lower():
                match = re.search(r"</head>", doc, flags=re.IGNORECASE)
                if match:
                    idx = match.start()
                    doc = doc[:idx] + style_tag + doc[idx:]
            else:
                match = re.search(r"<body[^>]*>", doc, flags=re.IGNORECASE)
                if match:
                    idx = match.end()
                    doc = doc[:idx] + "\n" + style_tag + doc[idx:]
                else:
                    doc = style_tag + doc

        # Inline JS with debugging and cleanup, just before </body>
        if js:
            script_tag = f"<script type=\"module\">\n{js}\n</script>"
            debug_overlay = TransformersJSProcessor._create_debug_overlay()
            cleanup_tag = TransformersJSProcessor._create_cleanup_script()

            match = re.search(r"</body>", doc, flags=re.IGNORECASE)
            if match:
                idx = match.start()
                doc = doc[:idx] + debug_overlay + script_tag + cleanup_tag + doc[idx:]
            else:
                doc = doc + debug_overlay + script_tag + cleanup_tag

        return doc

    @staticmethod
    def _create_debug_overlay() -> str:
        """Create debug overlay for transformers.js apps.

        Returns an HTML snippet (style + div + script) that captures
        console.error, window 'error' and 'unhandledrejection' events and
        renders them in a fixed overlay at the bottom of the iframe, so
        failures inside the sandboxed preview are visible to the user.
        """
        return (
            "<style>\n"
            "#anycoder-debug{position:fixed;left:0;right:0;bottom:0;max-height:45%;overflow:auto;"
            "background:rgba(0,0,0,.85);color:#9eff9e;padding:.5em;font:12px/1.4 monospace;z-index:2147483647;display:none}"
            "#anycoder-debug pre{margin:0;white-space:pre-wrap;word-break:break-word}"
            "</style>\n"
            "<div id=\"anycoder-debug\"></div>\n"
            "<script>\n"
            "(function(){\n"
            " const el = document.getElementById('anycoder-debug');\n"
            " function show(){ if(el && el.style.display!=='block'){ el.style.display='block'; } }\n"
            " function log(msg){ try{ show(); const pre=document.createElement('pre'); pre.textContent=msg; el.appendChild(pre);}catch(e){} }\n"
            " const origError = console.error.bind(console);\n"
            " console.error = function(){ origError.apply(console, arguments); try{ log('console.error: ' + Array.from(arguments).map(a=>{try{return (typeof a==='string')?a:JSON.stringify(a);}catch(e){return String(a);}}).join(' ')); }catch(e){} };\n"
            " window.addEventListener('error', e => { log('window.onerror: ' + (e && e.message ? e.message : 'Unknown error')); });\n"
            " window.addEventListener('unhandledrejection', e => { try{ const r=e && e.reason; log('unhandledrejection: ' + (r && (r.message || JSON.stringify(r)))); }catch(err){ log('unhandledrejection'); } });\n"
            "})();\n"
            "</script>"
        )

    @staticmethod
    def _create_cleanup_script() -> str:
        """Create cleanup script for transformers.js apps.

        Returns an HTML script snippet that, on pagehide/beforeunload,
        best-effort deletes CacheStorage entries and IndexedDB databases so
        model weights downloaded in the preview do not accumulate.
        """
        return (
            "<script>\n"
            "(function(){\n"
            " function cleanup(){\n"
            " try { if (window.caches && caches.keys) { caches.keys().then(keys => keys.forEach(k => caches.delete(k))); } } catch(e){}\n"
            " try { if (window.indexedDB && indexedDB.databases) { indexedDB.databases().then(dbs => dbs.forEach(db => db && db.name && indexedDB.deleteDatabase(db.name))); } } catch(e){}\n"
            " }\n"
            " window.addEventListener('pagehide', cleanup, { once: true });\n"
            " window.addEventListener('beforeunload', cleanup, { once: true });\n"
            "})();\n"
            "</script>"
        )
|
253 |
+
|
254 |
+
class SvelteProcessor:
    """Handles Svelte specific code processing.

    A Svelte project is modeled as two files: src/App.svelte and
    src/app.css.
    """

    @staticmethod
    def parse_svelte_output(text: str) -> Dict[str, str]:
        """Parse Svelte output to extract individual files.

        Returns a dict with keys 'src/App.svelte' and 'src/app.css'; any
        file not found in *text* maps to ''.  Fenced code blocks are tried
        first; a ``=== filename ===`` sectioned format is used as a
        fallback for whichever files are still missing.
        """
        files = {
            'src/App.svelte': '',
            'src/app.css': ''
        }

        if not text:
            return files

        # Extract using code block patterns.  A missing closing fence is
        # tolerated (\Z) so that truncated model output still yields usable
        # content — consistent with TransformersJSProcessor's patterns.
        svelte_pattern = r'```svelte\s*\n([\s\S]*?)(?:\n```|\Z)'
        css_pattern = r'```css\s*\n([\s\S]*?)(?:\n```|\Z)'

        svelte_match = re.search(svelte_pattern, text, re.IGNORECASE)
        css_match = re.search(css_pattern, text, re.IGNORECASE)

        if svelte_match:
            files['src/App.svelte'] = svelte_match.group(1).strip()
        if css_match:
            files['src/app.css'] = css_match.group(1).strip()

        # Fallback: support === filename === format
        if not (files['src/App.svelte'] and files['src/app.css']):
            fallback_files = MultipageProcessor.parse_multipage_html_output(text)
            for key in files.keys():
                if key in fallback_files:
                    files[key] = fallback_files[key]

        return files

    @staticmethod
    def format_svelte_output(files: Dict[str, str]) -> str:
        """Format Svelte files into a single ``=== filename ===`` display string."""
        output = []
        output.append("=== src/App.svelte ===")
        output.append(files.get('src/App.svelte', ''))
        output.append("\n=== src/app.css ===")
        output.append(files.get('src/app.css', ''))
        return '\n'.join(output)
|
298 |
+
|
299 |
+
class MultipageProcessor:
    """Handles multi-page HTML projects.

    Projects are represented as a dict mapping relative file paths to file
    contents, serialized for display as ``=== filename ===`` sections.
    """

    @staticmethod
    def parse_multipage_html_output(text: str) -> Dict[str, str]:
        """Parse multi-page HTML output formatted as === filename === sections.

        Returns a (possibly empty) dict of {path: content}.  Surrounding
        code fences are stripped both from the whole text and from each
        section's content.
        """
        if not text:
            return {}

        from utils import remove_code_block
        cleaned = remove_code_block(text)
        files: Dict[str, str] = {}

        # Each section runs from its header to the next header (or EOF).
        pattern = re.compile(r"^===\s*([^=\n]+?)\s*===\s*\n([\s\S]*?)(?=\n===\s*[^=\n]+?\s*===|\Z)", re.MULTILINE)

        for m in pattern.finditer(cleaned):
            name = m.group(1).strip()
            content = m.group(2).strip()
            # Remove accidental trailing fences
            content = re.sub(r"^```\w*\s*\n|\n```\s*$", "", content)
            files[name] = content

        return files

    @staticmethod
    def format_multipage_output(files: Dict[str, str]) -> str:
        """Format files back into === filename === sections.

        index.html is always emitted first; remaining paths are sorted for
        deterministic output.  Returns '' for an empty/non-dict input.
        """
        if not isinstance(files, dict) or not files:
            return ""

        # Order with index.html first
        ordered_paths = []
        if 'index.html' in files:
            ordered_paths.append('index.html')
        for path in sorted(files.keys()):
            if path == 'index.html':
                continue
            ordered_paths.append(path)

        parts: List[str] = []
        for path in ordered_paths:
            parts.append(f"=== {path} ===")
            parts.append((files.get(path) or '').rstrip())

        return "\n".join(parts)

    @staticmethod
    def validate_and_autofix_files(files: Dict[str, str]) -> Dict[str, str]:
        """Ensure minimal contract for multi-file sites.

        Normalizes paths (strips leading '/'), synthesizes an index.html
        linking to the other HTML pages when one is missing, and creates
        minimal stubs for locally-referenced .css/.js/.html files that do
        not exist, so internal links never 404 in preview.
        """
        if not isinstance(files, dict) or not files:
            return files or {}

        normalized: Dict[str, str] = {}
        for k, v in files.items():
            safe_key = k.strip().lstrip('/')
            normalized[safe_key] = v

        html_files = [p for p in normalized.keys() if p.lower().endswith('.html')]
        has_index = 'index.html' in normalized

        # Create index.html if missing but other HTML files exist
        if not has_index and html_files:
            links = '\n'.join([f"<li><a href=\"{p}\">{p}</a></li>" for p in html_files])
            normalized['index.html'] = (
                "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n<meta charset=\"utf-8\"/>\n"
                "<meta name=\"viewport\" content=\"width=device-width, initial-scale=1\"/>\n"
                "<title>Site Index</title>\n</head>\n<body>\n<h1>Site</h1>\n<ul>\n"
                + links + "\n</ul>\n</body>\n</html>"
            )

        # Collect asset references from every HTML page
        asset_refs: set[str] = set()
        patterns = [
            re.compile(r"<link[^>]+href=\"([^\"]+)\""),
            re.compile(r"<script[^>]+src=\"([^\"]+)\""),
            re.compile(r"<img[^>]+src=\"([^\"]+)\""),
            re.compile(r"<a[^>]+href=\"([^\"]+)\"")
        ]

        for path, content in list(normalized.items()):
            if not path.lower().endswith('.html'):
                continue
            for patt in patterns:
                for m in patt.finditer(content or ""):
                    ref = (m.group(1) or "").strip()
                    # Only local references need stubs
                    if not ref or ref.startswith(('http://', 'https://', 'data:', '#')):
                        continue
                    asset_refs.add(ref.lstrip('/'))

        # Add minimal stubs for missing references
        for ref in list(asset_refs):
            if ref not in normalized:
                if ref.lower().endswith('.css'):
                    normalized[ref] = "/* generated stub */\n"
                elif ref.lower().endswith('.js'):
                    normalized[ref] = "// generated stub\n"
                elif ref.lower().endswith('.html'):
                    normalized[ref] = (
                        "<!DOCTYPE html>\n<html lang=\"en\">\n<head><meta charset=\"utf-8\"/><meta name=\"viewport\" content=\"width=device-width, initial-scale=1\"/><title>Page</title></head>\n"
                        "<body><main><h1>Placeholder page</h1><p>This page was auto-created to satisfy an internal link.</p></main></body>\n</html>"
                    )

        return normalized

    @staticmethod
    def inline_multipage_into_single_preview(files: Dict[str, str]) -> str:
        """Inline local CSS/JS for iframe preview.

        Starts from index.html, replaces local <link>/<script src> tags
        with inline <style>/<script> content, and injects a client-side
        navigation shim so links to the other pages work inside a single
        iframe document.  Returns '' when there is no index.html.
        """
        html = files.get('index.html', '')
        if not html:
            return ""

        doc = html

        # Inline CSS links whose href resolves to a file in the project
        def _inline_css(match):
            href = match.group(1)
            if href in files:
                return f"<style>\n{files[href]}\n</style>"
            return match.group(0)

        doc = re.sub(r"<link[^>]+href=\"([^\"]+)\"[^>]*/?>", _inline_css, doc, flags=re.IGNORECASE)

        # Inline JS scripts whose src resolves to a file in the project
        def _inline_js(match):
            src = match.group(1)
            if src in files:
                return f"<script>\n{files[src]}\n</script>"
            return match.group(0)

        doc = re.sub(r"<script[^>]+src=\"([^\"]+)\"[^>]*>\s*</script>", _inline_js, doc, flags=re.IGNORECASE)

        # Add client-side navigation for other pages.
        # Bug fix: the returned document was previously discarded, so the
        # navigation script never made it into the preview.
        doc = MultipageProcessor._add_client_side_navigation(doc, files)

        return doc

    @staticmethod
    def _add_client_side_navigation(doc: str, files: Dict[str, str]) -> str:
        """Add client-side navigation for multi-page preview.

        Embeds all HTML pages base64-encoded in a <script> and intercepts
        clicks on local links, swapping document.body without a real page
        load.  Failures are non-fatal: the document is returned unchanged.
        """
        try:
            html_pages = {k: v for k, v in files.items() if k.lower().endswith('.html')}

            # Use the already-inlined body for index.html, not the raw file
            _index_body = re.search(r"<body[^>]*>([\s\S]*?)</body>", doc, flags=re.IGNORECASE)
            html_pages['index.html'] = _index_body.group(1) if _index_body else doc

            encoded = base64.b64encode(json.dumps(html_pages).encode('utf-8')).decode('ascii')

            nav_script = (
                "<script>\n"
                "(function(){\n"
                f" const MP_FILES = JSON.parse(atob('{encoded}'));\n"
                " function extractBody(html){\n"
                " try {\n"
                " const doc = new DOMParser().parseFromString(html, 'text/html');\n"
                " const title = doc.querySelector('title'); if (title) document.title = title.textContent || document.title;\n"
                " return doc.body ? doc.body.innerHTML : html;\n"
                " } catch(e){ return html; }\n"
                " }\n"
                " function loadPage(path){\n"
                " if (!MP_FILES[path]) return false;\n"
                " const bodyHTML = extractBody(MP_FILES[path]);\n"
                " document.body.innerHTML = bodyHTML;\n"
                " attach();\n"
                " try { history.replaceState({}, '', '#'+path); } catch(e){}\n"
                " return true;\n"
                " }\n"
                " function clickHandler(e){\n"
                " const a = e.target && e.target.closest ? e.target.closest('a') : null;\n"
                " if (!a) return;\n"
                " const href = a.getAttribute('href') || '';\n"
                " if (!href || href.startsWith('#') || /^https?:/i.test(href) || href.startsWith('mailto:') || href.startsWith('tel:')) return;\n"
                " const clean = href.split('#')[0].split('?')[0];\n"
                " if (MP_FILES[clean]) { e.preventDefault(); loadPage(clean); }\n"
                " }\n"
                " function attach(){ document.removeEventListener('click', clickHandler, true); document.addEventListener('click', clickHandler, true); }\n"
                " document.addEventListener('DOMContentLoaded', function(){ attach(); const initial = (location.hash||'').slice(1); if (initial && MP_FILES[initial]) loadPage(initial); }, { once:true });\n"
                "})();\n"
                "</script>"
            )

            m = re.search(r"</body>", doc, flags=re.IGNORECASE)
            if m:
                i = m.start()
                doc = doc[:i] + nav_script + doc[i:]
            else:
                doc = doc + nav_script

        except Exception:
            pass  # Non-fatal in preview

        return doc
|
491 |
+
|
492 |
+
class MediaIntegrator:
    """Injects generated media (images, video, audio) into HTML output."""

    @staticmethod
    def apply_generated_media_to_html(html_content: str, user_prompt: str,
                                      enable_text_to_image: bool = False,
                                      enable_image_to_image: bool = False,
                                      input_image_data=None,
                                      image_to_image_prompt: Optional[str] = None,
                                      text_to_image_prompt: Optional[str] = None,
                                      enable_image_to_video: bool = False,
                                      image_to_video_prompt: Optional[str] = None,
                                      session_id: Optional[str] = None,
                                      enable_text_to_video: bool = False,
                                      text_to_video_prompt: Optional[str] = None,
                                      enable_text_to_music: bool = False,
                                      text_to_music_prompt: Optional[str] = None,
                                      token=None) -> str:
        """Apply at most one media generation to *html_content* and return it.

        Priority when several flags are enabled: image-to-video,
        text-to-video, text-to-music, image-to-image, text-to-image.
        Multi-page ("=== file ===") input is detected; media is inserted
        into the entry HTML page and the project re-serialized.
        input_image_data is passed through to the generators unmodified
        (presumably image bytes or a PIL image — confirm with media_generation).
        """
        # Detect multi-page structure up front; failures fall back to
        # treating the content as a single document.
        multipage_files: Dict[str, str] = {}
        entry_html_path = None
        is_multipage = False
        try:
            multipage_files = MultipageProcessor.parse_multipage_html_output(html_content) or {}
            if multipage_files:
                is_multipage = True
                if 'index.html' in multipage_files:
                    entry_html_path = 'index.html'
                else:
                    entry_html_path = next((p for p in multipage_files.keys() if p.lower().endswith('.html')), None)
        except Exception:
            pass

        if is_multipage and entry_html_path:
            updated = multipage_files.get(entry_html_path, html_content)
        else:
            updated = html_content

        try:
            # First enabled mode wins; image-based modes require input data.
            if enable_image_to_video and input_image_data is not None:
                updated = MediaIntegrator._apply_image_to_video(updated, user_prompt, image_to_video_prompt, input_image_data, session_id, token)
            elif enable_text_to_video:
                updated = MediaIntegrator._apply_text_to_video(updated, user_prompt, text_to_video_prompt, session_id, token)
            elif enable_text_to_music:
                updated = MediaIntegrator._apply_text_to_music(updated, user_prompt, text_to_music_prompt, session_id, token)
            elif enable_image_to_image and input_image_data is not None:
                updated = MediaIntegrator._apply_image_to_image(updated, user_prompt, image_to_image_prompt, input_image_data, token)
            elif enable_text_to_image:
                updated = MediaIntegrator._apply_text_to_image(updated, user_prompt, text_to_image_prompt, token)
        except Exception as e:
            print(f"[MediaApply] Error during media generation: {str(e)}")

        # Re-serialize multi-page projects with the updated entry page
        if is_multipage and entry_html_path:
            multipage_files[entry_html_path] = updated
            return MultipageProcessor.format_multipage_output(multipage_files)

        return updated

    @staticmethod
    def _apply_image_to_video(html_content: str, user_prompt: str, prompt: Optional[str],
                              input_image_data, session_id: Optional[str], token) -> str:
        """Generate a video from the input image and embed it; on any
        failure the HTML is returned unchanged."""
        effective_prompt = (prompt or user_prompt or "").strip()
        print(f"[MediaApply] Applying image-to-video with prompt: {effective_prompt}")

        try:
            tag = generate_video_from_image(input_image_data, effective_prompt, session_id=session_id, token=token)
            if tag.startswith("Error") or not validate_video_html(tag):
                return html_content
            return MediaIntegrator._place_media_in_html(html_content, tag, "video")
        except Exception as e:
            print(f"[MediaApply] Image-to-video generation failed: {str(e)}")

        return html_content

    @staticmethod
    def _apply_text_to_video(html_content: str, user_prompt: str, prompt: Optional[str],
                             session_id: Optional[str], token) -> str:
        """Generate a video from text and embed it; on any failure the
        HTML is returned unchanged."""
        effective_prompt = (prompt or user_prompt or "").strip()
        print(f"[MediaApply] Applying text-to-video with prompt: {effective_prompt}")

        try:
            tag = generate_video_from_text(effective_prompt, session_id=session_id, token=token)
            if tag.startswith("Error") or not validate_video_html(tag):
                return html_content
            return MediaIntegrator._place_media_in_html(html_content, tag, "video")
        except Exception as e:
            print(f"[MediaApply] Text-to-video generation failed: {str(e)}")

        return html_content

    @staticmethod
    def _apply_text_to_music(html_content: str, user_prompt: str, prompt: Optional[str],
                             session_id: Optional[str], token) -> str:
        """Generate music from text and embed the audio tag; on any
        failure the HTML is returned unchanged."""
        effective_prompt = (prompt or user_prompt or "").strip()
        print(f"[MediaApply] Applying text-to-music with prompt: {effective_prompt}")

        try:
            tag = generate_music_from_text(effective_prompt, session_id=session_id, token=token)
            if not tag.startswith("Error"):
                return MediaIntegrator._place_media_in_html(html_content, tag, "audio")
        except Exception as e:
            print(f"[MediaApply] Text-to-music generation failed: {str(e)}")

        return html_content

    @staticmethod
    def _apply_image_to_image(html_content: str, user_prompt: str, prompt: Optional[str],
                              input_image_data, token) -> str:
        """Transform the input image per the prompt and embed the result;
        on any failure the HTML is returned unchanged."""
        effective_prompt = (prompt or user_prompt or "").strip()
        print(f"[MediaApply] Applying image-to-image with prompt: {effective_prompt}")

        try:
            tag = generate_image_to_image(input_image_data, effective_prompt, token=token)
            if not tag.startswith("Error"):
                return MediaIntegrator._place_media_in_html(html_content, tag, "image")
        except Exception as e:
            print(f"[MediaApply] Image-to-image generation failed: {str(e)}")

        return html_content

    @staticmethod
    def _apply_text_to_image(html_content: str, user_prompt: str, prompt: Optional[str], token) -> str:
        """Generate an image from text and embed it; on any failure the
        HTML is returned unchanged."""
        effective_prompt = (prompt or user_prompt or "").strip()
        print(f"[MediaApply] Applying text-to-image with prompt: {effective_prompt}")

        try:
            tag = generate_image_with_qwen(effective_prompt, 0, token=token)
            if not tag.startswith("Error"):
                return MediaIntegrator._place_media_in_html(html_content, tag, "image")
        except Exception as e:
            print(f"[MediaApply] Text-to-image generation failed: {str(e)}")

        return html_content

    @staticmethod
    def _place_media_in_html(html_content: str, media_html: str, media_type: str) -> str:
        """Insert *media_html* at the most prominent sensible spot.

        Tries, in order: just inside <main>, a hero <section>, a container
        <div>, or <body>; falls back to just before </body>, and finally
        appends at the very end.
        """
        anchor_patterns = (
            r'(<main[^>]*>)',
            r'(<section[^>]*class="[^"]*hero[^"]*"[^>]*>)',
            r'(<div[^>]*class="[^"]*container[^"]*"[^>]*>)',
            r'(<body[^>]*>)',
        )

        for anchor in anchor_patterns:
            hit = re.search(anchor, html_content, re.IGNORECASE)
            if not hit:
                continue
            at = hit.end()
            container_class = f"{media_type}-container"
            wrapped = f'\n <div class="{container_class}" style="margin: 20px 0; text-align: center;">\n {media_html}\n </div>'
            return html_content[:at] + wrapped + html_content[at:]

        # Fallback: append before closing body
        body_close = html_content.rfind('</body>')
        if body_close != -1:
            return html_content[:body_close] + f'\n {media_html}\n' + html_content[body_close:]

        # Last resort: append at end
        return html_content + f'\n{media_html}'
|
654 |
+
|
655 |
+
# Export main functions and classes
|
656 |
+
# Module-level singleton instances, kept for backward compatibility.
# Every method on these classes is a @staticmethod, so the instances
# carry no state and the wrappers below may delegate via the classes.
code_processor = CodeProcessor()
transformers_processor = TransformersJSProcessor()
svelte_processor = SvelteProcessor()
multipage_processor = MultipageProcessor()
media_integrator = MediaIntegrator()

# Module-level convenience wrappers (the public functional API).

def is_streamlit_code(code: str) -> bool:
    """Wrapper for CodeProcessor.is_streamlit_code."""
    return CodeProcessor.is_streamlit_code(code)

def is_gradio_code(code: str) -> bool:
    """Wrapper for CodeProcessor.is_gradio_code."""
    return CodeProcessor.is_gradio_code(code)

def extract_html_document(text: str) -> str:
    """Wrapper for CodeProcessor.extract_html_document."""
    return CodeProcessor.extract_html_document(text)

def parse_transformers_js_output(text: str) -> Dict[str, str]:
    """Wrapper for TransformersJSProcessor.parse_transformers_js_output."""
    return TransformersJSProcessor.parse_transformers_js_output(text)

def format_transformers_js_output(files: Dict[str, str]) -> str:
    """Wrapper for TransformersJSProcessor.format_transformers_js_output."""
    return TransformersJSProcessor.format_transformers_js_output(files)

def build_transformers_inline_html(files: Dict[str, str]) -> str:
    """Wrapper for TransformersJSProcessor.build_transformers_inline_html."""
    return TransformersJSProcessor.build_transformers_inline_html(files)

def parse_svelte_output(text: str) -> Dict[str, str]:
    """Wrapper for SvelteProcessor.parse_svelte_output."""
    return SvelteProcessor.parse_svelte_output(text)

def format_svelte_output(files: Dict[str, str]) -> str:
    """Wrapper for SvelteProcessor.format_svelte_output."""
    return SvelteProcessor.format_svelte_output(files)

def parse_multipage_html_output(text: str) -> Dict[str, str]:
    """Wrapper for MultipageProcessor.parse_multipage_html_output."""
    return MultipageProcessor.parse_multipage_html_output(text)

def format_multipage_output(files: Dict[str, str]) -> str:
    """Wrapper for MultipageProcessor.format_multipage_output."""
    return MultipageProcessor.format_multipage_output(files)

def validate_and_autofix_files(files: Dict[str, str]) -> Dict[str, str]:
    """Wrapper for MultipageProcessor.validate_and_autofix_files."""
    return MultipageProcessor.validate_and_autofix_files(files)

def inline_multipage_into_single_preview(files: Dict[str, str]) -> str:
    """Wrapper for MultipageProcessor.inline_multipage_into_single_preview."""
    return MultipageProcessor.inline_multipage_into_single_preview(files)

def apply_generated_media_to_html(html_content: str, user_prompt: str, **kwargs) -> str:
    """Wrapper for MediaIntegrator.apply_generated_media_to_html."""
    return MediaIntegrator.apply_generated_media_to_html(html_content, user_prompt, **kwargs)
|