mgbam committed on
Commit 7caa96d · verified · 1 Parent(s): 8341729

Create generation_engine.py

Files changed (1)
  1. generation_engine.py +850 -0
generation_engine.py ADDED
@@ -0,0 +1,850 @@
"""
Core code generation engine with LLM interactions and advanced generation logic.
"""

import uuid
import time
import re
from typing import Dict, List, Optional, Tuple, Generator, Any
import gradio as gr

from utils import (
    get_inference_client, remove_code_block, extract_text_from_file,
    create_multimodal_message, apply_search_replace_changes, cleanup_session_media, reap_old_media
)
from web_utils import extract_website_content, enhance_query_with_search
from code_processing import (
    is_streamlit_code, is_gradio_code, extract_html_document,
    parse_transformers_js_output, format_transformers_js_output, build_transformers_inline_html,
    parse_svelte_output, format_svelte_output,
    parse_multipage_html_output, format_multipage_output, validate_and_autofix_files,
    inline_multipage_into_single_preview, apply_generated_media_to_html
)
from media_generation import MediaGenerator
from config import (
    HTML_SYSTEM_PROMPT, TRANSFORMERS_JS_SYSTEM_PROMPT, SVELTE_SYSTEM_PROMPT, GENERIC_SYSTEM_PROMPT,
    SEARCH_START, DIVIDER, REPLACE_END, TEMP_DIR_TTL_SECONDS
)

class GenerationEngine:
    """Advanced code generation engine with multi-model support and intelligent processing"""

    def __init__(self):
        self.media_generator = MediaGenerator()
        self._active_generations = {}
        self._generation_stats = {
            'total_requests': 0,
            'successful_generations': 0,
            'errors': 0,
            'avg_response_time': 0.0
        }

    def generate_code(self,
                      query: Optional[str] = None,
                      vlm_image: Optional[gr.Image] = None,
                      gen_image: Optional[gr.Image] = None,
                      file: Optional[str] = None,
                      website_url: Optional[str] = None,
                      settings: Dict[str, Any] = None,
                      history: Optional[List[Tuple[str, str]]] = None,
                      current_model: Dict = None,
                      enable_search: bool = False,
                      language: str = "html",
                      provider: str = "auto",
                      **media_options) -> Generator[Dict[str, Any], None, None]:
        """
        Main code generation method with comprehensive options and streaming support
        """
        start_time = time.time()
        session_id = str(uuid.uuid4())

        try:
            self._active_generations[session_id] = {
                'start_time': start_time,
                'status': 'initializing',
                'progress': 0
            }

            # Initialize and validate inputs
            query = query or ''
            history = history or []
            settings = settings or {}
            current_model = current_model or {'id': 'Qwen/Qwen3-Coder-480B-A35B-Instruct', 'name': 'Qwen3-Coder'}

            # Update statistics
            self._generation_stats['total_requests'] += 1

            # Cleanup old resources
            self._cleanup_resources(session_id)

            # Determine if this is a modification request
            has_existing_content = self._check_existing_content(history)

            # Handle modification requests with search/replace
            if has_existing_content and query.strip():
                yield from self._handle_modification_request(query, history, current_model, provider, session_id)
                return

            # Process file inputs and website content
            enhanced_query = self._process_inputs(query, file, website_url, enable_search)

            # Select appropriate system prompt
            system_prompt = self._select_system_prompt(language, enable_search, has_existing_content)

            # Prepare messages for LLM
            messages = self._prepare_messages(history, system_prompt, enhanced_query, vlm_image)

            # Generate code with streaming
            yield from self._stream_generation(
                messages, current_model, provider, language,
                enhanced_query, gen_image, session_id, media_options
            )

            # Update success statistics
            self._generation_stats['successful_generations'] += 1
            elapsed_time = time.time() - start_time
            self._update_avg_response_time(elapsed_time)

        except Exception as e:
            self._generation_stats['errors'] += 1
            error_message = f"Generation Error: {str(e)}"
            print(f"[GenerationEngine] Error: {error_message}")

            yield {
                'code_output': error_message,
                'history_output': self._convert_history_to_messages(history),
                'sandbox': f"<div style='padding:2rem;text-align:center;color:#dc2626;background:#fef2f2;border:1px solid #fecaca;border-radius:8px;'><h3>Generation Failed</h3><p>{error_message}</p></div>",
                'status': 'error'
            }
        finally:
            # Cleanup generation tracking
            self._active_generations.pop(session_id, None)

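    # Illustrative usage sketch (not part of the committed file): generate_code is a
    # generator, so a caller drains it and routes each yielded update dict to the UI.
    # A minimal sketch, assuming an instance like the module-level `generation_engine`
    # defined at the bottom of this file; `render_partial`/`render_final` are
    # hypothetical UI hooks:
    #
    #   for update in generation_engine.generate_code(query="Build a landing page",
    #                                                 language="html"):
    #       if update['status'] == 'streaming':
    #           render_partial(update['sandbox'])
    #       elif update['status'] in ('completed', 'error'):
    #           render_final(update.get('code_output'))
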
    def _cleanup_resources(self, session_id: str):
        """Clean up temporary resources"""
        try:
            cleanup_session_media(session_id)
            reap_old_media()
        except Exception as e:
            print(f"[GenerationEngine] Cleanup warning: {e}")

    def _check_existing_content(self, history: List[Tuple[str, str]]) -> bool:
        """Check if there's existing content to modify"""
        if not history:
            return False

        last_assistant_msg = history[-1][1] if history else ""
        content_indicators = [
            '<!DOCTYPE html>', '<html', 'import gradio', 'import streamlit',
            'def ', 'IMPORTED PROJECT FROM HUGGING FACE SPACE',
            '=== index.html ===', '=== index.js ===', '=== style.css ==='
        ]

        return any(indicator in last_assistant_msg for indicator in content_indicators)

    def _handle_modification_request(self, query: str, history: List[Tuple[str, str]],
                                     current_model: Dict, provider: str, session_id: str) -> Generator:
        """Handle search/replace modification requests"""
        try:
            print("[GenerationEngine] Processing modification request")

            client = get_inference_client(current_model['id'], provider)
            last_assistant_msg = history[-1][1] if history else ""

            # Create search/replace system prompt
            system_prompt = f"""You are a code editor assistant. Generate EXACT search/replace blocks for the requested modifications.

CRITICAL REQUIREMENTS:
1. Use EXACTLY these markers: {SEARCH_START}, {DIVIDER}, {REPLACE_END}
2. The SEARCH block must match existing code EXACTLY (including whitespace)
3. Generate multiple blocks if needed for different changes
4. Only include specific lines that need to change with sufficient context
5. DO NOT include explanations outside the blocks

Example:
{SEARCH_START}
<h1>Old Title</h1>
{DIVIDER}
<h1>New Title</h1>
{REPLACE_END}"""

            user_prompt = f"""Existing code:
{last_assistant_msg}

Modification request:
{query}

Generate the exact search/replace blocks needed."""

            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ]

            # Generate modification instructions
            response = self._call_llm(client, current_model, messages, max_tokens=4000, temperature=0.1)

            if response:
                # Apply changes
                if '=== index.html ===' in last_assistant_msg:
                    modified_content = self._apply_transformers_js_changes(last_assistant_msg, response)
                else:
                    modified_content = apply_search_replace_changes(last_assistant_msg, response)

                if modified_content != last_assistant_msg:
                    updated_history = history + [(query, modified_content)]

                    yield {
                        'code_output': modified_content,
                        'history': updated_history,
                        'sandbox': self._generate_preview(modified_content, "html"),
                        'history_output': self._convert_history_to_messages(updated_history),
                        'status': 'completed'
                    }
                    return

            # Fallback to normal generation if modification failed
            print("[GenerationEngine] Search/replace failed, falling back to normal generation")

        except Exception as e:
            print(f"[GenerationEngine] Modification request failed: {e}")

    def _process_inputs(self, query: str, file: Optional[str], website_url: Optional[str],
                        enable_search: bool) -> str:
        """Process file and website inputs, enhance with search if enabled"""
        enhanced_query = query

        # Process file input
        if file:
            file_text = extract_text_from_file(file)
            if file_text:
                file_text = file_text[:5000]  # Limit size
                enhanced_query = f"{enhanced_query}\n\n[Reference file content]\n{file_text}"

        # Process website URL
        if website_url and website_url.strip():
            website_text = extract_website_content(website_url.strip())
            if website_text and not website_text.startswith("Error"):
                website_text = website_text[:8000]  # Limit size
                enhanced_query = f"{enhanced_query}\n\n[Website content to redesign]\n{website_text}"
            elif website_text and website_text.startswith("Error"):
                # Guarded with `website_text and` so an empty extraction result
                # cannot raise an AttributeError here
                fallback_guidance = """
Since I couldn't extract the website content, please provide:
1. What type of website is this?
2. What are the main features you want?
3. What's the target audience?
4. Any specific design preferences?"""
                enhanced_query = f"{enhanced_query}\n\n[Website extraction error: {website_text}]{fallback_guidance}"

        # Enhance with web search
        if enable_search:
            enhanced_query = enhance_query_with_search(enhanced_query, True)

        return enhanced_query

    def _select_system_prompt(self, language: str, enable_search: bool, has_existing_content: bool) -> str:
        """Select appropriate system prompt based on context"""
        if has_existing_content:
            return self._get_followup_system_prompt(language)

        # Add search enhancement to prompts if enabled
        search_enhancement = """

Use web search results when available to incorporate the latest best practices, frameworks, and technologies.""" if enable_search else ""

        if language == "html":
            return HTML_SYSTEM_PROMPT + search_enhancement
        elif language == "transformers.js":
            return TRANSFORMERS_JS_SYSTEM_PROMPT + search_enhancement
        elif language == "svelte":
            return SVELTE_SYSTEM_PROMPT + search_enhancement
        else:
            return GENERIC_SYSTEM_PROMPT.format(language=language) + search_enhancement

    def _get_followup_system_prompt(self, language: str) -> str:
        """Get follow-up system prompt for modifications"""
        return f"""You are an expert developer modifying existing {language} code.
Apply the requested changes using SEARCH/REPLACE blocks with these markers:
{SEARCH_START}, {DIVIDER}, {REPLACE_END}

Requirements:
- SEARCH blocks must match existing code EXACTLY
- Provide multiple blocks for different changes
- Include sufficient context to make matches unique
- Do not include explanations outside the blocks"""

    def _prepare_messages(self, history: List[Tuple[str, str]], system_prompt: str,
                          enhanced_query: str, vlm_image: Optional[gr.Image]) -> List[Dict]:
        """Prepare messages for LLM interaction"""
        messages = [{'role': 'system', 'content': system_prompt}]

        # Add history
        for user_msg, assistant_msg in history:
            # Handle multimodal content in history
            if isinstance(user_msg, list):
                text_content = ""
                for item in user_msg:
                    if isinstance(item, dict) and item.get("type") == "text":
                        text_content += item.get("text", "")
                user_msg = text_content if text_content else str(user_msg)

            messages.append({'role': 'user', 'content': user_msg})
            messages.append({'role': 'assistant', 'content': assistant_msg})

        # Add current query
        if vlm_image is not None:
            messages.append(create_multimodal_message(enhanced_query, vlm_image))
        else:
            messages.append({'role': 'user', 'content': enhanced_query})

        return messages

    def _stream_generation(self, messages: List[Dict], current_model: Dict, provider: str,
                           language: str, query: str, gen_image: Optional[gr.Image],
                           session_id: str, media_options: Dict) -> Generator:
        """Stream code generation with real-time updates"""

        try:
            client = get_inference_client(current_model['id'], provider)

            # Handle special model cases
            if current_model["id"] == "zai-org/GLM-4.5":
                yield from self._handle_glm_45_generation(client, messages, language, query, gen_image, session_id, media_options)
                return
            elif current_model["id"] == "zai-org/GLM-4.5V":
                yield from self._handle_glm_45v_generation(client, messages, language, query, session_id, media_options)
                return

            # Standard streaming generation
            completion = self._create_completion_stream(client, current_model, messages)
            content = ""

            # Process stream with intelligent updates
            for chunk in completion:
                chunk_content = self._extract_chunk_content(chunk, current_model)

                if chunk_content:
                    content += chunk_content

                    # Yield periodic updates based on language type
                    if language == "transformers.js":
                        yield from self._handle_transformers_streaming(content)
                    elif language == "svelte":
                        yield from self._handle_svelte_streaming(content)
                    else:
                        yield from self._handle_standard_streaming(content, language)

            # Final processing with media integration
            final_content = self._finalize_content(content, language, query, gen_image, session_id, media_options)

            yield {
                'code_output': final_content,
                'history': [(query, final_content)],
                'sandbox': self._generate_preview(final_content, language),
                'history_output': self._convert_history_to_messages([(query, final_content)]),
                'status': 'completed'
            }

        except Exception as e:
            # Chain the original exception so the traceback is preserved
            raise Exception(f"Streaming generation failed: {str(e)}") from e

    def _handle_glm_45_generation(self, client, messages, language, query, gen_image, session_id, media_options):
        """Handle GLM-4.5 specific generation"""
        try:
            stream = client.chat.completions.create(
                model="zai-org/GLM-4.5",
                messages=messages,
                stream=True,
                max_tokens=16384,
            )

            content = ""
            for chunk in stream:
                if chunk.choices[0].delta.content:
                    content += chunk.choices[0].delta.content
                    clean_code = remove_code_block(content)

                    yield {
                        'code_output': gr.update(value=clean_code, language=self._get_gradio_language(language)),
                        'sandbox': self._generate_preview(clean_code, language),
                        'status': 'streaming'
                    }

            # Recompute from the accumulated content so this cannot raise a
            # NameError if the stream produced no chunks
            clean_code = remove_code_block(content)

            # Apply media generation
            final_content = apply_generated_media_to_html(
                clean_code, query, session_id=session_id, **media_options
            )

            yield {
                'code_output': final_content,
                'history': [(query, final_content)],
                'sandbox': self._generate_preview(final_content, language),
                'history_output': self._convert_history_to_messages([(query, final_content)]),
                'status': 'completed'
            }

        except Exception as e:
            raise Exception(f"GLM-4.5 generation failed: {str(e)}") from e

    def _handle_glm_45v_generation(self, client, messages, language, query, session_id, media_options):
        """Handle GLM-4.5V multimodal generation"""
        try:
            # Enhanced system prompt for multimodal
            enhanced_messages = [
                {"role": "system", "content": """You are an expert web developer creating modern, responsive applications.

Output complete, standalone HTML documents that render directly in browsers.
- Include proper DOCTYPE, head, and body structure
- Use modern CSS frameworks and responsive design
- Ensure accessibility and mobile compatibility
- Output raw HTML without escape characters

Always output only the HTML code inside ```html ... ``` blocks."""}
            ] + messages[1:]  # Skip original system message

            stream = client.chat.completions.create(
                model="zai-org/GLM-4.5V",
                messages=enhanced_messages,
                stream=True,
                max_tokens=16384,
            )

            content = ""
            for chunk in stream:
                if hasattr(chunk, "choices") and chunk.choices and hasattr(chunk.choices[0], "delta"):
                    delta_content = getattr(chunk.choices[0].delta, "content", None)
                    if delta_content:
                        content += delta_content
                        clean_code = remove_code_block(content)

                        # Handle escaped characters
                        if "\\n" in clean_code:
                            clean_code = clean_code.replace("\\n", "\n")
                        if "\\t" in clean_code:
                            clean_code = clean_code.replace("\\t", "\t")

                        yield {
                            'code_output': gr.update(value=clean_code, language=self._get_gradio_language(language)),
                            'sandbox': self._generate_preview(clean_code, language),
                            'status': 'streaming'
                        }

            # Clean final content
            clean_code = remove_code_block(content)
            if "\\n" in clean_code:
                clean_code = clean_code.replace("\\n", "\n")
            if "\\t" in clean_code:
                clean_code = clean_code.replace("\\t", "\t")

            yield {
                'code_output': clean_code,
                'history': [(query, clean_code)],
                'sandbox': self._generate_preview(clean_code, language),
                'history_output': self._convert_history_to_messages([(query, clean_code)]),
                'status': 'completed'
            }

        except Exception as e:
            raise Exception(f"GLM-4.5V generation failed: {str(e)}") from e

    def _create_completion_stream(self, client, current_model, messages):
        """Create completion stream based on model type"""
        if current_model["id"] in ("codestral-2508", "mistral-medium-2508"):
            return client.chat.stream(
                model=current_model["id"],
                messages=messages,
                max_tokens=16384
            )
        elif current_model["id"] in ("gpt-5", "grok-4", "claude-opus-4.1"):
            model_name_map = {
                "gpt-5": "GPT-5",
                "grok-4": "Grok-4",
                "claude-opus-4.1": "Claude-Opus-4.1"
            }
            return client.chat.completions.create(
                model=model_name_map[current_model["id"]],
                messages=messages,
                stream=True,
                max_tokens=16384
            )
        else:
            return client.chat.completions.create(
                model=current_model["id"],
                messages=messages,
                stream=True,
                max_tokens=16384
            )

    def _extract_chunk_content(self, chunk, current_model) -> Optional[str]:
        """Extract content from stream chunk based on model format"""
        try:
            if current_model["id"] in ("codestral-2508", "mistral-medium-2508"):
                # Mistral format
                if (hasattr(chunk, "data") and chunk.data and
                        hasattr(chunk.data, "choices") and chunk.data.choices and
                        hasattr(chunk.data.choices[0], "delta") and
                        hasattr(chunk.data.choices[0].delta, "content")):
                    return chunk.data.choices[0].delta.content
            else:
                # OpenAI format
                if (hasattr(chunk, "choices") and chunk.choices and
                        hasattr(chunk.choices[0], "delta") and
                        hasattr(chunk.choices[0].delta, "content")):

                    content = chunk.choices[0].delta.content

                    # Handle GPT-5 thinking placeholders
                    if current_model["id"] == "gpt-5" and content:
                        if self._is_placeholder_thinking_only(content):
                            return None  # Skip placeholder content
                        return self._strip_placeholder_thinking(content)

                    return content
        except Exception:
            pass

        return None

    def _handle_transformers_streaming(self, content: str) -> Generator:
        """Handle streaming for transformers.js projects"""
        files = parse_transformers_js_output(content)
        has_all_files = all([files.get('index.html'), files.get('index.js'), files.get('style.css')])

        if has_all_files:
            merged_html = build_transformers_inline_html(files)
            yield {
                'code_output': gr.update(value=merged_html, language="html"),
                'sandbox': self._send_transformers_to_sandbox(files),
                'status': 'streaming'
            }
        else:
            yield {
                'code_output': gr.update(value=content, language="html"),
                'sandbox': "<div style='padding:1em;text-align:center;color:#888;'>Generating transformers.js app...</div>",
                'status': 'streaming'
            }

    def _handle_svelte_streaming(self, content: str) -> Generator:
        """Handle streaming for Svelte projects"""
        yield {
            'code_output': gr.update(value=content, language="html"),
            'sandbox': "<div style='padding:1em;text-align:center;color:#888;'>Generating Svelte app...</div>",
            'status': 'streaming'
        }

    def _handle_standard_streaming(self, content: str, language: str) -> Generator:
        """Handle streaming for standard projects"""
        clean_code = remove_code_block(content)
        preview = self._generate_preview(clean_code, language)

        yield {
            'code_output': gr.update(value=clean_code, language=self._get_gradio_language(language)),
            'sandbox': preview,
            'status': 'streaming'
        }

    def _finalize_content(self, content: str, language: str, query: str,
                          gen_image: Optional[gr.Image], session_id: str, media_options: Dict) -> str:
        """Finalize content with post-processing and media integration"""
        final_content = remove_code_block(content)

        # Apply media generation for HTML projects
        if language == "html":
            final_content = apply_generated_media_to_html(
                final_content, query, session_id=session_id, **media_options
            )

        return final_content

    def _generate_preview(self, content: str, language: str) -> str:
        """Generate preview HTML for different content types"""
        if language == "html":
            # Handle multi-page HTML
            files = parse_multipage_html_output(content)
            files = validate_and_autofix_files(files)
            if files and files.get('index.html'):
                merged = inline_multipage_into_single_preview(files)
                return self._send_to_sandbox(merged)
            return self._send_to_sandbox(content)

        elif language == "streamlit":
            if is_streamlit_code(content):
                return self._send_streamlit_to_stlite(content)
            return "<div style='padding:1em;color:#888;text-align:center;'>Add `import streamlit as st` to enable Streamlit preview.</div>"

        elif language == "gradio":
            if is_gradio_code(content):
                return self._send_gradio_to_lite(content)
            return "<div style='padding:1em;color:#888;text-align:center;'>Add `import gradio as gr` to enable Gradio preview.</div>"

        elif language == "python":
            if is_streamlit_code(content):
                return self._send_streamlit_to_stlite(content)
            elif is_gradio_code(content):
                return self._send_gradio_to_lite(content)
            return "<div style='padding:1em;color:#888;text-align:center;'>Preview available for Streamlit/Gradio apps. Add the appropriate import.</div>"

        elif language == "transformers.js":
            files = parse_transformers_js_output(content)
            if files.get('index.html'):
                return self._send_transformers_to_sandbox(files)

        return "<div style='padding:1em;color:#888;text-align:center;'>Preview is only available for HTML, Streamlit, and Gradio applications.</div>"

    def _send_to_sandbox(self, code: str) -> str:
        """Send HTML to sandboxed iframe with cache busting"""
        import base64
        import time

        # Add cache-busting timestamp
        timestamp = str(int(time.time() * 1000))
        cache_bust_comment = f"<!-- refresh-{timestamp} -->"
        html_doc = cache_bust_comment + (code or "").strip()

        # Inline file URLs as data URIs for iframe compatibility
        try:
            html_doc = self._inline_file_urls_as_data_uris(html_doc)
        except Exception:
            pass

        encoded_html = base64.b64encode(html_doc.encode('utf-8')).decode('utf-8')
        data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
        return f'<iframe src="{data_uri}" width="100%" height="920px" sandbox="allow-scripts allow-same-origin allow-forms allow-popups allow-modals allow-presentation" allow="display-capture" key="preview-{timestamp}"></iframe>'

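    # Illustrative note (not part of the committed file): the preview needs no server
    # round-trip because the whole document is packed into the iframe's src. Roughly,
    # for code == "<h1>Hi</h1>" the method produces markup of the form
    #
    #   <iframe src="data:text/html;charset=utf-8;base64,PCEtLSByZWZy..." width="100%" ...>
    #
    # (base64 payload abbreviated here). The cache-busting comment makes each data URI
    # unique, so the iframe reliably reloads on every streaming update.
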
    def _send_streamlit_to_stlite(self, code: str) -> str:
        """Send Streamlit code to stlite preview"""
        import base64

        html_doc = f"""<!doctype html>
<html>
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no" />
<title>Streamlit Preview</title>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@stlite/[email protected]/build/stlite.css" />
<style>html,body{{margin:0;padding:0;height:100%;}} streamlit-app{{display:block;height:100%;}}</style>
<script type="module" src="https://cdn.jsdelivr.net/npm/@stlite/[email protected]/build/stlite.js"></script>
</head>
<body>
<streamlit-app>{code or ""}</streamlit-app>
</body>
</html>"""

        encoded_html = base64.b64encode(html_doc.encode('utf-8')).decode('utf-8')
        data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
        return f'<iframe src="{data_uri}" width="100%" height="920px" sandbox="allow-scripts allow-same-origin allow-forms allow-popups allow-modals allow-presentation" allow="display-capture"></iframe>'

    def _send_gradio_to_lite(self, code: str) -> str:
        """Send Gradio code to gradio-lite preview"""
        import base64

        html_doc = f"""<!doctype html>
<html>
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no" />
<title>Gradio Preview</title>
<script type="module" crossorigin src="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.js"></script>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@gradio/lite/dist/lite.css" />
<style>html,body{{margin:0;padding:0;height:100%;}} gradio-lite{{display:block;height:100%;}}</style>
</head>
<body>
<gradio-lite>{code or ""}</gradio-lite>
</body>
</html>"""

        encoded_html = base64.b64encode(html_doc.encode('utf-8')).decode('utf-8')
        data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
        return f'<iframe src="{data_uri}" width="100%" height="920px" sandbox="allow-scripts allow-same-origin allow-forms allow-popups allow-modals allow-presentation" allow="display-capture"></iframe>'

    def _send_transformers_to_sandbox(self, files: Dict[str, str]) -> str:
        """Send transformers.js files to sandbox"""
        merged_html = build_transformers_inline_html(files)
        return self._send_to_sandbox(merged_html)

    def _inline_file_urls_as_data_uris(self, html_doc: str) -> str:
        """Convert file:// URLs to data URIs for iframe compatibility"""
        import base64
        import mimetypes
        import urllib.parse

        def _file_url_to_data_uri(file_url: str) -> Optional[str]:
            try:
                parsed = urllib.parse.urlparse(file_url)
                path = urllib.parse.unquote(parsed.path)
                if not path:
                    return None
                with open(path, 'rb') as f:
                    raw = f.read()
                mime = mimetypes.guess_type(path)[0] or 'application/octet-stream'
                b64 = base64.b64encode(raw).decode()
                return f"data:{mime};base64,{b64}"
            except Exception:
                return None

        def _replace_file_url(match):
            url = match.group(1)
            data_uri = _file_url_to_data_uri(url)
            return f'src="{data_uri}"' if data_uri else match.group(0)

        html_doc = re.sub(r'src="(file:[^"]+)"', _replace_file_url, html_doc)
        html_doc = re.sub(r"src='(file:[^']+)'", _replace_file_url, html_doc)

        return html_doc

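    # Illustrative example (not part of the committed file): a local reference such as
    #
    #   <img src="file:///tmp/generated/hero.png">
    #
    # is rewritten into a self-contained form, roughly
    #
    #   <img src="data:image/png;base64,iVBORw0KGgo...">
    #
    # so the sandboxed data-URI iframe can render it without filesystem access. The
    # path and the (abbreviated) base64 payload are made up for illustration.
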
    def _apply_transformers_js_changes(self, original_content: str, changes_text: str) -> str:
        """Apply search/replace changes to transformers.js content"""
        # Parse original content
        files = parse_transformers_js_output(original_content)

        # Apply changes to each file
        blocks = self._parse_search_replace_blocks(changes_text)

        for block in blocks:
            search_text, replace_text = block

            # Determine target file and apply changes
            for file_key in ['index.html', 'index.js', 'style.css']:
                if search_text in files[file_key]:
                    files[file_key] = files[file_key].replace(search_text, replace_text)
                    break

        return format_transformers_js_output(files)

    def _parse_search_replace_blocks(self, changes_text: str) -> List[Tuple[str, str]]:
        """Parse search/replace blocks from text"""
        blocks = []
        current_block = ""
        lines = changes_text.split('\n')

        for line in lines:
            if line.strip() == SEARCH_START:
                if current_block.strip():
                    blocks.append(current_block.strip())
                current_block = line + '\n'
            elif line.strip() == REPLACE_END:
                current_block += line + '\n'
                blocks.append(current_block.strip())
                current_block = ""
            else:
                current_block += line + '\n'

        if current_block.strip():
            blocks.append(current_block.strip())

        # Parse each block into search/replace pairs
        parsed_blocks = []
        for block in blocks:
            lines = block.split('\n')
            search_lines = []
            replace_lines = []
            in_search = False
            in_replace = False

            for line in lines:
                if line.strip() == SEARCH_START:
                    in_search = True
                    in_replace = False
                elif line.strip() == DIVIDER:
                    in_search = False
                    in_replace = True
                elif line.strip() == REPLACE_END:
                    in_replace = False
                elif in_search:
                    search_lines.append(line)
                elif in_replace:
                    replace_lines.append(line)

            if search_lines:
                search_text = '\n'.join(search_lines).strip()
                replace_text = '\n'.join(replace_lines).strip()
                parsed_blocks.append((search_text, replace_text))

        return parsed_blocks

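    # Illustrative worked example (not part of the committed file), assuming the
    # config markers are "<<<<<<< SEARCH", "=======" and ">>>>>>> REPLACE":
    #
    #   changes_text = ("<<<<<<< SEARCH\n"
    #                   "<h1>Old Title</h1>\n"
    #                   "=======\n"
    #                   "<h1>New Title</h1>\n"
    #                   ">>>>>>> REPLACE")
    #   _parse_search_replace_blocks(changes_text)
    #   # -> [('<h1>Old Title</h1>', '<h1>New Title</h1>')]
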
    def _call_llm(self, client, current_model: Dict, messages: List[Dict],
                  max_tokens: int = 4000, temperature: float = 0.7) -> Optional[str]:
        """Call LLM and return response content"""
        try:
            if current_model.get('type') == 'openai':
                response = client.chat.completions.create(
                    model=current_model['id'],
                    messages=messages,
                    max_tokens=max_tokens,
                    temperature=temperature
                )
                return response.choices[0].message.content
            elif current_model.get('type') == 'mistral':
                response = client.chat.complete(
                    model=current_model['id'],
                    messages=messages,
                    max_tokens=max_tokens,
                    temperature=temperature
                )
                return response.choices[0].message.content
            else:
                completion = client.chat.completions.create(
                    model=current_model['id'],
                    messages=messages,
                    max_tokens=max_tokens,
                    temperature=temperature
                )
                return completion.choices[0].message.content
        except Exception as e:
            print(f"[GenerationEngine] LLM call failed: {e}")
            return None

    def _convert_history_to_messages(self, history: List[Tuple[str, str]]) -> List[Dict[str, str]]:
        """Convert history tuples to message format"""
        messages = []
        for user_msg, assistant_msg in history:
            if isinstance(user_msg, list):
                text_content = ""
                for item in user_msg:
                    if isinstance(item, dict) and item.get("type") == "text":
                        text_content += item.get("text", "")
                user_msg = text_content if text_content else str(user_msg)

            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": assistant_msg})
        return messages

    def _get_gradio_language(self, language: str) -> Optional[str]:
        """Get appropriate Gradio language for syntax highlighting"""
        from config import get_gradio_language
        return get_gradio_language(language)

    def _is_placeholder_thinking_only(self, text: str) -> bool:
        """Check if text contains only thinking placeholders"""
        if not text:
            return False
        stripped = text.strip()
        if not stripped:
            return False
        return re.fullmatch(r"(?s)(?:\s*Thinking\.\.\.(?:\s*\(\d+s elapsed\))?\s*)+", stripped) is not None

    def _strip_placeholder_thinking(self, text: str) -> str:
        """Remove placeholder thinking status lines"""
        if not text:
            return text
        return re.sub(r"(?mi)^[\t ]*Thinking\.\.\.(?:\s*\(\d+s elapsed\))?[\t ]*$\n?", "", text)

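    # Illustrative behavior (not part of the committed file):
    #
    #   _is_placeholder_thinking_only("Thinking... (3s elapsed)")        # -> True
    #   _is_placeholder_thinking_only("Thinking...\n<html>")             # -> False
    #   _strip_placeholder_thinking("Thinking... (3s elapsed)\n<html>")  # -> "<html>"
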
    def _update_avg_response_time(self, elapsed_time: float):
        """Update average response time statistic"""
        current_avg = self._generation_stats['avg_response_time']
        total_successful = self._generation_stats['successful_generations']

        if total_successful <= 1:
            self._generation_stats['avg_response_time'] = elapsed_time
        else:
            # Incremental (running) mean over all successful generations
            self._generation_stats['avg_response_time'] = (current_avg * (total_successful - 1) + elapsed_time) / total_successful

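    # Illustrative check (not part of the committed file): the branch above is the
    # standard incremental mean, new_avg = (old_avg * (n - 1) + x_n) / n. With two
    # successful runs of 2.0s and 4.0s, the second update computes
    # (2.0 * 1 + 4.0) / 2 == 3.0, the mean of both samples.
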
841
+ def get_generation_stats(self) -> Dict[str, Any]:
842
+ """Get current generation statistics"""
843
+ return self._generation_stats.copy()
844
+
845
+ def get_active_generations(self) -> Dict[str, Dict[str, Any]]:
846
+ """Get information about currently active generations"""
847
+ return self._active_generations.copy()
848
+
849
+ # Global generation engine instance
850
+ generation_engine = GenerationEngine()
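
# Illustrative usage sketch (not part of the committed file). It assumes inference
# credentials for the default model are configured, and simply prints each status
# update as it streams in.
if __name__ == "__main__":
    for update in generation_engine.generate_code(
        query="Create a single-page portfolio site",
        language="html",
    ):
        print(update.get("status"))
    print(generation_engine.get_generation_stats())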