SolshineMisfit committed
Commit 3dd4f84 · verified · 1 Parent(s): 9991f02

Update app.py

Files changed (1): app.py +50 -169
app.py CHANGED
@@ -350,181 +350,62 @@ def get_current_time_in_timezone(timezone: str) -> str:

final_answer = FinalAnswerTool()

-# Create a custom model class that implements the HfApiModel interface
-class FallbackChainModel:
-    """
-    A custom model class that implements the interface expected by smolagents,
-    with an automatic fallback chain from primary to backup models.
-
-    Following smolagents best practices:
-    - Simple implementation with robust logging
-    - Thorough error handling with detailed messages
-    - Support for all parameters used by CodeAgent
-    """
-    def __init__(self):
-        # Initialize token tracking for compatibility with smolagents
-        self.last_input_token_count = 0
-        self.last_output_token_count = 0
-
-        # Set up the backup model as a proper HfApiModel instance
-        self.backup_model = HfApiModel(
-            max_tokens=2096,
-            temperature=0.5,
-            model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud',
+# Keep the original endpoint as a backup
+backup_model = HfApiModel(
+    max_tokens=2096,
+    temperature=0.5,
+    model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud',
+)
+
+def model_with_fallback(prompt, **kwargs):
+    """Simple model function with fallback to the original endpoint."""
+    try:
+        print("Using primary model: DeepSeek-R1-Distill-Qwen-32B")
+        # Get API key
+        api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY")
+        if not api_key:
+            raise ValueError("No Hugging Face API key found")
+
+        # Format prompt for the API
+        if isinstance(prompt, (dict, list)):
+            import json
+            prompt_text = json.dumps(prompt)
+        else:
+            prompt_text = str(prompt)
+
+        # Create client and call model
+        client = InferenceClient(
+            provider="hf-inference",
+            api_key=api_key
+        )
+
+        # Extract parameters
+        temperature = kwargs.get('temperature', 0.5)
+        max_tokens = kwargs.get('max_tokens', 2096)
+        stop_sequences = kwargs.get('stop_sequences', None)
+
+        # Call the API
+        messages = [{"role": "user", "content": prompt_text}]
+        completion = client.chat.completions.create(
+            model="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
+            messages=messages,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            stop=stop_sequences
        )
-        print("Initialized FallbackChainModel with DeepSeek as primary and HfApiModel as backup")
-
-    def __call__(self, prompt, stop_sequences=None, temperature=0.5, max_tokens=2096):
-        """
-        Call method that matches the HfApiModel interface required by smolagents.
-
-        Args:
-            prompt: The input prompt (can be string or structured format)
-            stop_sequences: Optional list of sequences to stop generation
-            temperature: Controls randomness in generation
-            max_tokens: Maximum tokens to generate
-
-        Returns:
-            Generated text from the model
-        """
-        # Track all calls and parameters for debugging
-        print(f"Model call with temp={temperature}, max_tokens={max_tokens}")
-        if stop_sequences:
-            print(f"Using stop sequences: {stop_sequences}")
-
-        # Try the primary model (DeepSeek)
-        try:
-            print("🚀 Attempting to use primary model: DeepSeek-R1-Distill-Qwen-32B")
-
-            # Get API key
-            api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY")
-            if not api_key:
-                raise ValueError("No Hugging Face API key found")
-
-            # Format the prompt appropriately for DeepSeek
-            # For structured prompts (needed by some agents), convert to proper format
-            if isinstance(prompt, list) and all(isinstance(item, dict) for item in prompt):
-                # Handle chat format with roles
-                messages = prompt
-                print(f"Using structured chat format with {len(messages)} messages")
-            else:
-                # Simple string prompt
-                if isinstance(prompt, (dict, list)):
-                    # If it's a dict or list but not in expected chat format, convert to string
-                    import json
-                    prompt_str = json.dumps(prompt)
-                    print("Converting complex prompt to JSON string")
-                else:
-                    prompt_str = str(prompt)
-
-                # Create a single user message
-                messages = [{"role": "user", "content": prompt_str}]
-
-            # Create the InferenceClient instance
-            client = InferenceClient(
-                provider="hf-inference",
-                api_key=api_key
-            )
-
-            # Call the DeepSeek model
-            completion = client.chat.completions.create(
-                model="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
-                messages=messages,
-                max_tokens=max_tokens,
-                temperature=temperature,
-                stop=stop_sequences  # Pass stop_sequences as stop parameter
-            )
-
-            # Extract the response text
-            response = completion.choices[0].message.content
-
-            # Track token usage (approximate for now)
-            prompt_text = str(prompt)
-            self.last_input_token_count = len(prompt_text.split()) * 1.3  # Rough approximation
-            self.last_output_token_count = len(response.split()) * 1.3  # Rough approximation
-
-            print("✅ Primary model call successful")
-            return response
-
-        except Exception as primary_error:
-            # Detailed error logging for the primary model
-            print(f"❌ Primary model error: {str(primary_error)}")
-
-            # Try the backup model (original endpoint)
-            try:
-                print("🔄 Falling back to backup model (HfApiModel endpoint)")
-                # Direct call to backup with all parameters
-                result = self.backup_model(
-                    prompt,
-                    stop_sequences=stop_sequences,
-                    temperature=temperature,
-                    max_tokens=max_tokens
-                )
-                print("✅ Backup model call successful")
-                return result
-
-            except Exception as backup_error:
-                # Log backup error and try final fallbacks
-                print(f"❌ Backup model error: {str(backup_error)}")
-                print("🔄 Trying additional fallback models...")
-
-                # Additional fallback options (last resort)
-                fallbacks = [
-                    {
-                        "provider": "sambanova",
-                        "model_name": "Qwen/Qwen2.5-Coder-32B-Instruct",
-                        "display_name": "Qwen 2.5 Coder 32B"
-                    }
-                ]
-
-                # Get API key again to ensure it's available
-                api_key = os.getenv("HF_API_KEY") or os.getenv("HUGGINGFACE_API_KEY")
-                if not api_key:
-                    raise ValueError("No Hugging Face API key found")
-
-                # Try each fallback in sequence
-                for fallback in fallbacks:
-                    try:
-                        print(f"🔄 Trying fallback model: {fallback['display_name']}")
-                        client = InferenceClient(provider=fallback["provider"], api_key=api_key)
-
-                        # Format prompt for this model
-                        if isinstance(prompt, (dict, list)):
-                            import json
-                            prompt_str = json.dumps(prompt)
-                        else:
-                            prompt_str = str(prompt)
-
-                        messages = [{"role": "user", "content": prompt_str}]
-
-                        # Call the fallback model
-                        completion = client.chat.completions.create(
-                            model=fallback["model_name"],
-                            messages=messages,
-                            max_tokens=max_tokens,
-                            temperature=temperature,
-                            stop=stop_sequences
-                        )
-
-                        print(f"✅ Successfully used fallback model: {fallback['display_name']}")
-                        return completion.choices[0].message.content
-
-                    except Exception as e:
-                        print(f"❌ Fallback model {fallback['display_name']} failed: {str(e)}")
-                        continue
-
-                # If all fallbacks failed, raise a comprehensive error
-                error_msg = (
-                    f"All models in fallback chain failed. "
-                    f"Primary error: {str(primary_error)}. "
-                    f"Backup error: {str(backup_error)}"
-                )
-                print(f"❌ FATAL ERROR: {error_msg}")
-                raise RuntimeError(error_msg)
-
-# Create an instance of our fallback chain model
-model = FallbackChainModel()
+        print("Primary model successful")
+        return completion.choices[0].message.content
+
+    except Exception as e:
+        print(f"Primary model failed: {str(e)}")
+        print("Falling back to backup model")
+
+        # Use the backup model
+        return backup_model(prompt, **kwargs)
+
+# Set up the model for the agent
+model = backup_model  # Set to backup model directly for now to ensure it works

# Import tool from Hub
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)
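
For reference, a minimal sketch of how the resulting `model` is typically handed to the agent later in app.py, assuming the file follows the standard smolagents agents-course template; the `CodeAgent` arguments below are assumptions for illustration, not part of this diff:

# Hypothetical usage, based on the agents-course template; not part of this commit.
# Swapping model_with_fallback in for backup_model would exercise the new fallback
# path, provided it matches the model interface expected by the installed smolagents.
agent = CodeAgent(
    model=model,  # this commit pins model = backup_model
    tools=[final_answer, get_current_time_in_timezone, image_generation_tool],
    max_steps=6,  # assumed value from the course template
)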