import pdb import logging from dotenv import load_dotenv load_dotenv() import os import glob import asyncio import argparse import os logger = logging.getLogger(__name__) import gradio as gr import inspect from functools import wraps from browser_use.agent.service import Agent from playwright.async_api import async_playwright from browser_use.browser.browser import Browser, BrowserConfig from browser_use.browser.context import ( BrowserContextConfig, BrowserContextWindowSize, ) from langchain_ollama import ChatOllama from playwright.async_api import async_playwright from src.utils.agent_state import AgentState from src.utils import utils from src.agent.custom_agent import CustomAgent from src.browser.custom_browser import CustomBrowser from src.agent.custom_prompts import CustomSystemPrompt, CustomAgentMessagePrompt from src.browser.custom_context import BrowserContextConfig, CustomBrowserContext from src.controller.custom_controller import CustomController from gradio.themes import Citrus, Default, Glass, Monochrome, Ocean, Origin, Soft, Base from src.utils.utils import update_model_dropdown, get_latest_files, capture_screenshot, MissingAPIKeyError from src.utils import utils # Global variables for persistence _global_browser = None _global_browser_context = None _global_agent = None # Create the global agent state instance _global_agent_state = AgentState() # webui config webui_config_manager = utils.ConfigManager() def scan_and_register_components(blocks): """扫描一个 Blocks 对象并注册其中的所有交互式组件,但不包括按钮""" global webui_config_manager def traverse_blocks(block, prefix=""): registered = 0 # 处理 Blocks 自身的组件 if hasattr(block, "children"): for i, child in enumerate(block.children): if isinstance(child, gr.components.Component): # 排除按钮 (Button) 组件 if getattr(child, "interactive", False) and not isinstance(child, gr.Button): name = f"{prefix}component_{i}" if hasattr(child, "label") and child.label: # 使用标签作为名称的一部分 label = child.label name = f"{prefix}{label}" logger.debug(f"Registering component: {name}") webui_config_manager.register_component(name, child) registered += 1 elif hasattr(child, "children"): # 递归处理嵌套的 Blocks new_prefix = f"{prefix}block_{i}_" registered += traverse_blocks(child, new_prefix) return registered total = traverse_blocks(blocks) logger.info(f"Total registered components: {total}") def save_current_config(): return webui_config_manager.save_current_config() def update_ui_from_config(config_file): return webui_config_manager.update_ui_from_config(config_file) def resolve_sensitive_env_variables(text): """ Replace environment variable placeholders ($SENSITIVE_*) with their values. Only replaces variables that start with SENSITIVE_. """ if not text: return text import re # Find all $SENSITIVE_* patterns env_vars = re.findall(r'\$SENSITIVE_[A-Za-z0-9_]*', text) result = text for var in env_vars: # Remove the $ prefix to get the actual environment variable name env_name = var[1:] # removes the $ env_value = os.getenv(env_name) if env_value is not None: # Replace $SENSITIVE_VAR_NAME with its value result = result.replace(var, env_value) return result async def stop_agent(): """Request the agent to stop and update UI with enhanced feedback""" global _global_agent try: if _global_agent is not None: # Request stop _global_agent.stop() # Update UI immediately message = "Stop requested - the agent will halt at the next safe point" logger.info(f"🛑 {message}") # Return UI updates return ( gr.update(value="Stopping...", interactive=False), # stop_button gr.update(interactive=False), # run_button ) except Exception as e: error_msg = f"Error during stop: {str(e)}" logger.error(error_msg) return ( gr.update(value="Stop", interactive=True), gr.update(interactive=True) ) async def stop_research_agent(): """Request the agent to stop and update UI with enhanced feedback""" global _global_agent_state try: # Request stop _global_agent_state.request_stop() # Update UI immediately message = "Stop requested - the agent will halt at the next safe point" logger.info(f"🛑 {message}") # Return UI updates return ( # errors_output gr.update(value="Stopping...", interactive=False), # stop_button gr.update(interactive=False), # run_button ) except Exception as e: error_msg = f"Error during stop: {str(e)}" logger.error(error_msg) return ( gr.update(value="Stop", interactive=True), gr.update(interactive=True) ) async def run_browser_agent( agent_type, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h, save_recording_path, save_agent_history_path, save_trace_path, enable_recording, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_calling_method, chrome_cdp, max_input_tokens ): try: # Disable recording if the checkbox is unchecked if not enable_recording: save_recording_path = None # Ensure the recording directory exists if recording is enabled if save_recording_path: os.makedirs(save_recording_path, exist_ok=True) # Get the list of existing videos before the agent runs existing_videos = set() if save_recording_path: existing_videos = set( glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4")) + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]")) ) task = resolve_sensitive_env_variables(task) # Run the agent llm = utils.get_llm_model( provider=llm_provider, model_name=llm_model_name, num_ctx=llm_num_ctx, temperature=llm_temperature, base_url=llm_base_url, api_key=llm_api_key, ) if agent_type == "org": final_result, errors, model_actions, model_thoughts, trace_file, history_file = await run_org_agent( llm=llm, use_own_browser=use_own_browser, keep_browser_open=keep_browser_open, headless=headless, disable_security=disable_security, window_w=window_w, window_h=window_h, save_recording_path=save_recording_path, save_agent_history_path=save_agent_history_path, save_trace_path=save_trace_path, task=task, max_steps=max_steps, use_vision=use_vision, max_actions_per_step=max_actions_per_step, tool_calling_method=tool_calling_method, chrome_cdp=chrome_cdp, max_input_tokens=max_input_tokens ) elif agent_type == "custom": final_result, errors, model_actions, model_thoughts, trace_file, history_file = await run_custom_agent( llm=llm, use_own_browser=use_own_browser, keep_browser_open=keep_browser_open, headless=headless, disable_security=disable_security, window_w=window_w, window_h=window_h, save_recording_path=save_recording_path, save_agent_history_path=save_agent_history_path, save_trace_path=save_trace_path, task=task, add_infos=add_infos, max_steps=max_steps, use_vision=use_vision, max_actions_per_step=max_actions_per_step, tool_calling_method=tool_calling_method, chrome_cdp=chrome_cdp, max_input_tokens=max_input_tokens ) else: raise ValueError(f"Invalid agent type: {agent_type}") # Get the list of videos after the agent runs (if recording is enabled) # latest_video = None # if save_recording_path: # new_videos = set( # glob.glob(os.path.join(save_recording_path, "*.[mM][pP]4")) # + glob.glob(os.path.join(save_recording_path, "*.[wW][eE][bB][mM]")) # ) # if new_videos - existing_videos: # latest_video = list(new_videos - existing_videos)[0] # Get the first new video gif_path = os.path.join(os.path.dirname(__file__), "agent_history.gif") return ( final_result, errors, model_actions, model_thoughts, gif_path, trace_file, history_file, gr.update(value="Stop", interactive=True), # Re-enable stop button gr.update(interactive=True) # Re-enable run button ) except MissingAPIKeyError as e: logger.error(str(e)) raise gr.Error(str(e), print_exception=False) except Exception as e: import traceback traceback.print_exc() errors = str(e) + "\n" + traceback.format_exc() return ( '', # final_result errors, # errors '', # model_actions '', # model_thoughts None, # latest_video None, # history_file None, # trace_file gr.update(value="Stop", interactive=True), # Re-enable stop button gr.update(interactive=True) # Re-enable run button ) async def run_org_agent( llm, use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h, save_recording_path, save_agent_history_path, save_trace_path, task, max_steps, use_vision, max_actions_per_step, tool_calling_method, chrome_cdp, max_input_tokens ): try: global _global_browser, _global_browser_context, _global_agent extra_chromium_args = ["--accept_downloads=True", f"--window-size={window_w},{window_h}"] cdp_url = chrome_cdp if use_own_browser: cdp_url = os.getenv("CHROME_CDP", chrome_cdp) chrome_path = os.getenv("CHROME_PATH", None) if chrome_path == "": chrome_path = None chrome_user_data = os.getenv("CHROME_USER_DATA", None) if chrome_user_data: extra_chromium_args += [f"--user-data-dir={chrome_user_data}"] else: chrome_path = None if _global_browser is None: _global_browser = Browser( config=BrowserConfig( headless=headless, cdp_url=cdp_url, disable_security=disable_security, chrome_instance_path=chrome_path, extra_chromium_args=extra_chromium_args, ) ) if _global_browser_context is None: _global_browser_context = await _global_browser.new_context( config=BrowserContextConfig( trace_path=save_trace_path if save_trace_path else None, save_recording_path=save_recording_path if save_recording_path else None, save_downloads_path="./tmp/downloads", no_viewport=False, browser_window_size=BrowserContextWindowSize( width=window_w, height=window_h ), ) ) if _global_agent is None: _global_agent = Agent( task=task, llm=llm, use_vision=use_vision, browser=_global_browser, browser_context=_global_browser_context, max_actions_per_step=max_actions_per_step, tool_calling_method=tool_calling_method, max_input_tokens=max_input_tokens, generate_gif=True ) history = await _global_agent.run(max_steps=max_steps) history_file = os.path.join(save_agent_history_path, f"{_global_agent.state.agent_id}.json") _global_agent.save_history(history_file) final_result = history.final_result() errors = history.errors() model_actions = history.model_actions() model_thoughts = history.model_thoughts() trace_file = get_latest_files(save_trace_path) return final_result, errors, model_actions, model_thoughts, trace_file.get('.zip'), history_file except Exception as e: import traceback traceback.print_exc() errors = str(e) + "\n" + traceback.format_exc() return '', errors, '', '', None, None finally: _global_agent = None # Handle cleanup based on persistence configuration if not keep_browser_open: if _global_browser_context: await _global_browser_context.close() _global_browser_context = None if _global_browser: await _global_browser.close() _global_browser = None async def run_custom_agent( llm, use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h, save_recording_path, save_agent_history_path, save_trace_path, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_calling_method, chrome_cdp, max_input_tokens ): try: global _global_browser, _global_browser_context, _global_agent extra_chromium_args = ["--accept_downloads=True", f"--window-size={window_w},{window_h}"] cdp_url = chrome_cdp if use_own_browser: cdp_url = os.getenv("CHROME_CDP", chrome_cdp) chrome_path = os.getenv("CHROME_PATH", None) if chrome_path == "": chrome_path = None chrome_user_data = os.getenv("CHROME_USER_DATA", None) if chrome_user_data: extra_chromium_args += [f"--user-data-dir={chrome_user_data}"] else: chrome_path = None controller = CustomController() # Initialize global browser if needed # if chrome_cdp not empty string nor None if (_global_browser is None) or (cdp_url and cdp_url != "" and cdp_url != None): _global_browser = CustomBrowser( config=BrowserConfig( headless=headless, disable_security=disable_security, cdp_url=cdp_url, chrome_instance_path=chrome_path, extra_chromium_args=extra_chromium_args, ) ) if _global_browser_context is None or (chrome_cdp and cdp_url != "" and cdp_url != None): _global_browser_context = await _global_browser.new_context( config=BrowserContextConfig( trace_path=save_trace_path if save_trace_path else None, save_recording_path=save_recording_path if save_recording_path else None, no_viewport=False, save_downloads_path="./tmp/downloads", browser_window_size=BrowserContextWindowSize( width=window_w, height=window_h ), ) ) # Create and run agent if _global_agent is None: _global_agent = CustomAgent( task=task, add_infos=add_infos, use_vision=use_vision, llm=llm, browser=_global_browser, browser_context=_global_browser_context, controller=controller, system_prompt_class=CustomSystemPrompt, agent_prompt_class=CustomAgentMessagePrompt, max_actions_per_step=max_actions_per_step, tool_calling_method=tool_calling_method, max_input_tokens=max_input_tokens, generate_gif=True ) history = await _global_agent.run(max_steps=max_steps) history_file = os.path.join(save_agent_history_path, f"{_global_agent.state.agent_id}.json") _global_agent.save_history(history_file) final_result = history.final_result() errors = history.errors() model_actions = history.model_actions() model_thoughts = history.model_thoughts() trace_file = get_latest_files(save_trace_path) return final_result, errors, model_actions, model_thoughts, trace_file.get('.zip'), history_file except Exception as e: import traceback traceback.print_exc() errors = str(e) + "\n" + traceback.format_exc() return '', errors, '', '', None, None finally: _global_agent = None # Handle cleanup based on persistence configuration if not keep_browser_open: if _global_browser_context: await _global_browser_context.close() _global_browser_context = None if _global_browser: await _global_browser.close() _global_browser = None async def run_with_stream( agent_type, llm_provider, llm_model_name, llm_num_ctx, llm_temperature, llm_base_url, llm_api_key, use_own_browser, keep_browser_open, headless, disable_security, window_w, window_h, save_recording_path, save_agent_history_path, save_trace_path, enable_recording, task, add_infos, max_steps, use_vision, max_actions_per_step, tool_calling_method, chrome_cdp, max_input_tokens ): global _global_agent stream_vw = 80 stream_vh = int(80 * window_h // window_w) if not headless: result = await run_browser_agent( agent_type=agent_type, llm_provider=llm_provider, llm_model_name=llm_model_name, llm_num_ctx=llm_num_ctx, llm_temperature=llm_temperature, llm_base_url=llm_base_url, llm_api_key=llm_api_key, use_own_browser=use_own_browser, keep_browser_open=keep_browser_open, headless=headless, disable_security=disable_security, window_w=window_w, window_h=window_h, save_recording_path=save_recording_path, save_agent_history_path=save_agent_history_path, save_trace_path=save_trace_path, enable_recording=enable_recording, task=task, add_infos=add_infos, max_steps=max_steps, use_vision=use_vision, max_actions_per_step=max_actions_per_step, tool_calling_method=tool_calling_method, chrome_cdp=chrome_cdp, max_input_tokens=max_input_tokens ) # Add HTML content at the start of the result array yield [gr.update(visible=False)] + list(result) else: try: # Run the browser agent in the background agent_task = asyncio.create_task( run_browser_agent( agent_type=agent_type, llm_provider=llm_provider, llm_model_name=llm_model_name, llm_num_ctx=llm_num_ctx, llm_temperature=llm_temperature, llm_base_url=llm_base_url, llm_api_key=llm_api_key, use_own_browser=use_own_browser, keep_browser_open=keep_browser_open, headless=headless, disable_security=disable_security, window_w=window_w, window_h=window_h, save_recording_path=save_recording_path, save_agent_history_path=save_agent_history_path, save_trace_path=save_trace_path, enable_recording=enable_recording, task=task, add_infos=add_infos, max_steps=max_steps, use_vision=use_vision, max_actions_per_step=max_actions_per_step, tool_calling_method=tool_calling_method, chrome_cdp=chrome_cdp, max_input_tokens=max_input_tokens ) ) # Initialize values for streaming html_content = f"