|
|
|
|
|
|
|
|
|
import asyncio
|
|
import base64
|
|
import json
|
|
import os
|
|
import shutil
|
|
import tempfile
|
|
from typing import List
|
|
|
|
import yaml
|
|
from browsing import pre_login
|
|
|
|
from openhands.controller.state.state import State
|
|
from openhands.core.config import (
|
|
AppConfig,
|
|
LLMConfig,
|
|
SandboxConfig,
|
|
get_llm_config_arg,
|
|
get_parser,
|
|
)
|
|
from openhands.core.logger import openhands_logger as logger
|
|
from openhands.core.main import create_runtime, run_controller
|
|
from openhands.events.action import CmdRunAction, MessageAction
|
|
from openhands.events.observation import BrowserOutputObservation, CmdOutputObservation
|
|
from openhands.runtime.base import Runtime
|
|
from openhands.utils.async_utils import call_async_from_sync
|
|
|
|
|
|
def get_config(
    base_container_image: str,
    task_short_name: str,
    mount_path_on_host: str,
    llm_config: LLMConfig,
) -> AppConfig:
    """Build the OpenHands AppConfig for a single task run.

    The host directory ``mount_path_on_host`` is mounted into the sandbox at
    ``/outputs``; the agent trajectory is saved there as
    ``traj_<task_short_name>.json``.
    """
    # Sandbox shares the host network so the task services (gitlab, etc.)
    # started on the host are reachable from inside the container.
    sandbox = SandboxConfig(
        base_container_image=base_container_image,
        enable_auto_lint=True,
        use_host_network=True,
        timeout=300,
        api_key=os.environ.get('ALLHANDS_API_KEY', None),
    )
    trajectory_file = os.path.join(mount_path_on_host, f'traj_{task_short_name}.json')
    app_config = AppConfig(
        run_as_openhands=False,
        max_budget_per_task=4,
        max_iterations=100,
        save_trajectory_path=trajectory_file,
        sandbox=sandbox,
        workspace_mount_path=mount_path_on_host,
        workspace_mount_path_in_sandbox='/outputs',
    )
    app_config.set_llm_config(llm_config)
    return app_config
|
|
|
|
|
|
def load_dependencies(runtime: Runtime) -> List[str]:
    """Return the service names the task depends on.

    Every task ships a /utils/dependencies.yml listing its dependent
    services; an empty file yields an empty list.
    """
    action = CmdRunAction(command='cat /utils/dependencies.yml')
    logger.info(action, extra={'msg_type': 'ACTION'})
    obs: CmdOutputObservation = runtime.run_action(action)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert obs.exit_code == 0
    # safe_load returns None for an empty document — normalize to [].
    parsed = yaml.safe_load(obs.content)
    return [] if parsed is None else parsed
|
|
|
|
|
|
def init_task_env(runtime: Runtime, hostname: str, env_llm_config: LLMConfig):
    """Initialize the task environment inside the sandbox.

    Runs /utils/init.sh with the environment-LLM credentials exported so
    NPCs and other task services can reach the LLM.
    """
    # f-string renders a missing key as the literal 'None', matching the
    # original inline-conditional behavior.
    api_key = (
        env_llm_config.api_key.get_secret_value() if env_llm_config.api_key else None
    )
    command = (
        f'SERVER_HOSTNAME={hostname} '
        f'LITELLM_API_KEY={api_key} '
        f'LITELLM_BASE_URL={env_llm_config.base_url} '
        f'LITELLM_MODEL={env_llm_config.model} '
        'bash /utils/init.sh'
    )
    action = CmdRunAction(command=command)
    # init.sh may pull images / seed data — allow up to 15 minutes.
    action.set_hard_timeout(900)
    logger.info(action, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert obs.exit_code == 0
|
|
|
|
|
|
def codeact_user_response(state: State) -> str:
    """Fake user reply that urges the agent to keep working autonomously.

    After the second simulated user message, the reply additionally tells
    the agent how to give up, so a stuck agent can terminate the episode.
    """
    base_msg = (
        'Please continue working on the task on whatever approach you think is suitable.\n'
        'If you think you have solved the task, please finish the interaction.\n'
        'IMPORTANT: YOU SHOULD NEVER ASK FOR HUMAN HELP.\n'
    )
    if not state.history:
        return base_msg
    # Count prior user-sourced messages without materializing a list.
    n_user_msgs = sum(
        1
        for event in state.history
        if isinstance(event, MessageAction) and event.source == 'user'
    )
    if n_user_msgs < 2:
        return base_msg
    return (
        base_msg + 'If you want to give up, run: <execute_bash> exit </execute_bash>.\n'
    )
|
|
|
|
|
|
def run_solver(
    runtime: Runtime,
    task_name: str,
    config: AppConfig,
    dependencies: List[str],
    save_final_state: bool,
    state_dir: str,
    save_screenshots: bool,
    screenshots_dir: str,
) -> State:
    """Run the agent controller on the task and return the final State.

    Args:
        runtime: connected sandbox runtime.
        task_name: session id; also used in output filenames.
        config: app config produced by get_config().
        dependencies: task service dependencies (from load_dependencies()).
        save_final_state: if True, dump str(state) to state_dir.
        state_dir: host directory for the final-state JSON.
        save_screenshots: if True, dump browser screenshots per step.
        screenshots_dir: host directory root for screenshots.

    Raises:
        RuntimeError: if run_controller returns no final state.
    """
    instruction = 'Complete the task in /instruction/task.md'

    if 'gitlab' in dependencies:
        instruction += "\n\nGitlab username is 'root' and password is 'theagentcompany'"

    state: State | None = asyncio.run(
        run_controller(
            config=config,
            sid=task_name,
            initial_user_action=MessageAction(content=instruction),
            runtime=runtime,
            fake_user_response_fn=codeact_user_response,
        )
    )
    logger.info(state)

    # run_controller is typed State | None; the original code dereferenced
    # state.history unguarded and would AttributeError on None. Fail loudly
    # with a clear message instead.
    if state is None:
        raise RuntimeError(f'run_controller returned no state for task {task_name}')

    if save_screenshots:
        # Use a local name instead of shadowing the screenshots_dir parameter.
        task_screenshots_dir = os.path.join(screenshots_dir, task_name)
        os.makedirs(task_screenshots_dir, exist_ok=True)
        for image_id, obs in enumerate(state.history):
            # Skip observations with an empty/missing screenshot — b64decode
            # would otherwise raise on None.
            if isinstance(obs, BrowserOutputObservation) and obs.screenshot:
                image_data = base64.b64decode(obs.screenshot)
                with open(
                    os.path.join(task_screenshots_dir, f'{image_id}.png'), 'wb'
                ) as file:
                    file.write(image_data)

    if save_final_state:
        os.makedirs(state_dir, exist_ok=True)
        with open(os.path.join(state_dir, f'state_{task_name}.json'), 'w') as file:
            json.dump(str(state), file)

    return state
|
|
|
|
|
|
def run_evaluator(
    runtime: Runtime, env_llm_config: LLMConfig, trajectory_path: str, result_path: str
):
    """Run the task's /utils/eval.py checker inside the sandbox.

    The evaluator reads the saved trajectory from trajectory_path and writes
    its verdict to result_path (both are paths inside the sandbox).
    """
    # Renders as the literal 'None' when no key is configured, matching the
    # original inline conditional.
    api_key = (
        env_llm_config.api_key.get_secret_value() if env_llm_config.api_key else None
    )
    command = (
        f'LITELLM_API_KEY={api_key} '
        f'LITELLM_BASE_URL={env_llm_config.base_url} '
        f'LITELLM_MODEL={env_llm_config.model} '
        f"DECRYPTION_KEY='theagentcompany is all you need' "
        f'python_default /utils/eval.py --trajectory_path {trajectory_path} --result_path {result_path}'
    )
    action = CmdRunAction(command=command)
    action.set_hard_timeout(600)
    logger.info(action, extra={'msg_type': 'ACTION'})
    obs = runtime.run_action(action)
    logger.info(obs, extra={'msg_type': 'OBSERVATION'})
    assert obs.exit_code == 0
|
|
|
|
|
|
if __name__ == '__main__':
    parser = get_parser()
    parser.add_argument(
        '--task-image-name',
        type=str,
        default='ghcr.io/theagentcompany/example-image:1.0.0',
        help='Task image name',
    )
    parser.add_argument(
        '--outputs-path',
        type=str,
        default='./outputs',
        help='Folder path to save trajectories and evaluation results',
    )
    parser.add_argument(
        '--server-hostname',
        type=str,
        default='localhost',
        help='Server hostname, e.g. localhost to access the host machine from the container, '
        'assuming the task docker container is run with `--network host` flag',
    )
    parser.add_argument(
        '--agent-llm-config',
        type=str,
        default=None,
        help='LLM config for agent',
    )
    parser.add_argument(
        '--env-llm-config',
        type=str,
        default=None,
        help='LLM config for evaluation environment (NPC & llm-based evaluator)',
    )
    args, _ = parser.parse_known_args()

    # Resolve and validate the agent's LLM config.
    agent_llm_config: LLMConfig | None = None
    if args.agent_llm_config:
        agent_llm_config = get_llm_config_arg(args.agent_llm_config)

    if agent_llm_config is None:
        raise ValueError(
            f'Could not find LLM config for agent: --agent-llm-config {args.agent_llm_config}'
        )

    if agent_llm_config.api_key is None:
        raise ValueError('LLM API key is not set for agent')

    # Resolve and validate the environment's (NPC/evaluator) LLM config.
    env_llm_config: LLMConfig | None = None
    if args.env_llm_config:
        env_llm_config = get_llm_config_arg(args.env_llm_config)

    if env_llm_config is None:
        raise ValueError(
            f'Could not find LLM config for evaluation environment: --env-llm-config {args.env_llm_config}'
        )

    if env_llm_config.api_key is None:
        raise ValueError('LLM API key is not set for evaluation environment')

    # e.g. ghcr.io/theagentcompany/example-image:1.0.0 -> example-image
    task_short_name = args.task_image_name.split('/')[-1].split(':')[0]
    logger.info(
        f'Task image name is {args.task_image_name}, short name is {task_short_name}'
    )

    # Host directory mounted into the sandbox at /outputs: honor TMPDIR when
    # it points at an existing directory, otherwise make a fresh temp dir.
    if os.getenv('TMPDIR') and os.path.exists(os.getenv('TMPDIR')):
        temp_dir = os.path.abspath(os.getenv('TMPDIR'))
    else:
        temp_dir = tempfile.mkdtemp()

    config: AppConfig = get_config(
        args.task_image_name, task_short_name, temp_dir, agent_llm_config
    )
    runtime: Runtime = create_runtime(config)
    call_async_from_sync(runtime.connect)

    init_task_env(runtime, args.server_hostname, env_llm_config)

    dependencies = load_dependencies(runtime)
    logger.info(f'Service dependencies: {dependencies}')

    outputs_path = os.path.abspath(args.outputs_path)
    screenshots_dir = os.path.join(outputs_path, 'screenshots')

    # BUG FIX: the re-init + re-login below previously ran unconditionally
    # after the try/except, duplicating up to ~15 minutes of init work on
    # every run. It is a one-shot retry and belongs inside the handler.
    try:
        pre_login(
            runtime,
            dependencies,
            save_screenshots=True,
            screenshots_dir=screenshots_dir,
        )
    except Exception as e:
        logger.error(f'Failed to pre-login: {e}')
        # Re-initialize the task environment and retry the login once; a
        # second failure propagates and aborts the run.
        init_task_env(runtime, args.server_hostname, env_llm_config)
        pre_login(
            runtime,
            dependencies,
            save_screenshots=True,
            screenshots_dir=screenshots_dir,
        )

    state = run_solver(
        runtime,
        task_short_name,
        config,
        dependencies,
        save_final_state=True,
        state_dir=outputs_path,
        save_screenshots=True,
        screenshots_dir=screenshots_dir,
    )

    # Paths as seen from inside the sandbox (/outputs is the temp_dir mount).
    trajectory_path = f'/outputs/traj_{task_short_name}.json'
    result_path = f'/outputs/eval_{task_short_name}.json'

    run_evaluator(runtime, env_llm_config, trajectory_path, result_path)

    # The sandbox wrote into the host-side temp_dir; move the artifacts to
    # the user-requested outputs folder.
    shutil.move(
        os.path.join(temp_dir, f'traj_{task_short_name}.json'),
        os.path.join(outputs_path, f'traj_{task_short_name}.json'),
    )
    shutil.move(
        os.path.join(temp_dir, f'eval_{task_short_name}.json'),
        os.path.join(outputs_path, f'eval_{task_short_name}.json'),
    )
|