Skip to content

Commit

Permalink
Massage WebArena instance (ServiceNow#259)
Browse files Browse the repository at this point in the history
  • Loading branch information
gasse authored and qipeng committed Nov 20, 2024
1 parent 24902be commit f96fbd0
Showing 1 changed file with 72 additions and 53 deletions.
125 changes: 72 additions & 53 deletions browsergym/experiments/src/browsergym/experiments/benchmark/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,22 @@ def prepare_backend(backend: str):
default_instance = WebArenaInstance()
default_instance.full_reset()

massage_tasks(
[
f"webarena.{id}"
for id in [
410, # reddit
533, # gitlab
561, # gitlab wiki
562, # gitlab reddit
574, # shopping
640, # reddit
680, # shopping_admin
740, # wiki map
]
]
)

case "visualwebarena":
# register environments
import browsergym.visualwebarena
Expand All @@ -126,59 +142,28 @@ def prepare_backend(backend: str):
default_instance = VisualWebArenaInstance()
default_instance.full_reset()

vwa_massage_task_ids = [
0, # classifieds
33, # classifieds
555, # shopping
666, # shopping
282, # __REDDIT__/f/dataisbeautiful
305, # __REDDIT__/f/memes/new
314, # __REDDIT__/f/mildlyinteresting
317, # __REDDIT__/f/Art/active
318, # __REDDIT__/f/consoles
319, # __REDDIT__/f/EarthPorn
410, # __REDDIT__/f/food
411, # __REDDIT__/f/food
427, # __REDDIT__/f/EarthPorn
436, # __REDDIT__/f/Art
440, # __REDDIT__/f/EarthPorn
]
vwa_massage_max_retries = 1
for i, task_id in enumerate(vwa_massage_task_ids):
gym_id = f"browsergym/visualwebarena.{task_id}"
logger.info(
f"VisualWebArena instance massaging {i + 1} / {len(vwa_massage_task_ids)} ({gym_id} reset)"
)
retries = 0
while True:
env = gym.make(gym_id)
try:
env.reset() # task setup
no_action = "noop()"
try:
# check if action space exists and is compatible with "noop()"
env.unwrapped.action_mapping(no_action)
except:
# fallback plan
no_action = ""
env.step(no_action) # task validation
env.step(no_action) # task validation again
logger.info(f"Massage successful")
break
except Exception as e:
if retries < vwa_massage_max_retries:
retries += 1
logger.info(
f"Massage failed, retrying ({retries} / {vwa_massage_max_retries})"
)
continue
else:
logger.warning(
f"Error during VisualWebArena instance massaging ({gym_id}, {retries} retries): {e}"
)
break
finally:
env.close()
massage_tasks(
[
f"visualwebarena.{id}"
for id in [
0, # classifieds
33, # classifieds
555, # shopping
666, # shopping
282, # __REDDIT__/f/dataisbeautiful
305, # __REDDIT__/f/memes/new
314, # __REDDIT__/f/mildlyinteresting
317, # __REDDIT__/f/Art/active
318, # __REDDIT__/f/consoles
319, # __REDDIT__/f/EarthPorn
410, # __REDDIT__/f/food
411, # __REDDIT__/f/food
427, # __REDDIT__/f/EarthPorn
436, # __REDDIT__/f/Art
440, # __REDDIT__/f/EarthPorn
]
]
)

case "workarena":
# register environments
Expand Down Expand Up @@ -213,3 +198,37 @@ def prepare_backend(backend: str):

case _:
raise NotImplementedError(f"Unknown benchmark backend {repr(backend)}")


def massage_tasks(task_ids: list[str], max_retries: int = 1) -> None:
    """Run a short reset-and-step cycle on each of the given tasks.

    For every task id, the environment ``browsergym/<task_id>`` is created,
    reset once, and stepped twice with a no-op action. A failure during
    reset/step is retried with a fresh environment up to ``max_retries``
    times; after that a warning is logged and processing moves on to the next
    task. Failures in reset/step are therefore never raised to the caller.

    Args:
        task_ids: Task identifiers without the ``browsergym/`` prefix
            (e.g. ``"webarena.410"``).
        max_retries: Number of additional attempts per task after the first
            failed one.

    Note:
        ``gym.make`` is called outside the guarded section, so an error while
        creating the environment propagates to the caller.
    """
    for i, task_id in enumerate(task_ids):
        gym_id = f"browsergym/{task_id}"
        logger.info(f"Massaging task {i + 1} / {len(task_ids)}: {gym_id}")
        task_retries = 0
        while True:
            # a fresh environment is created for every attempt
            env = gym.make(gym_id)
            try:
                env.reset()  # task setup
                try:
                    no_action = "noop()"
                    # check if action space exists and is compatible with "noop()"
                    env.unwrapped.action_mapping(no_action)
                except Exception:
                    # fallback plan; only regular errors are swallowed here so
                    # that KeyboardInterrupt / SystemExit still propagate
                    no_action = ""
                env.step(no_action)  # task validation
                env.step(no_action)  # task validation again
                logger.info("Massage successful")
                break
            except Exception as e:
                if task_retries < max_retries:
                    task_retries += 1
                    logger.info(f"Massage failed, retrying ({task_retries} / {max_retries})")
                    continue
                else:
                    # best-effort: give up on this task but keep going with the rest
                    logger.warning(
                        f"Error during task massage after {task_retries} retries ({gym_id}): {e}"
                    )
                    break
            finally:
                # runs on success, on retry (continue) and on give-up (break)
                env.close()

0 comments on commit f96fbd0

Please sign in to comment.