diff --git a/skyvern/forge/prompts/skyvern/parse-input-or-select-context.j2 b/skyvern/forge/prompts/skyvern/parse-input-or-select-context.j2 index ff25f32ed..c81cdbe09 100644 --- a/skyvern/forge/prompts/skyvern/parse-input-or-select-context.j2 +++ b/skyvern/forge/prompts/skyvern/parse-input-or-select-context.j2 @@ -7,6 +7,7 @@ Reply in the following JSON format: "thought": str, // A string to describe how you double-check the information to ensure the accuracy. "field": str, // Which field is this action intended to fill out? "is_required": bool, // True if this is a required field, otherwise false. + "is_search_bar": bool, // True if the element to take the action is a search bar, otherwise false. } Existing reasoning context: diff --git a/skyvern/webeye/actions/actions.py b/skyvern/webeye/actions/actions.py index 3cf5f3104..05b4b2da4 100644 --- a/skyvern/webeye/actions/actions.py +++ b/skyvern/webeye/actions/actions.py @@ -46,9 +46,10 @@ def __repr__(self) -> str: class InputOrSelectContext(BaseModel): field: str | None = None is_required: bool | None = None + is_search_bar: bool | None = None # don't trigger custom-selection logic when it's a search bar def __repr__(self) -> str: - return f"InputOrSelectContext(field={self.field}, is_required={self.is_required})" + return f"InputOrSelectContext(field={self.field}, is_required={self.is_required}, is_search_bar={self.is_search_bar})" class Action(BaseModel): diff --git a/skyvern/webeye/actions/handler.py b/skyvern/webeye/actions/handler.py index 795b3fc02..8cc35db4d 100644 --- a/skyvern/webeye/actions/handler.py +++ b/skyvern/webeye/actions/handler.py @@ -466,7 +466,7 @@ async def handle_input_text_action( if result is not None: return [result] LOG.info( - "No dropdown menu detected, indicating it couldn't be a selectable auto-completion input", + "It might not be a selectable auto-completion input, exit the custom selection mode", task_id=task.task_id, step_id=step.step_id, element_id=skyvern_element.get_id(), @@ -508,7 +508,7 @@ async def handle_input_text_action( return [ActionSuccess()] if await skyvern_element.is_auto_completion_input(): - result = await input_or_auto_complete_input( + if result := await input_or_auto_complete_input( action=action, page=page, dom=dom, @@ -516,8 +516,8 @@ async def handle_input_text_action( skyvern_element=skyvern_element, step=step, task=task, - ) - return [result] + ): + return [result] await skyvern_element.input_sequentially(text=text) return [ActionSuccess()] @@ -1198,7 +1198,7 @@ async def input_or_auto_complete_input( skyvern_element: SkyvernElement, step: Step, task: Task, -) -> ActionResult: +) -> ActionResult | None: LOG.info( "Trigger auto completion", task_id=task.task_id, @@ -1257,6 +1257,15 @@ async def input_or_auto_complete_input( if isinstance(result.action_result, ActionSuccess): return ActionSuccess() + if input_or_select_context.is_search_bar: + LOG.info( + "Stop generating potential values for the auto-completion since it's a search bar", + context=input_or_select_context, + step_id=step.step_id, + task_id=task.task_id, + ) + return None + tried_values.append(current_value) whole_new_elements.extend(result.incremental_elements) @@ -1373,6 +1382,15 @@ async def sequentially_select_from_dropdown( step_id=step.step_id, ) + if not force_select and input_or_select_context.is_search_bar: + LOG.info( + "Exit custom selection mode since it's a non-force search bar", + context=input_or_select_context, + task_id=task.task_id, + step_id=step.step_id, + ) + return None, None + # TODO: only suport the third-level dropdown selection now MAX_SELECT_DEPTH = 3 values: list[str | None] = []