Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add complete action verification #845

Merged
merged 1 commit into from
Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions skyvern/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,3 +502,9 @@ def __init__(self, current_value: str) -> None:
super().__init__(
f"Can't find a suitable auto completion for the current value, maybe retry with another reasonable value. current_value={current_value}"
)


class IllegitComplete(SkyvernException):
    """Error whose message reads "Illegit complete", optionally followed by context data.

    Args:
        data: Optional dict appended to the message as ``, data={...}``.
            A ``None`` or empty dict adds nothing to the message.
    """

    def __init__(self, data: dict | None = None) -> None:
        message = "Illegit complete"
        # Only mention the payload when there is something to show.
        if data:
            message += f", data={data}"
        super().__init__(message)
81 changes: 80 additions & 1 deletion skyvern/forge/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
WebAction,
parse_actions,
)
from skyvern.webeye.actions.handler import ActionHandler, poll_verification_code
from skyvern.webeye.actions.handler import ActionHandler, handle_complete_action, poll_verification_code
from skyvern.webeye.actions.models import AgentStepOutput, DetailedAgentStepOutput
from skyvern.webeye.actions.responses import ActionResult
from skyvern.webeye.browser_factory import BrowserState
Expand Down Expand Up @@ -773,6 +773,36 @@ async def agent_step(
step_retry=step.retry_index,
action_results=action_results,
)
if app.EXPERIMENTATION_PROVIDER.is_feature_enabled_cached(
"CHECK_USER_GOAL_SUCCESS_EVERY_STEP",
task.workflow_run_id or task.task_id,
properties={
"organization_id": task.organization_id,
"organization_created_at": str(organization.created_at) if organization else None,
},
):
LOG.info("Checking if user goal is achieved after re-scraping the page")
# Check if navigation goal is achieved after re-scraping the page
new_scraped_page = await self._scrape_with_type(
task=task,
step=step,
browser_state=browser_state,
scrape_type=ScrapeType.NORMAL,
organization=organization,
)
if new_scraped_page is None:
LOG.warning("Failed to scrape the page before checking user goal success, skipping check...")
else:
working_page = await browser_state.get_working_page()
result_tuple = await self.check_user_goal_success(
page=working_page,
scraped_page=new_scraped_page,
task=task,
step=step,
)
if result_tuple is not None:
complete_action, action_results = result_tuple
detailed_agent_step_output.actions_and_results.append((complete_action, action_results))
# If no action errors return the agent state and output
completed_step = await self.update_step(
step=step,
Expand Down Expand Up @@ -811,6 +841,55 @@ async def agent_step(
)
return failed_step, detailed_agent_step_output.get_clean_detailed_output()

@staticmethod
async def check_user_goal_success(
    page: Page, scraped_page: ScrapedPage, task: Task, step: Step
) -> tuple[CompleteAction, list[ActionResult]] | None:
    """Ask the secondary LLM handler whether the task's navigation goal is already met.

    Builds the "check-user-goal" prompt from the task's navigation goal, its
    payload and the scraped page's HTML element tree, sends it together with
    split screenshots of the page, and interprets the response.

    Returns:
        ``(complete_action, action_results)`` when the response reports the
        goal as achieved and the complete action has been handled. ``None``
        when the step output already holds a ``CompleteAction``, the response
        lacks a required key, the goal is not achieved, or any exception is
        raised along the way (the exception is logged, not propagated).
    """
    try:
        # Check if Skyvern already returned a complete action, if so, don't run verification
        prior_output = step.output
        if prior_output and prior_output.actions_and_results:
            if any(isinstance(action, CompleteAction) for action, _ in prior_output.actions_and_results):
                return None

        prompt = prompt_engine.load_prompt(
            "check-user-goal",
            navigation_goal=task.navigation_goal,
            navigation_payload=task.navigation_payload,
            elements=scraped_page.build_element_tree(ElementTreeFormat.HTML),
        )
        page_screenshots = await SkyvernFrame.take_split_screenshots(page=page, url=page.url)

        llm_handler = app.SECONDARY_LLM_API_HANDLER
        llm_response = await llm_handler(prompt=prompt, step=step, screenshots=page_screenshots)

        # Both keys are required to build a complete action; bail out if either is missing.
        required_keys = ("user_goal_achieved", "reasoning")
        if not all(key in llm_response for key in required_keys):
            LOG.error(
                "Invalid LLM response for user goal success verification, skipping verification",
                verification_response=llm_response,
            )
            return None

        goal_reached: bool = llm_response["user_goal_achieved"]
        complete_action = CompleteAction(
            reasoning=llm_response["reasoning"],
            data_extraction_goal=task.data_extraction_goal,
        )
        # We don't want to return a complete action if the user goal is not achieved since we're checking at every step
        if goal_reached:
            LOG.info("User goal achieved, executing complete action")
            handler_results = await handle_complete_action(complete_action, page, scraped_page, task, step)
            return complete_action, handler_results

        return None

    except Exception:
        # Verification is best-effort: log the failure and let the step proceed without it.
        LOG.error("LLM verification failed for complete action, skipping LLM verification", exc_info=True)
        return None

async def record_artifacts_after_action(self, task: Task, step: Step, browser_state: BrowserState) -> None:
working_page = await browser_state.get_working_page()
if not working_page:
Expand Down
29 changes: 29 additions & 0 deletions skyvern/forge/prompts/skyvern/check-user-goal.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
Based on the content of the screenshot and the elements on the page, determine whether the user goal has been successfully completed or not.

Respond with a JSON object in this format:
```json
{
"reasoning": str, // Describe the state of the user goal and explain why it has been completed or not completed.
"user_goal_achieved": bool // True if the user goal has been completed, False otherwise.
}
```

Make sure to ONLY return the JSON object, with no additional text before or after it. Do not make any assumptions beyond what the screenshot shows; base your response solely on what you observe in the screenshot and nothing else.

Examples:
{
"reasoning": "The screenshot shows a success message for a file upload field. Since the user's goal is to upload a file, it has been successfully completed.",
"user_goal_achieved": true
}
{
"reasoning": "The screenshot shows a job application form with fields. Since the user's goal is to submit a job application, it has not been successfully completed.",
"user_goal_achieved": false
}

Elements on the page:
{{ elements }}

User Goal:
{{ navigation_goal }}

User Details:
{{ navigation_payload }}
Loading