From 1533030ece12b276a39d0dea92ef89b0759d6704 Mon Sep 17 00:00:00 2001 From: ykeremy Date: Mon, 6 May 2024 20:02:43 +0000 Subject: [PATCH] =?UTF-8?q?=F0=9F=94=84=20synced=20local=20'skyvern/'=20wi?= =?UTF-8?q?th=20remote=20'skyvern/'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- skyvern/forge/agent.py | 8 +- .../skyvern/extract-action-claude3-sonnet.j2 | 77 +++++++++++++++++++ .../forge/sdk/experimentation/providers.py | 10 +++ 3 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 skyvern/forge/prompts/skyvern/extract-action-claude3-sonnet.j2 diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index d0bb90a01..bf4150179 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -636,8 +636,14 @@ async def _build_and_record_step_prompt( current_url = ( await browser_state.page.evaluate("() => document.location.href") if browser_state.page else starting_url ) + prompt_template = "extract-action" + if app.EXPERIMENTATION_PROVIDER.is_feature_enabled_cached( + "USE_CLAUDE3_SONNET", task.workflow_run_id or task.task_id + ): + LOG.info("Using Claude3 Sonnet prompt template for action extraction") + prompt_template = "extract-action-claude3-sonnet" extract_action_prompt = prompt_engine.load_prompt( - "extract-action", + prompt_template, navigation_goal=navigation_goal, navigation_payload_str=json.dumps(task.navigation_payload), starting_url=starting_url, diff --git a/skyvern/forge/prompts/skyvern/extract-action-claude3-sonnet.j2 b/skyvern/forge/prompts/skyvern/extract-action-claude3-sonnet.j2 new file mode 100644 index 000000000..fc1def2f9 --- /dev/null +++ b/skyvern/forge/prompts/skyvern/extract-action-claude3-sonnet.j2 @@ -0,0 +1,77 @@ +Identify actions to help user progress towards the user goal using the DOM elements given in the list and the screenshot of the website. +Include only the elements that are relevant to the user goal, without altering or imagining new elements. 
+Use the details from the user details to fill in necessary values. Always satisfy required fields if the field isn't already filled in. Don't return any action for the same field, if this field is already filled in and the value is the same as the one you would have filled in. +MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments (//), no unnecessary quotes, etc. +Each element is tagged with an ID. +If you see any information in red in the page screenshot, this means a condition wasn't satisfied. Prioritize actions with the red information. +If you see a popup in the page screenshot, prioritize actions on the popup. + +Reply in JSON format with the following keys: +{ + "actions": array // An array of actions. Here's the format of each action: + [{ + "reasoning": str, // The reasoning behind the action. Be specific, referencing any user information and their fields and element ids in your reasoning. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point. + "confidence_float": float, // The confidence of the action. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence + "action_type": str, // It's a string enum: "CLICK", "INPUT_TEXT", "UPLOAD_FILE", "SELECT_OPTION", "WAIT", "SOLVE_CAPTCHA", "COMPLETE", "TERMINATE". "CLICK" is an element you'd like to click. "INPUT_TEXT" is an element you'd like to input text into. "UPLOAD_FILE" is an element you'd like to upload a file into. "SELECT_OPTION" is an element you'd like to select an option from. "WAIT" action should be used if there are no actions to take and there is some indication on screen that waiting could yield more actions. "WAIT" should not be used if there are actions to take. "SOLVE_CAPTCHA" should be used if there's a captcha to solve on the screen. 
"COMPLETE" is used when the user goal has been achieved AND if there's any data extraction goal, you should be able to get data from the page. Never return a COMPLETE action unless you confirm user goal is achieved through the elements or the screenshots. "TERMINATE" is used to terminate the whole task with a failure when it doesn't seem like the user goal can be achieved. Do not use "TERMINATE" if waiting could lead the user towards the goal. Only return "TERMINATE" if you are on a page where the user goal cannot be achieved. If you are returning "COMPLETE" or "TERMINATE", never return any other action in the same response. The "COMPLETE" and "TERMINATE" actions can only be returned once in the whole task. When they are returned, they have to be the only action in the response. + "id": int, // The id of the element to take action on. The id has to be one from the elements list + "text": str, // Text for INPUT_TEXT action only + "file_url": str, // The url of the file to upload if applicable. This field must be present for UPLOAD_FILE but can also be present for CLICK only if the click is to upload the file. It should be null otherwise. + "option": { // The option to select for SELECT_OPTION action only. null if not SELECT_OPTION action + "label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE + "index": int, // the id corresponding to the optionIndex under the the select element. + "value": str // the value of the option. MAKE SURE YOU USE THIS VALUE TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION VALUE HERE + }, +{% if error_code_mapping_str %} + "errors": array // A list of errors. This is used to surface any errors that matches the current situation for COMPLETE and TERMINATE actions. For other actions or if no error description suits the current situation on the screenshots, return an empty list. 
You are allowed to return multiple errors if there are multiple errors on the page. + [{ + "error_code": str, // The error code from the user's error code list + "reasoning": str, // The reasoning behind the error. Be specific, referencing any user information and their fields in your reasoning. Keep the reasoning short and to the point. + "confidence_float": float // The confidence of the error. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence + }] +{% endif %} + }], +} + +{% if action_history %} +Consider the action history from the last step and the screenshot together, if actions from the last step don't yield positive impact, try other actions or other action combinations. +{% endif %} + +Clickable elements from `{{ current_url }}`: +``` +{{ elements }} +``` + +The URL of the page you're on right now is `{{ current_url }}`. + +User goal: +``` +{{ navigation_goal }} +``` + +{% if error_code_mapping_str %} +Use the error codes and their descriptions to surface user-defined errors. Do not return any error that's not defined by the user. 
User defined errors: +{{ error_code_mapping_str }} +{% endif %} + +{% if data_extraction_goal %} + +User Data Extraction Goal: +``` +{{ data_extraction_goal }} +``` +{% endif %} + +User details: +``` +{{ navigation_payload_str }} +``` +{% if action_history %} + +Action results from previous steps: (note: even if the action history suggests goal is achieved, check the screenshot and the DOM elements to make sure the goal is achieved) +{{ action_history }} +{% endif %} + +Current datetime in UTC: +``` +{{ utc_datetime }} +``` \ No newline at end of file diff --git a/skyvern/forge/sdk/experimentation/providers.py b/skyvern/forge/sdk/experimentation/providers.py index 312576270..34e374331 100644 --- a/skyvern/forge/sdk/experimentation/providers.py +++ b/skyvern/forge/sdk/experimentation/providers.py @@ -6,10 +6,20 @@ class BaseExperimentationProvider(ABC): + # feature_name -> distinct_id -> result + result_map: dict[str, dict[str, bool]] = {} + @abstractmethod def is_feature_enabled(self, feature_name: str, distinct_id: str) -> bool: """Check if a specific feature is enabled.""" + def is_feature_enabled_cached(self, feature_name: str, distinct_id: str) -> bool: + if feature_name not in self.result_map: + self.result_map[feature_name] = {} + if distinct_id not in self.result_map[feature_name]: + self.result_map[feature_name][distinct_id] = self.is_feature_enabled(feature_name, distinct_id) + return self.result_map[feature_name][distinct_id] + class NoOpExperimentationProvider(BaseExperimentationProvider): def is_feature_enabled(self, feature_name: str, distinct_id: str) -> bool: