Skip to content

Commit

Permalink
🔄 synced local 'skyvern/' with remote 'skyvern/'
Browse files Browse the repository at this point in the history
<!-- ELLIPSIS_HIDDEN -->

| 🚀 | This description was created by [Ellipsis](https://www.ellipsis.dev) for commit 58a439d843b5f612e79e04741b68d72463fa812e  |
|--------|--------|

### Summary:
Enhanced script usage instructions, error handling, and DOM utility functions, including uncommenting a line to remove orphan nodes in `skyvern/webeye/scraper/domUtils.js`.

**Key points**:
- `scripts/restart_task.py`: Updated script usage instructions for `tasks.csv` file format.
- `skyvern/exceptions.py`: Improved error messages and added `FailToClick` exception.
- `skyvern/forge/agent.py`: Fixed typo in comment.
- `skyvern/forge/prompts/skyvern/extract-action-claude3-sonnet.j2`: Added `action_plan` key to JSON response format.
- `skyvern/forge/prompts/skyvern/extract-action.j2`: Added `action_plan` key to JSON response format.
- `skyvern/webeye/actions/handler.py`: Added handling for `FailToClick` exception and improved action handling logic.
- `skyvern/webeye/scraper/domUtils.js`: Uncommented line to remove orphan nodes from result array, enhanced visibility checks for radio inputs, and improved hint marker creation logic.
- `skyvern/webeye/scraper/scraper.py`: Updated element tree building logic and fixed issues with element attribute trimming.

----
Generated with ❤️ by [ellipsis.dev](https://www.ellipsis.dev)

<!-- ELLIPSIS_HIDDEN -->
  • Loading branch information
ykeremy committed Jul 2, 2024
1 parent eb7478a commit c62e774
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 22 deletions.
7 changes: 6 additions & 1 deletion skyvern/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ def __init__(self, tag_name: str):

class MissingElementDict(SkyvernException):
def __init__(self, element_id: str) -> None:
super().__init__(f"Found no element in the dict. element_id={element_id}")
super().__init__(f"Invalid element id. element_id={element_id}")


class MissingElementInIframe(SkyvernException):
Expand All @@ -325,6 +325,11 @@ def __init__(self, element_id: str):
)


class FailToClick(SkyvernException):
    """Error describing a failed click on the element with the given id."""

    def __init__(self, element_id: str):
        # Build the message first so the call to the base class stays short.
        message = f"Failed to click. element_id={element_id}"
        super().__init__(message)


class FailToSelectByLabel(SkyvernException):
def __init__(self, element_id: str):
super().__init__(f"Failed to select by label. element_id={element_id}")
Expand Down
2 changes: 1 addition & 1 deletion skyvern/forge/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -930,7 +930,7 @@ async def _build_and_record_step_prompt(
LOG.info("Using Claude3 Sonnet prompt template for action extraction")
prompt_template = "extract-action-claude3-sonnet"

# TODO: we only use HTML element for now, introduce a way to swith in the future
# TODO: we only use HTML element for now, introduce a way to switch in the future
element_tree_format = ElementTreeFormat.HTML
LOG.info(
"Building element tree",
Expand Down
1 change: 1 addition & 0 deletions skyvern/forge/prompts/skyvern/extract-action-claude3-sonnet.j2
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ If you see a popup in the page screenshot, prioritize actions on the popup.

Reply in JSON format with the following keys:
{
"action_plan": str, // A string that describes the plan of actions you're going to take. Be specific and to the point. Use this as a quick summary of the actions you're going to take, and what order you're going to take them in
"actions": array // An array of actions. Here's the format of each action:
[{
"reasoning": str, // The reasoning behind the action. Be specific, referencing any user information and their fields and element ids in your reasoning. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point.
Expand Down
1 change: 1 addition & 0 deletions skyvern/forge/prompts/skyvern/extract-action.j2
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ If you see a popup in the page screenshot, prioritize actions on the popup.

Reply in JSON format with the following keys:
{
"action_plan": str, // A string that describes the plan of actions you're going to take. Be specific and to the point. Use this as a quick summary of the actions you're going to take, and what order you're going to take them in
"actions": array // An array of actions. Here's the format of each action:
[{
"reasoning": str, // The reasoning behind the action. Be specific, referencing any user information and their fields and element ids in your reasoning. Mention why you chose the action type, and why you chose the element id. Keep the reasoning short and to the point.
Expand Down
17 changes: 11 additions & 6 deletions skyvern/webeye/actions/handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from skyvern.constants import INPUT_TEXT_TIMEOUT, REPO_ROOT_DIR
from skyvern.exceptions import (
EmptySelect,
FailToClick,
FailToSelectByIndex,
FailToSelectByLabel,
FailToSelectByValue,
Expand Down Expand Up @@ -789,10 +790,11 @@ async def chain_click(
javascript_triggered=javascript_triggered,
)
]
except Exception as e:

except Exception:
action_results: list[ActionResult] = [
ActionFailure(
e,
FailToClick(action.element_id),
javascript_triggered=javascript_triggered,
)
]
Expand Down Expand Up @@ -826,7 +828,7 @@ async def chain_click(
interacted_with_parent=True,
)
)
except Exception as pe:
except Exception:
LOG.warning(
"Failed to click parent element",
action=action,
Expand All @@ -835,7 +837,7 @@ async def chain_click(
)
action_results.append(
ActionFailure(
pe,
FailToClick(action.element_id),
javascript_triggered=javascript_triggered,
interacted_with_parent=True,
)
Expand Down Expand Up @@ -1073,9 +1075,12 @@ async def click_sibling_of_input(
javascript_triggered=javascript_triggered,
interacted_with_sibling=True,
)
except Exception as e:
except Exception:
LOG.warning("Failed to click sibling label of input element", exc_info=True)
return ActionFailure(exception=e, javascript_triggered=javascript_triggered)
return ActionFailure(
exception=Exception("Failed while trying to click sibling of input element"),
javascript_triggered=javascript_triggered,
)


async def extract_information_for_navigation_goal(
Expand Down
51 changes: 37 additions & 14 deletions skyvern/webeye/scraper/domUtils.js
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,10 @@ function isElementStyleVisibilityVisible(element, style) {
function isElementVisible(element) {
// TODO: This is a hack to not check visibility for option elements
// because they are not visible by default. We check their parent instead for visibility.
if (element.tagName.toLowerCase() === "option")
if (
element.tagName.toLowerCase() === "option" ||
(element.tagName.toLowerCase() === "input" && element.type === "radio")
)
return element.parentElement && isElementVisible(element.parentElement);

if (element.className.toString().includes("select2-offscreen")) {
Expand Down Expand Up @@ -1088,7 +1091,11 @@ async function buildTreeFromBody(frame = "main.frame", open_select = false) {
const labelElement = document.querySelector(
element.tagName + '[unique_id="' + element.id + '"]',
);
if (labelElement && labelElement.childElementCount === 0) {
if (
labelElement &&
labelElement.childElementCount === 0 &&
!labelElement.getAttribute("for")
) {
continue;
}
}
Expand Down Expand Up @@ -1234,15 +1241,30 @@ function createHintMarkersForGroups(groups) {
return [];
}

const hintMarkers = groups.map((group) => createHintMarkerForGroup(group));

const hintMarkers = groups
.filter((group) => group.elements.some((element) => element.interactable))
.map((group) => createHintMarkerForGroup(group));
// fill in marker text
const hintStrings = generateHintStrings(hintMarkers.length);
// const hintStrings = generateHintStrings(hintMarkers.length);
for (let i = 0; i < hintMarkers.length; i++) {
const hintMarker = hintMarkers[i];
hintMarker.hintString = hintStrings[i];

let interactableElementFound = false;

for (let i = 0; i < hintMarker.group.elements.length; i++) {
if (hintMarker.group.elements[i].interactable) {
hintMarker.hintString = hintMarker.group.elements[i].id;
interactableElementFound = true;
break;
}
}

if (!interactableElementFound) {
hintMarker.hintString = "";
}

try {
hintMarker.element.innerHTML = hintMarker.hintString.toUpperCase();
hintMarker.element.innerHTML = hintMarker.hintString;
} catch (e) {
// Ensure trustedTypes is available
if (typeof trustedTypes !== "undefined") {
Expand All @@ -1262,22 +1284,23 @@ function createHintMarkersForGroups(groups) {
}

function createHintMarkerForGroup(group) {
// Calculate the position of the element relative to the document
var scrollTop = window.pageYOffset || document.documentElement.scrollTop;
var scrollLeft = window.pageXOffset || document.documentElement.scrollLeft;

const marker = {};
// yellow annotation box with string
const el = document.createElement("div");
el.style.left = group.rect.left + "px";
el.style.top = group.rect.top + "px";
el.style.position = "absolute";
el.style.left = group.rect.left + scrollLeft + "px";
el.style.top = group.rect.top + scrollTop + "px";
// Each group is assigned a different incremental z-index, we use the same z-index for the
// bounding box and the hint marker
el.style.zIndex = this.currentZIndex;

// The bounding box around the group of hints.
const boundingBox = document.createElement("div");

// Calculate the position of the element relative to the document
var scrollTop = window.pageYOffset || document.documentElement.scrollTop;
var scrollLeft = window.pageXOffset || document.documentElement.scrollLeft;

// Set styles for the bounding box
boundingBox.style.position = "absolute";
boundingBox.style.display = "display";
Expand All @@ -1302,7 +1325,7 @@ function addHintMarkersToPage(hintMarkers) {
const parent = document.createElement("div");
parent.id = "boundingBoxContainer";
for (const hintMarker of hintMarkers) {
// parent.appendChild(hintMarker.element);
parent.appendChild(hintMarker.element);
parent.appendChild(hintMarker.boundingBox);
}
document.documentElement.appendChild(parent);
Expand Down
2 changes: 2 additions & 0 deletions skyvern/webeye/scraper/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,7 @@ def trim_element_tree(elements: list[dict]) -> list[dict]:

def _trimmed_attributes(tag_name: str, attributes: dict) -> dict:
new_attributes: dict = {}

for key in attributes:
if key == "id" and tag_name in ["input", "textarea", "select"]:
# We don't want to remove the id attribute any of these elements in case there's a label for it
Expand All @@ -484,6 +485,7 @@ def _trimmed_attributes(tag_name: str, attributes: dict) -> dict:
new_attributes[key] = attributes[key]
if key in RESERVED_ATTRIBUTES and attributes[key]:
new_attributes[key] = attributes[key]

return new_attributes


Expand Down

0 comments on commit c62e774

Please sign in to comment.