Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add linked element context #196

Merged
merged 1 commit into from
Apr 16, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 92 additions & 35 deletions skyvern/webeye/scraper/domUtils.js
Original file line number Diff line number Diff line change
Expand Up @@ -425,8 +425,11 @@ function getElementContext(element) {
return fullContext;
}

function getElementContent(element) {
function getElementContent(element, skipped_element = null) {
// DFS to get all the text content from all the nodes under the element
if (skipped_element && element === skipped_element) {
return "";
}

let textContent = element.textContent;
let nodeContent = "";
Expand All @@ -441,7 +444,7 @@ function getElementContent(element) {
nodeTextContentList.push(childText);
} else if (child.nodeType === Node.ELEMENT_NODE) {
// childText = child.textContent.trim();
childText = getElementContent(child);
childText = getElementContent(child, skipped_element);
} else {
console.log("Unhandled node type: ", child.nodeType);
}
Expand Down Expand Up @@ -632,27 +635,7 @@ function buildTreeFromBody() {
}
}

// TODO: Handle iframes
// setup before parsing the dom
checkSelect2();
// Clear all the unique_id attributes so that there are no conflicts
removeAllUniqueIdAttributes();
processElement(document.body, null);

for (var element of elements) {
if (
((element.tagName === "input" && element.attributes["type"] === "text") ||
element.tagName === "textarea") &&
(element.attributes["required"] || element.attributes["aria-required"]) &&
element.attributes.value === ""
) {
// TODO (kerem): we may want to pass these elements to the LLM as empty but required fields in the future
console.log(
"input element with required attribute and no value",
element,
);
}

const getContextByParent = (element) => {
// for most elements, we're going 10 layers up to see if we can find "label" as a parent
// if found, most likely the context under label is relevant to this element
let targetParentElements = new Set(["label", "fieldset"]);
Expand All @@ -671,22 +654,96 @@ function buildTreeFromBody() {
break;
}
}
if (targetContextualParent) {
let context = "";
var lowerCaseTagName = targetContextualParent.tagName.toLowerCase();
if (lowerCaseTagName === "label") {
if (!targetContextualParent) {
return "";
}

let context = "";
var lowerCaseTagName = targetContextualParent.tagName.toLowerCase();
if (lowerCaseTagName === "label") {
context = getElementContext(targetContextualParent);
} else if (lowerCaseTagName === "fieldset") {
// fieldset is usually within a form or another element that contains the whole context
targetContextualParent = targetContextualParent.parentElement;
if (targetContextualParent) {
context = getElementContext(targetContextualParent);
} else if (lowerCaseTagName === "fieldset") {
// fieldset is usually within a form or another element that contains the whole context
targetContextualParent = targetContextualParent.parentElement;
if (targetContextualParent) {
context = getElementContext(targetContextualParent);
}
}
if (context.length > 0) {
element.context = context;
}
return context;
};

/**
 * Build context text for a scraped element from the DOM nodes that are
 * explicitly linked to it.
 *
 * Linked nodes are gathered from three sources, in this order:
 *   1. every <label> whose "for" attribute equals the element's id,
 *   2. every id listed in the element's "aria-labelledby" attribute,
 *   3. every id listed in the element's "aria-describedby" attribute.
 *
 * @param {{id: string}} element - scraped element record; its `id` is matched
 *   against the `unique_id` attribute of the live DOM node.
 * @returns {string} the text of all linked nodes joined by ";", or "" when the
 *   DOM node cannot be found, nothing is linked, or the joined text is longer
 *   than 1000 characters.
 */
const getContextByLinked = (element) => {
  const currentEle = document.querySelector(`[unique_id="${element.id}"]`);
  if (!currentEle) {
    // the node is no longer in the document; there is nothing to link against
    return "";
  }

  // A Set keeps insertion order and drops nodes that are referenced more than
  // once (e.g. a label pointed to by both "for" and "aria-labelledby").
  const linkedElements = new Set();

  // 1. element id -> labels pointed to this id.
  // Compare attribute values directly instead of interpolating the id into a
  // selector, so ids containing quotes or backslashes cannot break the query.
  const elementId = currentEle.getAttribute("id");
  if (elementId) {
    for (const label of document.querySelectorAll("label[for]")) {
      if (label.getAttribute("for") === elementId) {
        linkedElements.add(label);
      }
    }
  }

  // 2. + 3. both ARIA attributes hold a whitespace-separated list of ids
  for (const attrName of ["aria-labelledby", "aria-describedby"]) {
    const idRefs = currentEle.getAttribute(attrName);
    if (!idRefs) {
      continue;
    }
    for (const idRef of idRefs.split(/\s+/)) {
      if (!idRef) {
        continue;
      }
      const linked = document.getElementById(idRef);
      if (linked) {
        linkedElements.add(linked);
      }
    }
  }

  const fullContext = [];
  for (const linked of linkedElements) {
    // if the element is a child of the linked node, its own content is skipped
    const content = getElementContent(linked, currentEle);
    if (content) {
      fullContext.push(content);
    }
  }

  const context = fullContext.join(";");
  const charLimit = 1000;
  if (context.length > charLimit) {
    // overly long context is discarded entirely rather than truncated
    return "";
  }

  return context;
};

// TODO: Handle iframes
// setup before parsing the dom
checkSelect2();
// Clear all the unique_id attributes so that there are no conflicts
removeAllUniqueIdAttributes();
processElement(document.body, null);

for (var element of elements) {
if (
((element.tagName === "input" && element.attributes["type"] === "text") ||
element.tagName === "textarea") &&
(element.attributes["required"] || element.attributes["aria-required"]) &&
element.attributes.value === ""
) {
// TODO (kerem): we may want to pass these elements to the LLM as empty but required fields in the future
console.log(
"input element with required attribute and no value",
element,
);
}

const context = getContextByLinked(element) + getContextByParent(element);
// const context = getContextByParent(element)
if (context && context.length <= 1000) {
element.context = context;
}
}

return [elements, resultArray];
Expand Down
Loading