diff --git a/README.md b/README.md
index 8cb8e17..d01a932 100644
--- a/README.md
+++ b/README.md
@@ -12,9 +12,9 @@
## Introduction
diff --git a/buildspec.json b/buildspec.json
index 7eaa80e..6ddb303 100644
--- a/buildspec.json
+++ b/buildspec.json
@@ -37,7 +37,7 @@
}
},
"name": "obs-urlsource",
- "version": "0.3.6",
+ "version": "0.3.7",
"author": "Roy Shilkrot",
"website": "https://github.com/locaal-ai/obs-urlsource",
"email": "roy.shil@gmail.com",
diff --git a/src/parsers/html.cpp b/src/parsers/html.cpp
index 2b710a7..caade5a 100644
--- a/src/parsers/html.cpp
+++ b/src/parsers/html.cpp
@@ -20,99 +20,125 @@ lxb_status_t find_callback(lxb_dom_node_t *node, lxb_css_selector_specificity_t
{
UNUSED_PARAMETER(spec);
std::string str;
- (void)lxb_html_serialize_deep_cb(node, serializer_callback, &str);
- ((std::vector *)data)->push_back(str);
- return LXB_STATUS_OK;
+ lxb_status_t status = lxb_html_serialize_deep_cb(node, serializer_callback, &str);
+ if (status == LXB_STATUS_OK) {
+ ((std::vector *)data)->push_back(str);
+ }
+ return status;
}
lxb_status_t find_with_selectors(const std::string &slctrs, lxb_html_document_t *document,
std::vector &found)
{
- /* Create CSS parser. */
- lxb_css_parser_t *parser;
- lxb_css_selector_list_t *list;
- lxb_status_t status;
- lxb_dom_node_t *body;
- lxb_selectors_t *selectors;
-
- parser = lxb_css_parser_create();
- status = lxb_css_parser_init(parser, NULL);
- if (status != LXB_STATUS_OK) {
- obs_log(LOG_ERROR, "Failed to setup CSS parser");
- return EXIT_FAILURE;
- }
+ lxb_css_parser_t *parser = nullptr;
+ lxb_css_selector_list_t *list = nullptr;
+ lxb_selectors_t *selectors = nullptr;
+ lxb_status_t status = LXB_STATUS_ERROR;
+
+ do {
+ parser = lxb_css_parser_create();
+ if (!parser) {
+ obs_log(LOG_ERROR, "Failed to create CSS parser");
+ break;
+ }
- /* Selectors. */
- selectors = lxb_selectors_create();
- status = lxb_selectors_init(selectors);
- if (status != LXB_STATUS_OK) {
- obs_log(LOG_ERROR, "Failed to setup Selectors");
- return EXIT_FAILURE;
- }
+ status = lxb_css_parser_init(parser, nullptr);
+ if (status != LXB_STATUS_OK) {
+ obs_log(LOG_ERROR, "Failed to init CSS parser");
+ break;
+ }
- /* Parse and get the log. */
+ selectors = lxb_selectors_create();
+ if (!selectors) {
+ obs_log(LOG_ERROR, "Failed to create selectors");
+ break;
+ }
- list = lxb_css_selectors_parse(parser, (const lxb_char_t *)slctrs.c_str(), slctrs.length());
- if (parser->status != LXB_STATUS_OK) {
- obs_log(LOG_ERROR, "Failed to parse CSS selectors");
- return EXIT_FAILURE;
- }
+ status = lxb_selectors_init(selectors);
+ if (status != LXB_STATUS_OK) {
+ obs_log(LOG_ERROR, "Failed to init selectors");
+ break;
+ }
- /* Find HTML nodes by CSS Selectors. */
- body = lxb_dom_interface_node(lxb_html_document_body_element(document));
+ list = lxb_css_selectors_parse(parser, (const lxb_char_t *)slctrs.c_str(),
+ slctrs.length());
+ if (!list || parser->status != LXB_STATUS_OK) {
+ obs_log(LOG_ERROR, "Failed to parse CSS selectors");
+ break;
+ }
- status = lxb_selectors_find(selectors, body, list, find_callback, &found);
- if (status != LXB_STATUS_OK) {
- obs_log(LOG_ERROR, "Failed to find HTML nodes by CSS Selectors");
- return EXIT_FAILURE;
- }
+ lxb_dom_node_t *body =
+ lxb_dom_interface_node(lxb_html_document_body_element(document));
+ if (!body) {
+ obs_log(LOG_ERROR, "Failed to get document body");
+ break;
+ }
- /* Destroy Selectors object. */
- (void)lxb_selectors_destroy(selectors, true);
+ status = lxb_selectors_find(selectors, body, list, find_callback, &found);
+ if (status != LXB_STATUS_OK) {
+ obs_log(LOG_ERROR, "Failed to find nodes by CSS Selectors");
+ break;
+ }
- /* Destroy resources for CSS Parser. */
- (void)lxb_css_parser_destroy(parser, true);
+ } while (0);
- /* Destroy all object for all CSS Selector List. */
- lxb_css_selector_list_destroy_memory(list);
+ // Cleanup
+ if (list) {
+ lxb_css_selector_list_destroy_memory(list);
+ }
+ if (selectors) {
+ lxb_selectors_destroy(selectors, true);
+ }
+ if (parser) {
+ lxb_css_parser_destroy(parser, true);
+ }
- return LXB_STATUS_OK;
+ return status;
}
struct request_data_handler_response parse_html(struct request_data_handler_response response,
const url_source_request_data *request_data)
{
- lxb_status_t status;
- lxb_html_document_t *document;
+ lxb_html_document_t *document = nullptr;
- document = lxb_html_document_create();
- if (document == NULL) {
- return make_fail_parse_response("Failed to setup HTML parser");
- }
+ try {
+ document = lxb_html_document_create();
+ if (!document) {
+ return make_fail_parse_response("Failed to create HTML document");
+ }
- status = lxb_html_document_parse(document, (const lxb_char_t *)response.body.c_str(),
- response.body.length());
- if (status != LXB_STATUS_OK) {
- return make_fail_parse_response("Failed to parse HTML");
- }
+ lxb_status_t status =
+ lxb_html_document_parse(document, (const lxb_char_t *)response.body.c_str(),
+ response.body.length());
- std::string parsed_output = response.body;
- // Get the output value
- if (request_data->output_cssselector != "") {
- std::vector found;
- if (find_with_selectors(request_data->output_cssselector, document, found) !=
- LXB_STATUS_OK) {
- return make_fail_parse_response("Failed to find element with CSS selector");
- } else {
- if (found.size() > 0) {
- std::copy(found.begin(), found.end(),
- std::back_inserter(response.body_parts_parsed));
+ if (status != LXB_STATUS_OK) {
+ lxb_html_document_destroy(document);
+ return make_fail_parse_response("Failed to parse HTML");
+ }
+
+ if (!request_data->output_cssselector.empty()) {
+ std::vector found;
+ status = find_with_selectors(request_data->output_cssselector, document,
+ found);
+
+ if (status != LXB_STATUS_OK) {
+ lxb_html_document_destroy(document);
+ return make_fail_parse_response(
+ "Failed to find element with CSS selector");
}
+
+ response.body_parts_parsed = std::move(found);
+ } else {
+ response.body_parts_parsed.push_back(response.body);
}
- } else {
- // Return the whole HTML object
- response.body_parts_parsed.push_back(parsed_output);
- }
- return response;
+ lxb_html_document_destroy(document);
+ return response;
+
+ } catch (const std::exception &e) {
+ if (document) {
+ lxb_html_document_destroy(document);
+ }
+ return make_fail_parse_response(std::string("HTML parsing exception: ") + e.what());
+ }
}
diff --git a/src/parsers/jsonpath.cpp b/src/parsers/jsonpath.cpp
index 993a1d7..bec1ab8 100644
--- a/src/parsers/jsonpath.cpp
+++ b/src/parsers/jsonpath.cpp
@@ -2,67 +2,56 @@
#include "errors.h"
#include
-#include
#include
-#include
#include
+#include
struct request_data_handler_response parse_json(struct request_data_handler_response response,
const url_source_request_data *request_data)
{
UNUSED_PARAMETER(request_data);
-
- // Parse the response as JSON
- jsoncons::json json;
try {
- json = jsoncons::json::parse(response.body);
+ // Parse JSON only once and store in both formats
+ auto json_cons = jsoncons::json::parse(response.body);
response.body_json = nlohmann::json::parse(response.body);
- } catch (jsoncons::json_exception &e) {
+ return response;
+ } catch (const jsoncons::json_exception &e) {
return make_fail_parse_response(e.what());
- } catch (nlohmann::json::parse_error &e) {
+ } catch (const nlohmann::json::exception &e) {
return make_fail_parse_response(e.what());
}
- // Return the whole JSON object
- response.body_parts_parsed.push_back(json.as_string());
- return response;
}
struct request_data_handler_response parse_json_path(struct request_data_handler_response response,
const url_source_request_data *request_data)
{
-
- // Parse the response as JSON
- jsoncons::json json;
try {
- json = jsoncons::json::parse(response.body);
+ auto json = jsoncons::json::parse(response.body);
response.body_json = nlohmann::json::parse(response.body);
- } catch (jsoncons::json_exception &e) {
- return make_fail_parse_response(e.what());
- } catch (nlohmann::json::parse_error &e) {
- return make_fail_parse_response(e.what());
- }
- std::vector parsed_output = {};
- // Get the output value
- if (request_data->output_json_path != "") {
- try {
- const auto value = jsoncons::jsonpath::json_query(
- json, request_data->output_json_path);
+
+ if (!request_data->output_json_path.empty()) {
+ // Create and evaluate JSONPath expression
+ auto value = jsoncons::jsonpath::json_query(json,
+ request_data->output_json_path);
+
if (value.is_array()) {
- // extract array items as strings
+ response.body_parts_parsed.reserve(value.size());
for (const auto &item : value.array_range()) {
- parsed_output.push_back(item.as_string());
+ response.body_parts_parsed.push_back(
+ item.as());
}
} else {
- parsed_output.push_back(value.as_string());
+ response.body_parts_parsed.push_back(value.as());
}
- } catch (jsoncons::json_exception &e) {
- return make_fail_parse_response(e.what());
+ } else {
+ response.body_parts_parsed.push_back(json.as());
}
- } else {
- // Return the whole JSON object
- parsed_output.clear();
- parsed_output.push_back(json.as_string());
+
+ return response;
+
+ } catch (const jsoncons::jsonpath::jsonpath_error &e) {
+ return make_fail_parse_response(std::string("JSONPath error: ") + e.what());
+ } catch (const std::exception &e) {
+ return make_fail_parse_response(std::string("JSON parse error: ") + e.what());
}
- response.body_parts_parsed = parsed_output;
- return response;
}
diff --git a/src/parsers/regex.cpp b/src/parsers/regex.cpp
index 4fc3bf3..dae2842 100644
--- a/src/parsers/regex.cpp
+++ b/src/parsers/regex.cpp
@@ -1,6 +1,6 @@
-
#include "request-data.h"
#include "plugin-support.h"
+#include "errors.h"
#include
#include
@@ -8,30 +8,35 @@
struct request_data_handler_response parse_regex(struct request_data_handler_response response,
const url_source_request_data *request_data)
{
- std::string parsed_output = "";
- if (request_data->output_regex == "") {
- // Return the whole response body
- parsed_output = response.body;
- } else {
- // Parse the response as a regex
- std::regex regex(request_data->output_regex,
- std::regex_constants::ECMAScript | std::regex_constants::optimize);
+ try {
+ if (request_data->output_regex.empty()) {
+ response.body_parts_parsed.push_back(response.body);
+ return response;
+ }
+
+ // Cache compiled regex patterns for better performance
+ static thread_local std::unordered_map regex_cache;
+
+ auto ®ex = regex_cache[request_data->output_regex];
+ if (regex_cache.find(request_data->output_regex) == regex_cache.end()) {
+ regex = std::regex(request_data->output_regex,
+ std::regex_constants::ECMAScript |
+ std::regex_constants::optimize);
+ }
+
std::smatch match;
if (std::regex_search(response.body, match, regex)) {
- if (match.size() > 1) {
- parsed_output = match[1].str();
- } else {
- parsed_output = match[0].str();
- }
- } else {
- obs_log(LOG_INFO, "Failed to match regex");
- // Return an error response
- struct request_data_handler_response responseFail;
- responseFail.error_message = "Failed to match regex";
- responseFail.status_code = URL_SOURCE_REQUEST_PARSING_ERROR_CODE;
- return responseFail;
+ // Get the appropriate capture group
+ size_t group = match.size() > 1 ? 1 : 0;
+ response.body_parts_parsed.push_back(match[group].str());
+ return response;
}
+
+ return make_fail_parse_response("No regex match found");
+
+ } catch (const std::regex_error &e) {
+ return make_fail_parse_response(std::string("Regex error: ") + e.what());
+ } catch (const std::exception &e) {
+ return make_fail_parse_response(std::string("Parse error: ") + e.what());
}
- response.body_parts_parsed.push_back(parsed_output);
- return response;
}
diff --git a/src/parsers/xml.cpp b/src/parsers/xml.cpp
index 8bca2e1..de6929f 100644
--- a/src/parsers/xml.cpp
+++ b/src/parsers/xml.cpp
@@ -1,4 +1,3 @@
-
#include "request-data.h"
#include "plugin-support.h"
#include "errors.h"
@@ -9,69 +8,83 @@
struct request_data_handler_response parse_xml(struct request_data_handler_response response,
const url_source_request_data *request_data)
{
- // Parse the response as XML using pugixml
pugi::xml_document doc;
+
try {
- pugi::xml_parse_result result = doc.load_string(response.body.c_str());
- if (!result) {
+ // Use load_buffer instead of load_string for better performance with known size
+ pugi::xml_parse_result result =
+ doc.load_buffer(response.body.c_str(), response.body.size(),
+ pugi::parse_default, pugi::encoding_utf8);
+ if (!result) {
obs_log(LOG_INFO, "Failed to parse XML response: %s", result.description());
return make_fail_parse_response(result.description());
}
+
if (doc.empty()) {
- obs_log(LOG_INFO, "Failed to parse XML response: Empty XML");
- return make_fail_parse_response("Empty XML");
+ return make_fail_parse_response("Empty XML document");
}
- std::string parsed_output = "";
- // Get the output value
- if (request_data->output_xpath != "") {
+
+ if (!request_data->output_xpath.empty()) {
obs_log(LOG_INFO, "Parsing XML with XPath: %s",
request_data->output_xpath.c_str());
- pugi::xpath_node_set nodes =
- doc.select_nodes(request_data->output_xpath.c_str());
- if (nodes.size() > 0) {
- parsed_output = nodes[0].node().text().get();
- } else {
- obs_log(LOG_INFO, "Failed to get XML value");
- return make_fail_parse_response("Failed to get XML value");
+
+ // Compile XPath expression once for better performance
+ pugi::xpath_query query(request_data->output_xpath.c_str());
+ pugi::xpath_node_set nodes = query.evaluate_node_set(doc);
+
+ if (nodes.empty()) {
+ return make_fail_parse_response("XPath query returned no results");
+ }
+
+ // Get all matching nodes
+ for (const auto &node : nodes) {
+ response.body_parts_parsed.push_back(node.node().text().get());
}
} else {
// Return the whole XML object
- parsed_output = response.body;
+ response.body_parts_parsed.push_back(response.body);
}
- response.body_parts_parsed.push_back(parsed_output);
+
+ return response;
+
+ } catch (const pugi::xpath_exception &e) {
+ obs_log(LOG_INFO, "XPath evaluation failed: %s", e.what());
+ return make_fail_parse_response(e.what());
} catch (const std::exception &e) {
- obs_log(LOG_INFO, "Failed to parse XML response: %s", e.what());
+ obs_log(LOG_INFO, "XML parsing failed: %s", e.what());
return make_fail_parse_response(e.what());
}
- return response;
}
struct request_data_handler_response
parse_xml_by_xquery(struct request_data_handler_response response,
const url_source_request_data *request_data)
{
- // Parse the response as XML using pugixml
pugi::xml_document doc;
- pugi::xml_parse_result result = doc.load_string(response.body.c_str());
- if (!result) {
- obs_log(LOG_INFO, "Failed to parse XML response: %s", result.description());
- // Return an error response
- struct request_data_handler_response responseFail;
- responseFail.error_message = result.description();
- responseFail.status_code = URL_SOURCE_REQUEST_PARSING_ERROR_CODE;
- return responseFail;
- }
- std::string parsed_output = "";
- // Get the output value
- if (request_data->output_xquery != "") {
- pugi::xpath_query query_entity(request_data->output_xquery.c_str());
- std::string s = query_entity.evaluate_string(doc);
- parsed_output = s;
- } else {
- // Return the whole XML object
- parsed_output = response.body;
+
+ try {
+ pugi::xml_parse_result result =
+ doc.load_buffer(response.body.c_str(), response.body.size(),
+ pugi::parse_default, pugi::encoding_utf8);
+
+ if (!result) {
+ return make_fail_parse_response(result.description());
+ }
+
+ if (!request_data->output_xquery.empty()) {
+ pugi::xpath_query query(request_data->output_xquery.c_str());
+ std::string result = query.evaluate_string(doc);
+ response.body_parts_parsed.push_back(std::move(result));
+ } else {
+ response.body_parts_parsed.push_back(response.body);
+ }
+
+ return response;
+
+ } catch (const pugi::xpath_exception &e) {
+ return make_fail_parse_response(e.what());
+ } catch (const std::exception &e) {
+ return make_fail_parse_response(e.what());
}
- response.body_parts_parsed.push_back(parsed_output);
- return response;
}