Skip to content

Commit

Permalink
Revert new Extractor Regex Replace functionality after early merge (#302
Browse files Browse the repository at this point in the history
)

* Revert "Add regex replace functionality to transformation filter extractors (#301)"

This reverts commit bfdcd3e.

* add changelog entry

* restore old changelog entry
  • Loading branch information
ben-taussig-solo authored Feb 2, 2024
1 parent bfdcd3e commit 9a08439
Show file tree
Hide file tree
Showing 8 changed files with 24 additions and 703 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -153,19 +153,6 @@ message Transformation {
// Extractions can be used to extract information from the request/response.
// The extracted information can then be referenced in template fields.
message Extraction {
// The mode of operation for the extraction.
enum Mode {
// Default mode. Extract the value of the subgroup-th capturing group.
EXTRACT = 0;
// Replace the value of the subgroup-th capturing group with the replacement_text.
// Note: replacement_text must be set for this mode.
SINGLE_REPLACE = 1;
// Replace all matches of the regex in the source with the replacement_text.
// Note: replacement_text must be set for this mode.
// Note: subgroup is ignored for this mode. configuration will fail if subgroup is set.
// Note: restrictions on the regex are different for this mode. See the regex field for more details.
REPLACE_ALL = 2;
}

// The source of the extraction
oneof source {
Expand All @@ -175,31 +162,15 @@ message Extraction {
google.protobuf.Empty body = 4;
}

// The regex field specifies the regular expression used for matching against the source content.
// - In EXTRACT mode, the entire source must match the regex. The subgroup-th capturing group,
// if specified, determines which part of the match is extracted.
// - In SINGLE_REPLACE mode, the regex also needs to match the entire source. The subgroup-th capturing group
// is targeted for replacement with the replacement_text.
// - In REPLACE_ALL mode, the regex is applied repeatedly to find all occurrences within the source that match.
// Each matching occurrence is replaced with the replacement_text, and the subgroup field is not used.
// This field is required, and if the regex does not match the source as per the selected mode, the result of
// the extraction will be an empty value.
// Only strings matching this regular expression will be part of the
// extraction. The most simple value for this field is '.*', which matches the
// whole source. The field is required. If extraction fails the result is an
// empty value.
string regex = 2;

// If your regex contains capturing groups, use this field to determine which
// group should be selected.
// For EXTRACT and SINGLE_REPLACE, refers to the portion of the text
// to extract/replace.
// Config will be rejected if this is specified in REPLACE_ALL mode.
uint32 subgroup = 3;

// The string to replace the matched portion of the source with.
// Used in SINGLE_REPLACE and REPLACE_ALL modes.
google.protobuf.StringValue replacement_text = 5;

// The mode of operation for the extraction.
// Defaults to EXTRACT.
Mode mode = 6;
}

// Defines a transformation template.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ changelog:
issueLink: https://github.com/solo-io/gloo/issues/8706
description: >
Update transformation filter extractors to support regex
replace/replace all operations on extracted values.
replace/replace all operations on extracted values.
5 changes: 5 additions & 0 deletions changelog/v1.27.2-patch2/revert-regex-replace.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
changelog:
- type: NON_USER_FACING
description: >-
Revert regex replace functionality which accidentally merged earlier
than expected
107 changes: 3 additions & 104 deletions source/extensions/filters/http/transformation/inja_transformer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,7 @@ getHeader(const Http::RequestOrResponseHeaderMap &header_map,
Extractor::Extractor(const envoy::api::v2::filter::http::Extraction &extractor)
: headername_(extractor.header()), body_(extractor.has_body()),
group_(extractor.subgroup()),
extract_regex_(Solo::Regex::Utility::parseStdRegex(extractor.regex())),
replacement_text_(extractor.has_replacement_text() ? std::make_optional(extractor.replacement_text().value()) : std::nullopt),
mode_(extractor.mode()) {
extract_regex_(Solo::Regex::Utility::parseStdRegex(extractor.regex())) {
// mark count == number of sub groups, and we need to add one for match number
// 0 so we test for < instead of <= see:
// http://www.cplusplus.com/reference/regex/basic_regex/mark_count/
Expand All @@ -67,29 +65,6 @@ Extractor::Extractor(const envoy::api::v2::filter::http::Extraction &extractor)
fmt::format("group {} requested for regex with only {} sub groups",
group_, extract_regex_.mark_count()));
}

switch (mode_) {
case ExtractionApi::EXTRACT:
extraction_func_ = std::bind(&Extractor::extractValue, this, _1, _2);
break;
case ExtractionApi::SINGLE_REPLACE:
if (!replacement_text_.has_value()) {
throw EnvoyException("SINGLE_REPLACE mode set but no replacement text provided");
}
extraction_func_ = std::bind(&Extractor::replaceIndividualValue, this, _1, _2);
break;
case ExtractionApi::REPLACE_ALL:
if (!replacement_text_.has_value()) {
throw EnvoyException("REPLACE_ALL mode set but no replacement text provided");
}
if (group_ != 0) {
throw EnvoyException("REPLACE_ALL mode set but subgroup is not 0");
}
extraction_func_ = std::bind(&Extractor::replaceAllValues, this, _1, _2);
break;
default:
throw EnvoyException("Unknown mode");
}
}

absl::string_view
Expand All @@ -99,14 +74,13 @@ Extractor::extract(Http::StreamFilterCallbacks &callbacks,
if (body_) {
const std::string &string_body = body();
absl::string_view sv(string_body);
return extraction_func_(callbacks, sv);
return extractValue(callbacks, sv);
} else {
const Http::HeaderMap::GetResult header_entries = getHeader(header_map, headername_);
if (header_entries.empty()) {
return "";
}
const auto &header_value = header_entries[0]->value().getStringView();
return extraction_func_(callbacks, header_value);
return extractValue(callbacks, header_entries[0]->value().getStringView());
}
}

Expand All @@ -131,81 +105,6 @@ Extractor::extractValue(Http::StreamFilterCallbacks &callbacks,
return "";
}

// Match a regex against the input value and replace the matched subgroup with the replacement_text_ value
// writes the result to replaced_value_ and returns a absl::string_view to it
absl::string_view
Extractor::replaceIndividualValue(Http::StreamFilterCallbacks &callbacks,
absl::string_view value) const {
std::match_results<absl::string_view::const_iterator> regex_result;

// if there are no matches, return an empty string
if (!std::regex_search(value.begin(), value.end(), regex_result, extract_regex_)) {
ENVOY_STREAM_LOG(debug, "replaceValue: extractor regex did not match input", callbacks);
return "";
}

// if the subgroup specified is greater than the number of subgroups in the regex, return an empty string
if (group_ >= regex_result.size()) {
// this should never happen as we test this in the ctor.
ASSERT("no such group in the regex");
ENVOY_STREAM_LOG(debug, "replaceValue: invalid group specified for regex", callbacks);
return "";
}

// if the regex doesn't match the entire input value, return an empty string
if (regex_result[0].length() != long(value.length())) {
ENVOY_STREAM_LOG(debug, "replaceValue: Regex did not match entire input value. This is not allowed in SINGLE_REPLACE mode.", callbacks);
return "";
}

// Create a new string with the maximum possible length after replacement
auto max_possible_length = value.length() + replacement_text_.value().length();
std::string replaced;
replaced.reserve(max_possible_length);

auto subgroup_start = regex_result[group_].first;
auto subgroup_end = regex_result[group_].second;

// Copy the initial part of the string until the match
replaced.assign(value.begin(), subgroup_start);

// Append the replacement text
replaced += replacement_text_.value();

// Append the remaining part of the string after the match
replaced.append(subgroup_end, value.end());

// Store the replaced string in replaced_value_ and return an absl::string_view to it
replaced_value_ = std::move(replaced);
return absl::string_view(replaced_value_);
}

// Match a regex against the input value and replace all instances of the regex with the replacement_text_ value
// writes the result to replaced_value_ and returns a absl::string_view to it
absl::string_view
Extractor::replaceAllValues(Http::StreamFilterCallbacks &callbacks,
absl::string_view value) const {
std::string input(value.begin(), value.end());
std::string replaced;

// create an iterator to search for matches of extract_regex_ in the input string
std::sregex_iterator it(input.begin(), input.end(), extract_regex_);
std::sregex_iterator end_it; // default end iterator for comparison

// return an empty string if the regex doesn't match any part of the input value
if (it == end_it) {
ENVOY_STREAM_LOG(debug, "replaceAllValues: extractor regex did not match input", callbacks);
return "";
}

// If a match was found, replace all instances of the regex in the input value with the replacement_text_ value
replaced = std::regex_replace(input, extract_regex_, replacement_text_.value());

// Store the replaced string in replaced_value_ and return an absl::string_view to it
replaced_value_ = std::move(replaced);
return absl::string_view(replaced_value_);
}

// A TransformerInstance is constructed by the InjaTransformer constructor at config time
// on the main thread. It access thread-local storage which is populated during the
// InjaTransformer::transform method call, which happens on the request path on any
Expand Down
12 changes: 1 addition & 11 deletions source/extensions/filters/http/transformation/inja_transformer.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@ namespace HttpFilters {
namespace Transformation {

using GetBodyFunc = std::function<const std::string &()>;
using ExtractionFunc = std::function<absl::string_view(Http::StreamFilterCallbacks &callbacks, absl::string_view value)>;
using ExtractionApi = envoy::api::v2::filter::http::Extraction;

struct ThreadLocalTransformerContext : public ThreadLocal::ThreadLocalObject {
public:
Expand Down Expand Up @@ -84,23 +82,15 @@ class Extractor : Logger::Loggable<Logger::Id::filter> {
absl::string_view extract(Http::StreamFilterCallbacks &callbacks,
const Http::RequestOrResponseHeaderMap &header_map,
GetBodyFunc &body) const;

private:
absl::string_view extractValue(Http::StreamFilterCallbacks &callbacks,
absl::string_view value) const;
absl::string_view replaceIndividualValue(Http::StreamFilterCallbacks &callbacks,
absl::string_view value) const;
absl::string_view replaceAllValues(Http::StreamFilterCallbacks &callbacks,
absl::string_view value) const;

const Http::LowerCaseString headername_;
const bool body_;
const unsigned int group_;
const std::regex extract_regex_;
const std::optional<std::string> replacement_text_;
const ExtractionApi::Mode mode_;

ExtractionFunc extraction_func_;
mutable std::string replaced_value_;
};

class InjaTransformer : public Transformer {
Expand Down
15 changes: 0 additions & 15 deletions test/extensions/filters/http/transformation/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,6 @@ envoy_gloo_cc_test(
],
)

envoy_gloo_cc_test(
name = "inja_transformer_replace_test",
srcs = ["inja_transformer_replace_test.cc"],
repository = "@envoy",
deps = [
"//source/extensions/filters/http/transformation:inja_transformer_lib",
"@envoy//source/common/common:random_generator_lib",
"@envoy//source/common/common:base64_lib",
"@envoy//test/test_common:environment_lib",
"@envoy//test/mocks/http:http_mocks",
"@envoy//test/mocks/server:server_mocks",
"@envoy//test/mocks/upstream:upstream_mocks",
],
)

envoy_cc_test_binary(
name = "inja_transformer_speed_test",
srcs = ["inja_transformer_speed_test.cc"],
Expand Down
Loading

0 comments on commit 9a08439

Please sign in to comment.