diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/HttpByteBufFormatter.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/HttpByteBufFormatter.java
index e2e807818..c01925b46 100644
--- a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/HttpByteBufFormatter.java
+++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/HttpByteBufFormatter.java
@@ -197,7 +197,8 @@ public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception
} else {
content.release();
}
- } else if (msg instanceof HttpMessage) {
+ }
+ if (msg instanceof HttpMessage) { // this & HttpContent are interfaces & 'Full' messages implement both
message = (HttpMessage) msg;
}
if (msg instanceof LastHttpContent) {
@@ -206,16 +207,16 @@ public void channelRead(ChannelHandlerContext ctx, Object msg) throws Exception
}
var finalMsg = (message instanceof HttpRequest)
? new DefaultFullHttpRequest(message.protocolVersion(),
- ((HttpRequest) message).method(),
- ((HttpRequest) message).uri(),
- aggregatedContents,
- message.headers(),
- ((LastHttpContent) msg).trailingHeaders())
+ ((HttpRequest) message).method(),
+ ((HttpRequest) message).uri(),
+ aggregatedContents,
+ message.headers(),
+ ((LastHttpContent) msg).trailingHeaders())
: new DefaultFullHttpResponse(message.protocolVersion(),
- ((HttpResponse)message).status(),
- aggregatedContents,
- message.headers(),
- ((LastHttpContent) msg).trailingHeaders());
+ ((HttpResponse)message).status(),
+ aggregatedContents,
+ message.headers(),
+ ((LastHttpContent) msg).trailingHeaders());
super.channelRead(ctx, finalMsg);
}
}
diff --git a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/ResultsToLogsConsumer.java b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/ResultsToLogsConsumer.java
index 89ca4354f..528e89b93 100644
--- a/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/ResultsToLogsConsumer.java
+++ b/TrafficCapture/trafficReplayer/src/main/java/org/opensearch/migrations/replay/ResultsToLogsConsumer.java
@@ -161,6 +161,7 @@ public static String getTransactionSummaryStringPreamble() {
.add("SOURCE_STATUS_CODE/TARGET_STATUS_CODE...")
.add("SOURCE_RESPONSE_SIZE_BYTES/TARGET_RESPONSE_SIZE_BYTES...")
.add("SOURCE_LATENCY_MS/TARGET_LATENCY_MS...")
+ .add("METHOD...")
.add("URI...")
.toString();
}
@@ -219,6 +220,11 @@ public static String toTransactionSummaryString(
transformStreamToString(parsed.targetResponseList.stream(),
r -> "" + r.get(ParsedHttpMessagesAsDicts.RESPONSE_TIME_MS_KEY))
)
+ // method
+ .add(
+ parsed.sourceRequestOp
+ .map(r -> (String) r.get(ParsedHttpMessagesAsDicts.METHOD_KEY))
+ .orElse(MISSING_STR))
// uri
.add(
parsed.sourceRequestOp
diff --git a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/README.md b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/README.md
index 61eae7074..b47b75e54 100644
--- a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/README.md
+++ b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/README.md
@@ -55,47 +55,71 @@ GET activity/post/_search
## Routing data to new indices
-The structure of the documents and indices need to change. Some options are to use separate indices, drop some of
+The structure of the documents and indices need to change. Options are to use separate indices, drop some of
the types to make an index single-purpose, or to create an index that's the union of all the types' fields.
-With a simple mapping directive, we can define each of these three behaviors. The following yaml shows how to map
-documents into two different indices named users and posts:
+Specific instances of those behaviors can be expressed via a map (or dictionary) or indices to types to target indices.
+The following sample json shows how to map documents from the 'activity' index into two different indices
+('users' and 'posts'):
```
-activity:
- user: new_users
- post: new_posts
+{
+ "activity": {
+ "user: "new_users",
+ "post": "new_posts"
+ }
```
-To drop one type, just leave it out:
+To drop the 'post' type, just leave it out:
```
-activity:
- user: only_users
+{
+ "activity": {
+ "user": "only_users"
+ }
+}
```
-To merge types together, use the same value:
+To merge types into a single index, use the same value:
```
-activity:
- user: any_activity
- post: any_activity
+{
+ "activity": {
+ "user": "any_activity",
+ "post": "any_activity",
+ }
+}
```
-Any _indices_ that are NOT specified won't be modified - all additions, changes, and queries on those other indices not
-specified at the root level will remain untouched by the static mapping rewriter. However, missing types from a
-specified index will be removed. To remove ALL the activity for a given index, specify an index with no
-children types.
+To remove ALL the activity for a given index, specify an index with no children types.
```
-activity: {}
+{
+ "activity": {}
+}
```
-In addition to static source/target mappings, users can specify source and type pairs as a regex and use any captured
-groups in the target index name. Regex rules take precedent _after_ the static rules and are only applied when there
-was no index match in the static mappings.
+Those regex rules take precedence **after** the static mappings specified above.
+
+In addition to static source/target mappings, users can specify source and type pairs as regex patterns and
+use captured groups in the target index name.
+Any source _indices_ that are NOT specified in the maps will be processed through the regex route rules.
+The regex rules are only applied if the source index doesn't match a key in the static route map
-Regex replacement is controlled via an ordered list of `[indexNamePattern, typeNamePattern, replacementString]`.
-The transformer will use the replacement for the first matched item found.
-If none are found, unlike missing indices for static mappings, the system presumes that the index and type are
-**NOT** to be propagated to the target - any reference to those types and their corresponding data will be suppressed.
-To preserve all items, a default rule will need to be included.
+Regex replace rules are evaluated by concatenating the source index and source types into a single string.
+The pattern components are also concatenated into a corresponding match string.
+The replacement value will replace the _matched_ part of the source index + typename and replace it with the
+specified value.
+If that specified value contains (numerical) backreferences, those will pull from the captured groups of the
+concatenated pattern.
+The concatenated pattern is the index pattern followed by the type pattern, meaning that the groups in the index are
+numbered from 1 and the type pattern group numbers start after all the groups from the index.
+
+Missing types from a specified index will be removed.
+When the regex pattern isn't defined `["(.*)", "(.*)", "\\1_\\2"]` is used to map each type into its own isolated
+index, preserving all data and its separation.
+
+For more details about regexes, see the [Python](https://docs.python.org/3/library/re.html#re.sub) or
+[Java](https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html) documentation.
+This transform uses python-style backreferences (`'`\1`) for replacement patterns.
+Notice that regexes can NOT be specified in the index-type map.
+They can _only_ be used via the list, which will be evaluated in the order of the list until a match is found.
The following sample shows how indices that start with 'time-' will be migrated and every other index and type not
already matched will be dropped.
@@ -114,41 +138,18 @@ merging all types into a single index with the same name as the source index.
]
```
-## Final Results
-
-```
-PUT any_activity
-{
- "mappings": {
- "properties": {
- "type": {
- "type": "keyword"
- },
- "name": {
- "type": "text"
- },
- "user_name": {
- "type": "keyword"
- },
- "email": {
- "type": "keyword"
- },
- "content": {
- "type": "text"
- },
- "tweeted_at": {
- "type": "date"
- }
- }
- }
-}
-
-PUT any_activity/_doc/someuser
-{
- "name": "Some User",
- "user_name": "user",
- "email": "user@example.com"
-}
-
+For more examples, compare the following cases.
+Though note that anything matched by the static maps shown above will block any of these rules from being evaluated.
+
+| Regex Entry | Source Index | Source Type | Target Index | PUT Doc URL | Bulk Index Command |
+|--------------------------------------------------------------------------------|-------------|-------------|-------------------|--------------------------------|----------------------------------------------------------------|
+| `[["time-(.*)", "(cpu)", "time-\\1-\\2"]]` | time-nov11 | cpu | time-nov11-cpu | /time-nov11-cpu/_doc/doc512 | `{"index": {"_index": "time-nov11-cpu", "_id": "doc512" }}` |
+| `[["time-(.*)", "(cpu)", "time-\\1-\\2"]]` | logs | access | [DELETED] | [DELETED] | [DELETED] |
+| `[["time-(.*)", "(cpu)", "time-\\1-\\2"],`
` ["(.*)", "(.*)", "\\1-\\2"]]` | logs | access | logs_access | /logs_access/_doc/doc513 | `{"index": {"_index": "logs_access", "_id": "doc513" }}` |
+| `[["time-(.*)", "(cpu)", "time-\\1-\\2"],`
`[["", ".*", ""]]` | everything | widgets | everything | /everything/_doc/doc514 | `{"index": {"_index": "everything", "_id": "doc514" }}` |
+| `[["time-(.*)", "(cpu)", "time-\\1-\\2"],`
`[["", ".*", ""]]` | everything | sprockets | everything | /everything/_doc/doc515 | `{"index": {"_index": "everything", "_id": "doc515" }}` |
+| `[["time-(.*)", "(.*)-(cpu)", "\\2-\\3-\\1"]]` | time-nov11 | host123-cpu | host123-cpu-nov11 | /host123-cpu-nov11/_doc/doc512 | `{"index": {"_index": "host123-cpu-nov11", "_id": "doc512" }}` |
+| `[["", ".*", ""]]` | metadata | users | metadata | /metadata/_doc/doc516 | `{"index": {"_index": "metadata", "_id": "doc516" }}` |
+| `[[".*", ".*", "leftovers"]]` | logs | access | leftovers | /leftovers/_doc/doc517 | `{"index": {"_index": "leftovers", "_id": "doc517" }}` |
+| `[[".*", ".*", "leftovers"]]` | permissions | access | leftovers | /leftovers/_doc/doc517 | `{"index": {"_index": "leftovers", "_id": "doc517" }}` |
-```
\ No newline at end of file
diff --git a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/java/org/opensearch/migrations/transform/TypeMappingsSanitizationTransformer.java b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/java/org/opensearch/migrations/transform/TypeMappingsSanitizationTransformer.java
index 226b858b9..0ea6d409e 100644
--- a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/java/org/opensearch/migrations/transform/TypeMappingsSanitizationTransformer.java
+++ b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/java/org/opensearch/migrations/transform/TypeMappingsSanitizationTransformer.java
@@ -49,7 +49,7 @@ public TypeMappingsSanitizationTransformer(
// types of patterns are being used.
// This regex says, match the type part and reduce it to nothing, leave the index part untouched.
var regexIndexMappings = Optional.ofNullable(regexIndexMappingsIncoming)
- .orElseGet(() -> (indexMappingsIncoming == null ? List.of(List.of("", ".*", "")) : List.of()));
+ .orElseGet(() -> (indexMappingsIncoming == null ? List.of(List.of("(.*)", "(.*)", "\\1_\\2")) : List.of()));
return incomingJson -> Map.of("source_document", incomingJson,
"index_mappings", indexMappings,
diff --git a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteDocumentRequest.j2 b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteDocumentRequest.j2
index 6fc665bb2..97475c5be 100644
--- a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteDocumentRequest.j2
+++ b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteDocumentRequest.j2
@@ -9,7 +9,7 @@
{
"method": "{{ input_map.request.method }}",
"URI": "/{{ target_index }}/_doc/{{ match.group3 }}",
- "preserveWhenMissing": ["headers","payload"]
+ "preserveWhenMissing": "*"
}
{%- endif -%}
{%- endmacro -%}
diff --git a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteIndexForTarget.j2 b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteIndexForTarget.j2
index f0606d83f..150f6d1a9 100644
--- a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteIndexForTarget.j2
+++ b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/main/resources/jinjava/typeMappings/rewriteIndexForTarget.j2
@@ -15,10 +15,14 @@
{%- endmacro -%}
{%- macro convert_source_index_to_target(source_index, source_type, index_mappings, regex_index_mappings) -%}
- {%- set ns = namespace(target_index=none) -%}
- {%- set ns.target_index = (index_mappings[source_index] | default({}))[source_type] -%}
- {%- if ns.target_index is none -%}
- {%- set ns.target_index = convert_source_index_to_target_via_regex(source_index, source_type, regex_index_mappings) -%}
+ {%- if source_type == "_doc" -%}
+ {{- source_index -}}
+ {%- else -%}
+ {%- set ns = namespace(target_index=none) -%}
+ {%- set ns.target_index = (index_mappings[source_index] | default({}))[source_type] -%}
+ {%- if ns.target_index is none -%}
+ {%- set ns.target_index = convert_source_index_to_target_via_regex(source_index, source_type, regex_index_mappings) -%}
+ {%- endif -%}
+ {{- ns.target_index -}}
{%- endif -%}
- {{- ns.target_index -}}
{%- endmacro -%}
\ No newline at end of file
diff --git a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/test/java/org/opensearch/migrations/transform/TypeMappingsSanitizationDocBackfillTest.java b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/test/java/org/opensearch/migrations/transform/TypeMappingsSanitizationDocBackfillTest.java
index be9d53909..7318d25c9 100644
--- a/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/test/java/org/opensearch/migrations/transform/TypeMappingsSanitizationDocBackfillTest.java
+++ b/transformation/transformationPlugins/jsonMessageTransformers/jsonTypeMappingsSanitizationTransformer/src/test/java/org/opensearch/migrations/transform/TypeMappingsSanitizationDocBackfillTest.java
@@ -1,7 +1,6 @@
package org.opensearch.migrations.transform;
import java.util.LinkedHashMap;
-import java.util.List;
import org.opensearch.migrations.testutils.JsonNormalizer;
@@ -23,13 +22,12 @@ public void test() throws Exception {
"}";
var expectedString = "{\n" +
- " \"index\": { \"_index\": \"network\", \"_id\": \"1\" },\n" +
+ " \"index\": { \"_index\": \"performance_network\", \"_id\": \"1\" },\n" +
" \"source\": { \"field1\": \"value1\" }\n" +
"}";
- var regexIndexMappings = List.of(List.of(".*", "", ""));
- var indexTypeMappingRewriter = new TypeMappingsSanitizationTransformer(null, regexIndexMappings);
+ var indexTypeMappingRewriter = new TypeMappingsSanitizationTransformer(null, null);
var resultObj = indexTypeMappingRewriter.transformJson(OBJECT_MAPPER.readValue(testString, LinkedHashMap.class));
log.atInfo().setMessage("resultStr = {}").addArgument(() -> {
try {