diff --git a/CHANGELOG.md b/CHANGELOG.md index 84fe8df82a887..1bd3c7d3e7a29 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,9 +11,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add events correlation engine plugin ([#6854](https://github.com/opensearch-project/OpenSearch/issues/6854)) - Introduce new dynamic cluster setting to control slice computation for concurrent segment search ([#9107](https://github.com/opensearch-project/OpenSearch/pull/9107)) - Implement on behalf of token passing for extensions ([#8679](https://github.com/opensearch-project/OpenSearch/pull/8679)) -- Implement Visitor Design pattern in QueryBuilder to enable the capability to traverse through the complex QueryBuilder tree. ([#10110](https://github.com/opensearch-project/OpenSearch/pull/10110)) - Provide service accounts tokens to extensions ([#9618](https://github.com/opensearch-project/OpenSearch/pull/9618)) -- Configurable merge policy for index with an option to choose from LogByteSize and Tiered merge policy ([#9992](https://github.com/opensearch-project/OpenSearch/pull/9992)) +- [Admission control] Add enhancements to FS stats to include read/write time, queue size and IO time ([#10541](https://github.com/opensearch-project/OpenSearch/pull/10541)) +- [Admission control] Add Resource usage collector service and resource usage tracker ([#9890](https://github.com/opensearch-project/OpenSearch/pull/9890)) +- Change file names for remote cluster state ([#10557](https://github.com/opensearch-project/OpenSearch/pull/10557)) ### Dependencies - Bump `log4j-core` from 2.18.0 to 2.19.0 @@ -45,6 +46,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Bump JNA version from 5.5 to 5.13 ([#9963](https://github.com/opensearch-project/OpenSearch/pull/9963)) - Bumps jetty version to 9.4.52.v20230823 to fix GMS-2023-1857 ([#9822](https://github.com/opensearch-project/OpenSearch/pull/9822)) - Bump `org.eclipse.jgit` from 6.5.0 to 6.7.0 ([#10147](https://github.com/opensearch-project/OpenSearch/pull/10147)) +- Bump OpenTelemetry from 1.30.1 to 1.31.0 ([#10617](https://github.com/opensearch-project/OpenSearch/pull/10617)) ### Changed - [CCR] Add getHistoryOperationsFromTranslog method to fetch the history snapshot from translogs ([#3948](https://github.com/opensearch-project/OpenSearch/pull/3948)) @@ -84,17 +86,38 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ## [Unreleased 2.x] ### Added +- Per request phase latency ([#10351](https://github.com/opensearch-project/OpenSearch/issues/10351)) - [Remote Store] Add repository stats for remote store([#10567](https://github.com/opensearch-project/OpenSearch/pull/10567)) ### Dependencies +- Bump `com.google.api.grpc:proto-google-common-protos` from 2.10.0 to 2.25.1 ([#10208](https://github.com/opensearch-project/OpenSearch/pull/10208), [#10298](https://github.com/opensearch-project/OpenSearch/pull/10298)) +- Bump Lucene from 9.7.0 to 9.8.0 ([10276](https://github.com/opensearch-project/OpenSearch/pull/10276)) +- Bump `com.netflix.nebula.ospackage-base` from 11.4.0 to 11.5.0 ([#10295](https://github.com/opensearch-project/OpenSearch/pull/10295)) +- Bump `org.bouncycastle:bc-fips` from 1.0.2.3 to 1.0.2.4 ([#10297](https://github.com/opensearch-project/OpenSearch/pull/10297)) +- Bump `org.apache.zookeeper:zookeeper` from 3.9.0 to 3.9.1 ([#10506](https://github.com/opensearch-project/OpenSearch/pull/10506)) +- Bump `de.thetaphi:forbiddenapis` from 3.5.1 to 3.6 
([#10508](https://github.com/opensearch-project/OpenSearch/pull/10508)) +- Bump `commons-io:commons-io` from 2.13.0 to 2.14.0 ([#10294](https://github.com/opensearch-project/OpenSearch/pull/10294)) +- Bump `org.codehaus.woodstox:stax2-api` from 4.2.1 to 4.2.2 ([#10639](https://github.com/opensearch-project/OpenSearch/pull/10639)) +- Bump `com.google.http-client:google-http-client` from 1.43.2 to 1.43.3 ([#10635](https://github.com/opensearch-project/OpenSearch/pull/10635)) +- Bump `com.squareup.okio:okio` from 3.5.0 to 3.6.0 ([#10637](https://github.com/opensearch-project/OpenSearch/pull/10637)) ### Changed +- Mute the query profile IT with concurrent execution ([#9840](https://github.com/opensearch-project/OpenSearch/pull/9840)) +- Force merge with `only_expunge_deletes` honors max segment size ([#10036](https://github.com/opensearch-project/OpenSearch/pull/10036)) +- Add the means to extract the contextual properties from HttpChannel, TcpChannel and TransportChannel without excessive typecasting ([#10562](https://github.com/opensearch-project/OpenSearch/pull/10562)) +- [Remote Store] Add Remote Store backpressure rejection stats to `_nodes/stats` ([#10524](https://github.com/opensearch-project/OpenSearch/pull/10524)) +- [BUG] Fix java.lang.SecurityException in repository-gcs plugin ([#10642](https://github.com/opensearch-project/OpenSearch/pull/10642)) +- Add telemetry tracer/metric enable flag and integ test. ([#10395](https://github.com/opensearch-project/OpenSearch/pull/10395)) ### Deprecated ### Removed ### Fixed +- Fix failure in dissect ingest processor parsing empty brackets ([#9225](https://github.com/opensearch-project/OpenSearch/pull/9255)) +- Fix class_cast_exception when passing int to _version and other metadata fields in ingest simulate API ([#10101](https://github.com/opensearch-project/OpenSearch/pull/10101)) +- Fix Segment Replication ShardLockObtainFailedException bug during index corruption ([#10370](https://github.com/opensearch-project/OpenSearch/pull/10370)) +- Fix some test methods in SimulatePipelineRequestParsingTests never run and fix test failure ([#10496](https://github.com/opensearch-project/OpenSearch/pull/10496)) ### Security diff --git a/buildSrc/version.properties b/buildSrc/version.properties index 64f9f9a8828ad..a5171aa582a86 100644 --- a/buildSrc/version.properties +++ b/buildSrc/version.properties @@ -68,4 +68,5 @@ jzlib = 1.1.3 resteasy = 6.2.4.Final # opentelemetry dependencies -opentelemetry = 1.30.1 +opentelemetry = 1.31.0 +opentelemetrysemconv = 1.21.0-alpha diff --git a/libs/dissect/src/main/java/org/opensearch/dissect/DissectParser.java b/libs/dissect/src/main/java/org/opensearch/dissect/DissectParser.java index b6dc0ceb1028f..828d4b7de450e 100644 --- a/libs/dissect/src/main/java/org/opensearch/dissect/DissectParser.java +++ b/libs/dissect/src/main/java/org/opensearch/dissect/DissectParser.java @@ -231,7 +231,10 @@ public Map parse(String inputString) { int lookAheadMatches; // start walking the input string byte by byte, look ahead for matches where needed // if a match is found jump forward to the end of the match - for (; i < input.length; i++) { + while (i < input.length) { + // start is only used to record the value of i + int start = i; + lookAheadMatches = 0; // potential match between delimiter and input string if (delimiter.length > 0 && input[i] == delimiter[0]) { @@ -283,8 +286,14 @@ public Map parse(String inputString) { delimiter = dissectPair.getDelimiter().getBytes(StandardCharsets.UTF_8); // i is always one byte after the last
found delimiter, aka the start of the next value valueStart = i; + } else { + i++; } + } else { + i++; } + // i must always advance here, otherwise the loop would never terminate + assert (i != start); } // the last key, grab the rest of the input (unless consecutive delimiters already grabbed the last key) // and there is no trailing delimiter diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/MetricsTelemetry.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/MetricsTelemetry.java index 2f70c28efb1cd..fb3dec8152b4f 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/MetricsTelemetry.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/metrics/MetricsTelemetry.java @@ -10,14 +10,12 @@ import org.opensearch.common.annotation.ExperimentalApi; -import java.io.Closeable; - /** * Interface for metrics telemetry providers * * @opensearch.experimental */ @ExperimentalApi -public interface MetricsTelemetry extends MetricsRegistry, Closeable { +public interface MetricsTelemetry extends MetricsRegistry { } diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/DefaultTracer.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/DefaultTracer.java index 79b7e4aca6c2f..a3bb64ea392a9 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/DefaultTracer.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/DefaultTracer.java @@ -85,6 +85,11 @@ public SpanScope withSpanInScope(Span span) { return DefaultSpanScope.create(span, tracerContextStorage).attach(); } + @Override + public boolean isRecording() { + return true; + } + private Span createSpan(SpanCreationContext spanCreationContext, Span parentSpan) { return tracingTelemetry.createSpan(spanCreationContext, parentSpan); } diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/Tracer.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/Tracer.java index e6d4878a5e833..8257d251e9560 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/Tracer.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/Tracer.java @@ -53,4 +53,10 @@ public interface Tracer extends HttpTracer, Closeable { */ SpanScope withSpanInScope(Span span); + /** + * Tells if the traces are being recorded or not + * @return true if traces are being recorded, false otherwise + */ + boolean isRecording(); + } diff --git a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/noop/NoopTracer.java b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/noop/NoopTracer.java index c073e8d3e766f..50452ff5fe3b4 100644 --- a/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/noop/NoopTracer.java +++ b/libs/telemetry/src/main/java/org/opensearch/telemetry/tracing/noop/NoopTracer.java @@ -54,6 +54,11 @@ public SpanScope withSpanInScope(Span span) { return SpanScope.NO_OP; } + @Override + public boolean isRecording() { + return false; + } + @Override public void close() { diff --git a/libs/telemetry/src/test/java/org/opensearch/telemetry/tracing/DefaultTracerTests.java b/libs/telemetry/src/test/java/org/opensearch/telemetry/tracing/DefaultTracerTests.java index 0a717e993cb81..2a791f1ae4164 100644 --- a/libs/telemetry/src/test/java/org/opensearch/telemetry/tracing/DefaultTracerTests.java +++ b/libs/telemetry/src/test/java/org/opensearch/telemetry/tracing/DefaultTracerTests.java @@ -62,6 +62,7 @@ public void testCreateSpan() { String spanName = defaultTracer.getCurrentSpan().getSpan().getSpanName(); assertEquals("span_name", spanName); +
assertTrue(defaultTracer.isRecording()); } @SuppressWarnings("unchecked") diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DissectProcessorTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DissectProcessorTests.java index ca0c0df40f009..e42a1147825d1 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DissectProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/DissectProcessorTests.java @@ -155,4 +155,28 @@ public void testNullValueWithOutIgnoreMissing() { IngestDocument ingestDocument = new IngestDocument(originalIngestDocument); expectThrows(IllegalArgumentException.class, () -> processor.execute(ingestDocument)); } + + public void testMatchEmptyBrackets() { + IngestDocument ingestDocument = new IngestDocument( + "_index", + "_id", + null, + null, + null, + Collections.singletonMap("message", "[foo],[bar],[]") + ); + DissectProcessor dissectProcessor = new DissectProcessor("", null, "message", "[%{a}],[%{b}],[%{c}]", "", true); + dissectProcessor.execute(ingestDocument); + assertEquals("foo", ingestDocument.getFieldValue("a", String.class)); + assertEquals("bar", ingestDocument.getFieldValue("b", String.class)); + assertEquals("", ingestDocument.getFieldValue("c", String.class)); + + ingestDocument = new IngestDocument("_index", "_id", null, null, null, Collections.singletonMap("message", "{}{}{}{baz}")); + dissectProcessor = new DissectProcessor("", null, "message", "{%{a}}{%{b}}{%{c}}{%{d}}", "", true); + dissectProcessor.execute(ingestDocument); + assertEquals("", ingestDocument.getFieldValue("a", String.class)); + assertEquals("", ingestDocument.getFieldValue("b", String.class)); + assertEquals("", ingestDocument.getFieldValue("c", String.class)); + assertEquals("baz", ingestDocument.getFieldValue("d", String.class)); + } } diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/200_dissect_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/200_dissect_processor.yml index 916a7fe656cc2..d90e5fbf2362b 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/200_dissect_processor.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/200_dissect_processor.yml @@ -84,3 +84,38 @@ teardown: } ] } + +--- +"Test dissect processor can match empty brackets": + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "dissect" : { + "field" : "message", + "pattern" : "[%{a}][%{b}][%{c}]" + } + } + ] + } + - match: { acknowledged: true } + + - do: + index: + index: test + id: 1 + pipeline: "my_pipeline" + body: {message: "[foo][bar][]"} + + - do: + get: + index: test + id: 1 + - match: { _source.message: "[foo][bar][]" } + - match: { _source.a: "foo" } + - match: { _source.b: "bar" } + - match: { _source.c: "" } diff --git a/plugins/identity-shiro/src/main/java/org/opensearch/identity/shiro/ShiroTokenManager.java b/plugins/identity-shiro/src/main/java/org/opensearch/identity/shiro/ShiroTokenManager.java index ddfb99e626718..a14215aa7655b 100644 --- a/plugins/identity-shiro/src/main/java/org/opensearch/identity/shiro/ShiroTokenManager.java +++ b/plugins/identity-shiro/src/main/java/org/opensearch/identity/shiro/ShiroTokenManager.java @@ -10,13 +10,11 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.apache.shiro.SecurityUtils; import 
org.apache.shiro.authc.AuthenticationToken; import org.apache.shiro.authc.UsernamePasswordToken; import org.opensearch.common.Randomness; import org.opensearch.identity.IdentityService; import org.opensearch.identity.Subject; -import org.opensearch.identity.noop.NoopSubject; import org.opensearch.identity.tokens.AuthToken; import org.opensearch.identity.tokens.BasicAuthToken; import org.opensearch.identity.tokens.OnBehalfOfClaims; @@ -88,20 +86,6 @@ public AuthToken issueServiceAccountToken(String audience) { return token; } - @Override - public Subject authenticateToken(AuthToken authToken) { - return new NoopSubject(); - } - - public boolean validateToken(AuthToken token) { - if (token instanceof BasicAuthToken) { - final BasicAuthToken basicAuthToken = (BasicAuthToken) token; - return basicAuthToken.getUser().equals(SecurityUtils.getSubject().toString()) - && basicAuthToken.getPassword().equals(shiroTokenPasswordMap.get(basicAuthToken)); - } - return false; - } - public String getTokenInfo(AuthToken token) { if (token instanceof BasicAuthToken) { final BasicAuthToken basicAuthToken = (BasicAuthToken) token; diff --git a/plugins/identity-shiro/src/test/java/org/opensearch/identity/shiro/AuthTokenHandlerTests.java b/plugins/identity-shiro/src/test/java/org/opensearch/identity/shiro/AuthTokenHandlerTests.java index db77ced298991..f99484083e2fb 100644 --- a/plugins/identity-shiro/src/test/java/org/opensearch/identity/shiro/AuthTokenHandlerTests.java +++ b/plugins/identity-shiro/src/test/java/org/opensearch/identity/shiro/AuthTokenHandlerTests.java @@ -100,11 +100,6 @@ public void testShouldFailGetTokenInfo() { assertThrows(UnsupportedAuthenticationToken.class, () -> shiroAuthTokenHandler.getTokenInfo(bearerAuthToken)); } - public void testShouldFailValidateToken() { - final BearerAuthToken bearerAuthToken = new BearerAuthToken("header.payload.signature"); - assertFalse(shiroAuthTokenHandler.validateToken(bearerAuthToken)); - } - public void testShoudPassMapLookupWithToken() { final BasicAuthToken authToken = new BasicAuthToken("Basic dGVzdDp0ZTpzdA=="); shiroAuthTokenHandler.getShiroTokenPasswordMap().put(authToken, "te:st"); diff --git a/plugins/repository-gcs/build.gradle b/plugins/repository-gcs/build.gradle index da4978608a12f..1bef5146f1db9 100644 --- a/plugins/repository-gcs/build.gradle +++ b/plugins/repository-gcs/build.gradle @@ -75,8 +75,8 @@ dependencies { runtimeOnly "com.google.guava:guava:${versions.guava}" api 'com.google.guava:failureaccess:1.0.1' - api 'com.google.http-client:google-http-client:1.43.2' - api 'com.google.http-client:google-http-client-appengine:1.43.2' + api 'com.google.http-client:google-http-client:1.43.3' + api 'com.google.http-client:google-http-client-appengine:1.43.3' api 'com.google.http-client:google-http-client-gson:1.43.3' api 'com.google.http-client:google-http-client-jackson2:1.43.3' diff --git a/plugins/repository-gcs/licenses/google-http-client-1.43.2.jar.sha1 b/plugins/repository-gcs/licenses/google-http-client-1.43.2.jar.sha1 deleted file mode 100644 index a576a74c62542..0000000000000 --- a/plugins/repository-gcs/licenses/google-http-client-1.43.2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -2520469ebd8c0675f0d2aeafd2da665228320fcf \ No newline at end of file diff --git a/plugins/repository-gcs/licenses/google-http-client-1.43.3.jar.sha1 b/plugins/repository-gcs/licenses/google-http-client-1.43.3.jar.sha1 new file mode 100644 index 0000000000000..800467de8bdf3 --- /dev/null +++ b/plugins/repository-gcs/licenses/google-http-client-1.43.3.jar.sha1 @@ 
-0,0 +1 @@ +a758b82e55a2f5f681e289c5ed384d3dbda6f3cd \ No newline at end of file diff --git a/plugins/repository-gcs/licenses/google-http-client-appengine-1.43.2.jar.sha1 b/plugins/repository-gcs/licenses/google-http-client-appengine-1.43.2.jar.sha1 deleted file mode 100644 index d8a9dba20070b..0000000000000 --- a/plugins/repository-gcs/licenses/google-http-client-appengine-1.43.2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -9fb548c5264227813fd83991b94a705b0841c15f \ No newline at end of file diff --git a/plugins/repository-gcs/licenses/google-http-client-appengine-1.43.3.jar.sha1 b/plugins/repository-gcs/licenses/google-http-client-appengine-1.43.3.jar.sha1 new file mode 100644 index 0000000000000..4adcca6a55902 --- /dev/null +++ b/plugins/repository-gcs/licenses/google-http-client-appengine-1.43.3.jar.sha1 @@ -0,0 +1 @@ +09d6cbdde6ea3469a67601a811b4e83de3e68a79 \ No newline at end of file diff --git a/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/GoogleCloudStorageService.java b/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/GoogleCloudStorageService.java index 445e1d65f3d3e..c9ebb3acaf3e5 100644 --- a/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/GoogleCloudStorageService.java +++ b/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/GoogleCloudStorageService.java @@ -228,7 +228,7 @@ StorageOptions createStorageOptions( } storageOptionsBuilder.setCredentials(serviceAccountCredentials); } - return storageOptionsBuilder.build(); + return SocketAccess.doPrivilegedException(() -> storageOptionsBuilder.build()); } /** diff --git a/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/SocketAccess.java b/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/SocketAccess.java index 197e772df30d5..35127d6ea4060 100644 --- a/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/SocketAccess.java +++ b/plugins/repository-gcs/src/main/java/org/opensearch/repositories/gcs/SocketAccess.java @@ -32,6 +32,7 @@ package org.opensearch.repositories.gcs; +import org.apache.logging.log4j.core.util.Throwables; import org.opensearch.SpecialPermission; import org.opensearch.common.CheckedRunnable; @@ -71,4 +72,16 @@ public static void doPrivilegedVoidIOException(CheckedRunnable acti throw (IOException) e.getCause(); } } + + public static T doPrivilegedException(PrivilegedExceptionAction operation) { + SpecialPermission.check(); + try { + return AccessController.doPrivileged(operation); + } catch (PrivilegedActionException e) { + Throwables.rethrow(e.getCause()); + assert false : "always throws"; + return null; + } + } + } diff --git a/plugins/repository-hdfs/build.gradle b/plugins/repository-hdfs/build.gradle index 1ce29afe0e69a..ed1f54888a26f 100644 --- a/plugins/repository-hdfs/build.gradle +++ b/plugins/repository-hdfs/build.gradle @@ -84,7 +84,7 @@ dependencies { api 'net.minidev:json-smart:2.5.0' api "io.netty:netty-all:${versions.netty}" implementation "com.fasterxml.woodstox:woodstox-core:${versions.woodstox}" - implementation 'org.codehaus.woodstox:stax2-api:4.2.1' + implementation 'org.codehaus.woodstox:stax2-api:4.2.2' hdfsFixture project(':test:fixtures:hdfs-fixture') // Set the keytab files in the classpath so that we can access them from test code without the security manager diff --git a/plugins/repository-hdfs/licenses/stax2-api-4.2.1.jar.sha1 b/plugins/repository-hdfs/licenses/stax2-api-4.2.1.jar.sha1 deleted file mode 100644 index 2c12704cdc560..0000000000000 --- 
a/plugins/repository-hdfs/licenses/stax2-api-4.2.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -a3f7325c52240418c2ba257b103c3c550e140c83 \ No newline at end of file diff --git a/plugins/repository-hdfs/licenses/stax2-api-4.2.2.jar.sha1 b/plugins/repository-hdfs/licenses/stax2-api-4.2.2.jar.sha1 new file mode 100644 index 0000000000000..b15a7ead0d016 --- /dev/null +++ b/plugins/repository-hdfs/licenses/stax2-api-4.2.2.jar.sha1 @@ -0,0 +1 @@ +b0d746cadea928e5264f2ea294ea9a1bf815bbde \ No newline at end of file diff --git a/plugins/telemetry-otel/build.gradle b/plugins/telemetry-otel/build.gradle index 04fff20947b4f..f5c367cb7643b 100644 --- a/plugins/telemetry-otel/build.gradle +++ b/plugins/telemetry-otel/build.gradle @@ -28,7 +28,7 @@ dependencies { api "io.opentelemetry:opentelemetry-sdk-trace:${versions.opentelemetry}" api "io.opentelemetry:opentelemetry-sdk-metrics:${versions.opentelemetry}" api "io.opentelemetry:opentelemetry-exporter-logging:${versions.opentelemetry}" - api "io.opentelemetry:opentelemetry-semconv:${versions.opentelemetry}-alpha" + api "io.opentelemetry.semconv:opentelemetry-semconv:${versions.opentelemetrysemconv}" api "io.opentelemetry:opentelemetry-sdk-logs:${versions.opentelemetry}" api "io.opentelemetry:opentelemetry-exporter-otlp:${versions.opentelemetry}" api "io.opentelemetry:opentelemetry-exporter-common:${versions.opentelemetry}" diff --git a/plugins/telemetry-otel/licenses/opentelemetry-api-1.30.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-api-1.30.1.jar.sha1 deleted file mode 100644 index b0ce00e191830..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-api-1.30.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -a32dfbd7f01de6711fd0e970f8d4b4c0405056d6 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-api-1.31.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-api-1.31.0.jar.sha1 new file mode 100644 index 0000000000000..eae141a8d1a23 --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-api-1.31.0.jar.sha1 @@ -0,0 +1 @@ +bb24a44d73484c681c236aed84fe6c28d17f30e2 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-context-1.30.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-context-1.30.1.jar.sha1 deleted file mode 100644 index 84cb60a2f7acb..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-context-1.30.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -58f665ff01ce6b964cdf0b8cb5cd1c196dfe94ce \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-context-1.31.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-context-1.31.0.jar.sha1 new file mode 100644 index 0000000000000..6e42973adc581 --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-context-1.31.0.jar.sha1 @@ -0,0 +1 @@ +b8004737f7a970124e36ac71fde8eb88423e8cee \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-exporter-common-1.30.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-exporter-common-1.30.1.jar.sha1 deleted file mode 100644 index eccb15f7b7c8e..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-exporter-common-1.30.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -f299d336dba1039478497f37b273dfa764c6faef \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-exporter-common-1.31.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-exporter-common-1.31.0.jar.sha1 new file mode 100644 index 0000000000000..b119468e7f88b --- /dev/null 
+++ b/plugins/telemetry-otel/licenses/opentelemetry-exporter-common-1.31.0.jar.sha1 @@ -0,0 +1 @@ +b7b4baf5f9af72d5eb8a231dfb114ae31c57150d \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-exporter-logging-1.30.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-exporter-logging-1.30.1.jar.sha1 deleted file mode 100644 index 40537a399ab14..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-exporter-logging-1.30.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -58f1a09e89955e6145babf8bcdf80c95174eb817 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-exporter-logging-1.31.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-exporter-logging-1.31.0.jar.sha1 new file mode 100644 index 0000000000000..8f653922d6418 --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-exporter-logging-1.31.0.jar.sha1 @@ -0,0 +1 @@ +260e5363dad83a0ae65c16ad6a3dd2914e0db201 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-1.30.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-1.30.1.jar.sha1 deleted file mode 100644 index e88b7514ee54d..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-1.30.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -15692246539571c41180aff2b55abe527b939a7b \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-1.31.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-1.31.0.jar.sha1 new file mode 100644 index 0000000000000..103da4720de96 --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-1.31.0.jar.sha1 @@ -0,0 +1 @@ +b6454464425dfd81519070caeca3824558a2f1ae \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-common-1.30.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-common-1.30.1.jar.sha1 deleted file mode 100644 index 86937743208c6..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-common-1.30.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -947cf43a6411c4a323e14594431040a476ad43e8 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-common-1.31.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-common-1.31.0.jar.sha1 new file mode 100644 index 0000000000000..3db07532ceea9 --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-exporter-otlp-common-1.31.0.jar.sha1 @@ -0,0 +1 @@ +d8c22b6851bbc3dbf5d2387b9bde158ed5416ba4 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-exporter-sender-okhttp-1.30.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-exporter-sender-okhttp-1.30.1.jar.sha1 deleted file mode 100644 index 068926277253c..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-exporter-sender-okhttp-1.30.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -9f3a14515500e4df260ce7b10a668237a95ac791 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-exporter-sender-okhttp-1.31.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-exporter-sender-okhttp-1.31.0.jar.sha1 new file mode 100644 index 0000000000000..10d9b7cdfe3e3 --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-exporter-sender-okhttp-1.31.0.jar.sha1 @@ -0,0 +1 @@ +dd209381d58cfe81a989e29c9ca26d97c8dabd7a \ No newline at end of file diff --git 
a/plugins/telemetry-otel/licenses/opentelemetry-extension-incubator-1.30.1-alpha.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-extension-incubator-1.30.1-alpha.jar.sha1 deleted file mode 100644 index bde43937e82e4..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-extension-incubator-1.30.1-alpha.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -bfcea9bd71f97dd4e8a4f92c15ba5659fb07ff05 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-extension-incubator-1.31.0-alpha.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-extension-incubator-1.31.0-alpha.jar.sha1 new file mode 100644 index 0000000000000..162890965a6eb --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-extension-incubator-1.31.0-alpha.jar.sha1 @@ -0,0 +1 @@ +6c9f5c063309d92b6dd28bff0667f54b63afd36f \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-sdk-1.30.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-sdk-1.30.1.jar.sha1 deleted file mode 100644 index d425ed61cc4cd..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-sdk-1.30.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -4d15a9ea26e8e6ea93287a9f4ee02d91e5a74392 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-sdk-1.31.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-sdk-1.31.0.jar.sha1 new file mode 100644 index 0000000000000..d6ce31a31cc6f --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-sdk-1.31.0.jar.sha1 @@ -0,0 +1 @@ +2b2093be08a09ac536292bf6cecf8129cc7fb191 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-sdk-common-1.30.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-sdk-common-1.30.1.jar.sha1 deleted file mode 100644 index 6b32d98b0f7c7..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-sdk-common-1.30.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -8e437ba87004bb63069d04fb06beae65b98dd13a \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-sdk-common-1.31.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-sdk-common-1.31.0.jar.sha1 new file mode 100644 index 0000000000000..8a6a9705d836d --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-sdk-common-1.31.0.jar.sha1 @@ -0,0 +1 @@ +f492528288236e097e12fc1c45963dd82c70d33c \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-sdk-logs-1.30.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-sdk-logs-1.30.1.jar.sha1 deleted file mode 100644 index 13ef6de11e82d..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-sdk-logs-1.30.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -5985d0950746ad12b49cc42c063f26ddfbcaaacb \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-sdk-logs-1.31.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-sdk-logs-1.31.0.jar.sha1 new file mode 100644 index 0000000000000..37d79f5c573f7 --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-sdk-logs-1.31.0.jar.sha1 @@ -0,0 +1 @@ +a63a203d3dc6f8875f8c26b9e3b522dc9a3f6280 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-sdk-metrics-1.30.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-sdk-metrics-1.30.1.jar.sha1 deleted file mode 100644 index fc5aad9c9011e..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-sdk-metrics-1.30.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ 
-b12825541c5dae52a0fb35045c1b36df3ca8f632 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-sdk-metrics-1.31.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-sdk-metrics-1.31.0.jar.sha1 new file mode 100644 index 0000000000000..80179e4808f50 --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-sdk-metrics-1.31.0.jar.sha1 @@ -0,0 +1 @@ +47cc23762fae728d68e4fda1dfb71986ae0b8b3e \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-sdk-trace-1.30.1.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-sdk-trace-1.30.1.jar.sha1 deleted file mode 100644 index ac522b765da05..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-sdk-trace-1.30.1.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -4c5531fbc44178a7bcfeb7021ae80e70a7c43458 \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-sdk-trace-1.31.0.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-sdk-trace-1.31.0.jar.sha1 new file mode 100644 index 0000000000000..fd917a58ba77c --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-sdk-trace-1.31.0.jar.sha1 @@ -0,0 +1 @@ +a3941197cfb8ae9eb9e482073480c0c3918b746c \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-semconv-1.21.0-alpha.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-semconv-1.21.0-alpha.jar.sha1 new file mode 100644 index 0000000000000..77b12c99464f6 --- /dev/null +++ b/plugins/telemetry-otel/licenses/opentelemetry-semconv-1.21.0-alpha.jar.sha1 @@ -0,0 +1 @@ +207660e74d1e155272e9559fd4d27854b92fc6ac \ No newline at end of file diff --git a/plugins/telemetry-otel/licenses/opentelemetry-semconv-1.30.1-alpha.jar.sha1 b/plugins/telemetry-otel/licenses/opentelemetry-semconv-1.30.1-alpha.jar.sha1 deleted file mode 100644 index 089a2484dd1d5..0000000000000 --- a/plugins/telemetry-otel/licenses/opentelemetry-semconv-1.30.1-alpha.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -8e8f7a97a4896a81846553275b9d61885be7ef50 \ No newline at end of file diff --git a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/IntegrationTestOTelTelemetryPlugin.java b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/IntegrationTestOTelTelemetryPlugin.java similarity index 85% rename from plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/IntegrationTestOTelTelemetryPlugin.java rename to plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/IntegrationTestOTelTelemetryPlugin.java index ed4d13f3abb7d..45caf8bf5f60b 100644 --- a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/IntegrationTestOTelTelemetryPlugin.java +++ b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/IntegrationTestOTelTelemetryPlugin.java @@ -6,12 +6,9 @@ * compatible open source license. 
*/ -package org.opensearch.telemetry.tracing; +package org.opensearch.telemetry; import org.opensearch.common.settings.Settings; -import org.opensearch.telemetry.OTelTelemetryPlugin; -import org.opensearch.telemetry.Telemetry; -import org.opensearch.telemetry.TelemetrySettings; import java.util.Optional; diff --git a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/InMemorySingletonMetricsExporter.java b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/InMemorySingletonMetricsExporter.java new file mode 100644 index 0000000000000..74fc872cb30e3 --- /dev/null +++ b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/InMemorySingletonMetricsExporter.java @@ -0,0 +1,65 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.telemetry.metrics; + +import java.util.Collection; +import java.util.List; + +import io.opentelemetry.sdk.common.CompletableResultCode; +import io.opentelemetry.sdk.metrics.InstrumentType; +import io.opentelemetry.sdk.metrics.data.AggregationTemporality; +import io.opentelemetry.sdk.metrics.data.MetricData; +import io.opentelemetry.sdk.metrics.export.MetricExporter; +import io.opentelemetry.sdk.testing.exporter.InMemoryMetricExporter; + +public class InMemorySingletonMetricsExporter implements MetricExporter { + + public static final InMemorySingletonMetricsExporter INSTANCE = new InMemorySingletonMetricsExporter(InMemoryMetricExporter.create()); + + private static InMemoryMetricExporter delegate; + + public static InMemorySingletonMetricsExporter create() { + return INSTANCE; + } + + private InMemorySingletonMetricsExporter(InMemoryMetricExporter delegate) { + InMemorySingletonMetricsExporter.delegate = delegate; + } + + @Override + public CompletableResultCode export(Collection metrics) { + return delegate.export(metrics); + } + + @Override + public CompletableResultCode flush() { + return delegate.flush(); + } + + @Override + public CompletableResultCode shutdown() { + return delegate.shutdown(); + } + + public List getFinishedMetricItems() { + return delegate.getFinishedMetricItems(); + } + + /** + * Clears the state. + */ + public void reset() { + delegate.reset(); + } + + @Override + public AggregationTemporality getAggregationTemporality(InstrumentType instrumentType) { + return delegate.getAggregationTemporality(instrumentType); + } +} diff --git a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsDisabledSanityIT.java b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsDisabledSanityIT.java new file mode 100644 index 0000000000000..bcdcb657c4f42 --- /dev/null +++ b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsDisabledSanityIT.java @@ -0,0 +1,62 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.telemetry.metrics; + +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.plugins.Plugin; +import org.opensearch.telemetry.IntegrationTestOTelTelemetryPlugin; +import org.opensearch.telemetry.OTelTelemetrySettings; +import org.opensearch.telemetry.TelemetrySettings; +import org.opensearch.telemetry.metrics.noop.NoopCounter; +import org.opensearch.telemetry.metrics.noop.NoopMetricsRegistry; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.util.Arrays; +import java.util.Collection; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, minNumDataNodes = 1) +public class TelemetryMetricsDisabledSanityIT extends OpenSearchIntegTestCase { + + @Override + protected Settings nodeSettings(int nodeOrdinal) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal)) + .put(TelemetrySettings.METRICS_FEATURE_ENABLED_SETTING.getKey(), false) + .put( + OTelTelemetrySettings.OTEL_METRICS_EXPORTER_CLASS_SETTING.getKey(), + "org.opensearch.telemetry.metrics.InMemorySingletonMetricsExporter" + ) + .put(TelemetrySettings.METRICS_PUBLISH_INTERVAL_SETTING.getKey(), TimeValue.timeValueSeconds(1)) + .build(); + } + + @Override + protected Collection> nodePlugins() { + return Arrays.asList(IntegrationTestOTelTelemetryPlugin.class); + } + + @Override + protected boolean addMockTelemetryPlugin() { + return false; + } + + public void testSanityChecksWhenMetricsDisabled() throws Exception { + MetricsRegistry metricsRegistry = internalCluster().getInstance(MetricsRegistry.class); + + Counter counter = metricsRegistry.createCounter("test-counter", "test", "1"); + counter.add(1.0); + + Thread.sleep(2000); + + assertTrue(metricsRegistry instanceof NoopMetricsRegistry); + assertTrue(counter instanceof NoopCounter); + } + +} diff --git a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsEnabledSanityIT.java b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsEnabledSanityIT.java new file mode 100644 index 0000000000000..ed341595d327d --- /dev/null +++ b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/metrics/TelemetryMetricsEnabledSanityIT.java @@ -0,0 +1,99 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.telemetry.metrics; + +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.plugins.Plugin; +import org.opensearch.telemetry.IntegrationTestOTelTelemetryPlugin; +import org.opensearch.telemetry.OTelTelemetrySettings; +import org.opensearch.telemetry.TelemetrySettings; +import org.opensearch.test.OpenSearchIntegTestCase; +import org.junit.After; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.stream.Collectors; + +import io.opentelemetry.sdk.metrics.data.DoublePointData; + +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.SUITE, minNumDataNodes = 1) +public class TelemetryMetricsEnabledSanityIT extends OpenSearchIntegTestCase { + + @Override + protected Settings nodeSettings(int nodeOrdinal) { + return Settings.builder() + .put(super.nodeSettings(nodeOrdinal)) + .put(TelemetrySettings.METRICS_FEATURE_ENABLED_SETTING.getKey(), true) + .put( + OTelTelemetrySettings.OTEL_METRICS_EXPORTER_CLASS_SETTING.getKey(), + "org.opensearch.telemetry.metrics.InMemorySingletonMetricsExporter" + ) + .put(TelemetrySettings.METRICS_PUBLISH_INTERVAL_SETTING.getKey(), TimeValue.timeValueSeconds(1)) + .build(); + } + + @Override + protected Collection> nodePlugins() { + return Arrays.asList(IntegrationTestOTelTelemetryPlugin.class); + } + + @Override + protected boolean addMockTelemetryPlugin() { + return false; + } + + public void testCounter() throws Exception { + MetricsRegistry metricsRegistry = internalCluster().getInstance(MetricsRegistry.class); + InMemorySingletonMetricsExporter.INSTANCE.reset(); + + Counter counter = metricsRegistry.createCounter("test-counter", "test", "1"); + counter.add(1.0); + // Sleep for about 2s to wait for metrics to be published. + Thread.sleep(2000); + + InMemorySingletonMetricsExporter exporter = InMemorySingletonMetricsExporter.INSTANCE; + double value = ((DoublePointData) ((ArrayList) exporter.getFinishedMetricItems() + .stream() + .filter(a -> a.getName().equals("test-counter")) + .collect(Collectors.toList()) + .get(0) + .getDoubleSumData() + .getPoints()).get(0)).getValue(); + assertEquals(1.0, value, 0.0); + } + + public void testUpDownCounter() throws Exception { + + MetricsRegistry metricsRegistry = internalCluster().getInstance(MetricsRegistry.class); + InMemorySingletonMetricsExporter.INSTANCE.reset(); + + Counter counter = metricsRegistry.createUpDownCounter("test-up-down-counter", "test", "1"); + counter.add(1.0); + counter.add(-2.0); + // Sleep for about 2s to wait for metrics to be published. 
+ Thread.sleep(2000); + + InMemorySingletonMetricsExporter exporter = InMemorySingletonMetricsExporter.INSTANCE; + double value = ((DoublePointData) ((ArrayList) exporter.getFinishedMetricItems() + .stream() + .filter(a -> a.getName().equals("test-up-down-counter")) + .collect(Collectors.toList()) + .get(0) + .getDoubleSumData() + .getPoints()).get(0)).getValue(); + assertEquals(-1.0, value, 0.0); + } + + @After + public void reset() { + InMemorySingletonMetricsExporter.INSTANCE.reset(); + } +} diff --git a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerDisabledSanityIT.java b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerDisabledSanityIT.java index 949a58f6cab41..45ed140e1be94 100644 --- a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerDisabledSanityIT.java +++ b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerDisabledSanityIT.java @@ -12,6 +12,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.plugins.Plugin; +import org.opensearch.telemetry.IntegrationTestOTelTelemetryPlugin; import org.opensearch.telemetry.OTelTelemetrySettings; import org.opensearch.telemetry.TelemetrySettings; import org.opensearch.test.OpenSearchIntegTestCase; diff --git a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerEnabledSanityIT.java b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerEnabledSanityIT.java index 8a49a0abf5512..f07f2b308e801 100644 --- a/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerEnabledSanityIT.java +++ b/plugins/telemetry-otel/src/internalClusterTest/java/org/opensearch/telemetry/tracing/TelemetryTracerEnabledSanityIT.java @@ -12,6 +12,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.common.unit.TimeValue; import org.opensearch.plugins.Plugin; +import org.opensearch.telemetry.IntegrationTestOTelTelemetryPlugin; import org.opensearch.telemetry.OTelTelemetrySettings; import org.opensearch.telemetry.TelemetrySettings; import org.opensearch.telemetry.tracing.attributes.Attributes; @@ -88,9 +89,7 @@ public void testSanityChecksWhenTracingEnabled() throws Exception { ); InMemorySingletonSpanExporter exporter = InMemorySingletonSpanExporter.INSTANCE; - if (!exporter.getFinishedSpanItems().isEmpty()) { - validators.validate(exporter.getFinishedSpanItems(), 6); - } + validators.validate(exporter.getFinishedSpanItems(), 6); } private static void updateTelemetrySetting(Client client, boolean value) { diff --git a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/OTelTelemetryPlugin.java b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/OTelTelemetryPlugin.java index b57876c9310f3..297ae8873636f 100644 --- a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/OTelTelemetryPlugin.java +++ b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/OTelTelemetryPlugin.java @@ -8,14 +8,13 @@ package org.opensearch.telemetry; +import org.opensearch.common.concurrent.RefCountedReleasable; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Settings; import org.opensearch.plugins.Plugin; import org.opensearch.plugins.TelemetryPlugin; -import 
org.opensearch.telemetry.metrics.OTelMetricsTelemetry; import org.opensearch.telemetry.tracing.OTelResourceProvider; import org.opensearch.telemetry.tracing.OTelTelemetry; -import org.opensearch.telemetry.tracing.OTelTracingTelemetry; import java.util.Arrays; import java.util.List; @@ -37,6 +36,8 @@ public class OTelTelemetryPlugin extends Plugin implements TelemetryPlugin { private final Settings settings; + private RefCountedReleasable refCountedOpenTelemetry; + /** * Creates Otel plugin * @param settings cluster settings @@ -58,20 +59,32 @@ public List> getSettings() { @Override public Optional getTelemetry(TelemetrySettings telemetrySettings) { + initializeOpenTelemetrySdk(telemetrySettings); return Optional.of(telemetry(telemetrySettings)); } + private void initializeOpenTelemetrySdk(TelemetrySettings telemetrySettings) { + if (refCountedOpenTelemetry != null) { + return; + } + OpenTelemetrySdk openTelemetrySdk = OTelResourceProvider.get(telemetrySettings, settings); + refCountedOpenTelemetry = new RefCountedReleasable<>("openTelemetry", openTelemetrySdk, openTelemetrySdk::close); + } + @Override public String getName() { return OTEL_TRACER_NAME; } private Telemetry telemetry(TelemetrySettings telemetrySettings) { - final OpenTelemetrySdk openTelemetry = OTelResourceProvider.get(telemetrySettings, settings); - return new OTelTelemetry( - new OTelTracingTelemetry<>(openTelemetry, openTelemetry.getSdkTracerProvider()), - new OTelMetricsTelemetry<>(openTelemetry.getSdkMeterProvider()) - ); + return new OTelTelemetry(refCountedOpenTelemetry); + } + + @Override + public void close() { + if (refCountedOpenTelemetry != null) { + refCountedOpenTelemetry.close(); + } } } diff --git a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetry.java b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetry.java index 8598e5976d20d..6160e5106c041 100644 --- a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetry.java +++ b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetry.java @@ -8,6 +8,7 @@ package org.opensearch.telemetry.metrics; +import org.opensearch.common.concurrent.RefCountedReleasable; import org.opensearch.telemetry.OTelTelemetryPlugin; import java.io.Closeable; @@ -19,19 +20,24 @@ import io.opentelemetry.api.metrics.DoubleUpDownCounter; import io.opentelemetry.api.metrics.Meter; import io.opentelemetry.api.metrics.MeterProvider; +import io.opentelemetry.sdk.OpenTelemetrySdk; /** * OTel implementation for {@link MetricsTelemetry} */ public class OTelMetricsTelemetry implements MetricsTelemetry { + private final RefCountedReleasable refCountedOpenTelemetry; private final Meter otelMeter; private final T meterProvider; /** * Creates OTel based {@link MetricsTelemetry}. + * @param openTelemetry open telemetry. 
* @param meterProvider {@link MeterProvider} instance */ - public OTelMetricsTelemetry(T meterProvider) { + public OTelMetricsTelemetry(RefCountedReleasable openTelemetry, T meterProvider) { + this.refCountedOpenTelemetry = openTelemetry; + this.refCountedOpenTelemetry.incRef(); this.meterProvider = meterProvider; this.otelMeter = meterProvider.get(OTelTelemetryPlugin.INSTRUMENTATION_SCOPE_NAME); } @@ -63,5 +69,6 @@ public Counter createUpDownCounter(String name, String description, String unit) @Override public void close() throws IOException { meterProvider.close(); + refCountedOpenTelemetry.close(); } } diff --git a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelResourceProvider.java b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelResourceProvider.java index a6a1f12aab8a9..14a19f122c17b 100644 --- a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelResourceProvider.java +++ b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelResourceProvider.java @@ -30,7 +30,7 @@ import io.opentelemetry.sdk.trace.export.BatchSpanProcessor; import io.opentelemetry.sdk.trace.export.SpanExporter; import io.opentelemetry.sdk.trace.samplers.Sampler; -import io.opentelemetry.semconv.resource.attributes.ResourceAttributes; +import io.opentelemetry.semconv.ResourceAttributes; import static org.opensearch.telemetry.OTelTelemetrySettings.TRACER_EXPORTER_BATCH_SIZE_SETTING; import static org.opensearch.telemetry.OTelTelemetrySettings.TRACER_EXPORTER_DELAY_SETTING; diff --git a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTelemetry.java b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTelemetry.java index 282fabd43346b..0c697d2cc5e8c 100644 --- a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTelemetry.java +++ b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTelemetry.java @@ -8,34 +8,35 @@ package org.opensearch.telemetry.tracing; +import org.opensearch.common.concurrent.RefCountedReleasable; import org.opensearch.telemetry.Telemetry; import org.opensearch.telemetry.metrics.MetricsTelemetry; +import org.opensearch.telemetry.metrics.OTelMetricsTelemetry; + +import io.opentelemetry.sdk.OpenTelemetrySdk; /** * Otel implementation of Telemetry */ public class OTelTelemetry implements Telemetry { - private final TracingTelemetry tracingTelemetry; - private final MetricsTelemetry metricsTelemetry; + private final RefCountedReleasable refCountedOpenTelemetry; /** * Creates Telemetry instance - * @param tracingTelemetry tracing telemetry - * @param metricsTelemetry metrics telemetry + * @param refCountedOpenTelemetry open telemetry.
*/ - public OTelTelemetry(TracingTelemetry tracingTelemetry, MetricsTelemetry metricsTelemetry) { - this.tracingTelemetry = tracingTelemetry; - this.metricsTelemetry = metricsTelemetry; + public OTelTelemetry(RefCountedReleasable refCountedOpenTelemetry) { + this.refCountedOpenTelemetry = refCountedOpenTelemetry; } @Override public TracingTelemetry getTracingTelemetry() { - return tracingTelemetry; + return new OTelTracingTelemetry<>(refCountedOpenTelemetry, refCountedOpenTelemetry.get().getSdkTracerProvider()); } @Override public MetricsTelemetry getMetricsTelemetry() { - return metricsTelemetry; + return new OTelMetricsTelemetry<>(refCountedOpenTelemetry, refCountedOpenTelemetry.get().getSdkMeterProvider()); } } diff --git a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTracingTelemetry.java b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTracingTelemetry.java index f88afe623fd56..af39617a8c744 100644 --- a/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTracingTelemetry.java +++ b/plugins/telemetry-otel/src/main/java/org/opensearch/telemetry/tracing/OTelTracingTelemetry.java @@ -8,31 +8,33 @@ package org.opensearch.telemetry.tracing; +import org.opensearch.common.concurrent.RefCountedReleasable; import org.opensearch.telemetry.OTelAttributesConverter; import org.opensearch.telemetry.OTelTelemetryPlugin; import java.io.Closeable; import java.io.IOException; -import io.opentelemetry.api.OpenTelemetry; import io.opentelemetry.api.trace.TracerProvider; import io.opentelemetry.context.Context; +import io.opentelemetry.sdk.OpenTelemetrySdk; /** * OTel based Telemetry provider */ public class OTelTracingTelemetry implements TracingTelemetry { - private final OpenTelemetry openTelemetry; + private final RefCountedReleasable refCountedOpenTelemetry; private final T tracerProvider; private final io.opentelemetry.api.trace.Tracer otelTracer; /** * Creates OTel based {@link TracingTelemetry} - * @param openTelemetry OpenTelemetry instance + * @param refCountedOpenTelemetry OpenTelemetry instance * @param tracerProvider {@link TracerProvider} instance. 
*/ - public OTelTracingTelemetry(OpenTelemetry openTelemetry, T tracerProvider) { - this.openTelemetry = openTelemetry; + public OTelTracingTelemetry(RefCountedReleasable refCountedOpenTelemetry, T tracerProvider) { + this.refCountedOpenTelemetry = refCountedOpenTelemetry; + this.refCountedOpenTelemetry.incRef(); this.tracerProvider = tracerProvider; this.otelTracer = tracerProvider.get(OTelTelemetryPlugin.INSTRUMENTATION_SCOPE_NAME); } @@ -40,6 +42,7 @@ public OTelTracingTelemetry(OpenTelemetry openTelemetry, T tracerProvider) { @Override public void close() throws IOException { tracerProvider.close(); + refCountedOpenTelemetry.close(); } @Override @@ -49,7 +52,7 @@ public Span createSpan(SpanCreationContext spanCreationContext, Span parentSpan) @Override public TracingContextPropagator getContextPropagator() { - return new OTelTracingContextPropagator(openTelemetry); + return new OTelTracingContextPropagator(refCountedOpenTelemetry.get()); } private Span createOtelSpan(SpanCreationContext spanCreationContext, Span parentSpan) { diff --git a/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetryTests.java b/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetryTests.java index 233c93e6b9a36..9de575b69774a 100644 --- a/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetryTests.java +++ b/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/metrics/OTelMetricsTelemetryTests.java @@ -8,11 +8,13 @@ package org.opensearch.telemetry.metrics; +import org.opensearch.common.concurrent.RefCountedReleasable; import org.opensearch.telemetry.OTelAttributesConverter; import org.opensearch.telemetry.OTelTelemetryPlugin; import org.opensearch.telemetry.metrics.tags.Tags; import org.opensearch.test.OpenSearchTestCase; +import io.opentelemetry.api.OpenTelemetry; import io.opentelemetry.api.metrics.DoubleCounter; import io.opentelemetry.api.metrics.DoubleCounterBuilder; import io.opentelemetry.api.metrics.DoubleUpDownCounter; @@ -34,12 +36,16 @@ public void testCounter() { String description = "test"; String unit = "1"; Meter mockMeter = mock(Meter.class); + OpenTelemetry mockOpenTelemetry = mock(OpenTelemetry.class); DoubleCounter mockOTelDoubleCounter = mock(DoubleCounter.class); LongCounterBuilder mockOTelLongCounterBuilder = mock(LongCounterBuilder.class); DoubleCounterBuilder mockOTelDoubleCounterBuilder = mock(DoubleCounterBuilder.class); MeterProvider meterProvider = mock(MeterProvider.class); when(meterProvider.get(OTelTelemetryPlugin.INSTRUMENTATION_SCOPE_NAME)).thenReturn(mockMeter); - MetricsTelemetry metricsTelemetry = new OTelMetricsTelemetry(meterProvider); + MetricsTelemetry metricsTelemetry = new OTelMetricsTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + meterProvider + ); when(mockMeter.counterBuilder(counterName)).thenReturn(mockOTelLongCounterBuilder); when(mockOTelLongCounterBuilder.setDescription(description)).thenReturn(mockOTelLongCounterBuilder); when(mockOTelLongCounterBuilder.setUnit(unit)).thenReturn(mockOTelLongCounterBuilder); @@ -59,6 +65,7 @@ public void testCounterNegativeValue() { String counterName = "test-counter"; String description = "test"; String unit = "1"; + OpenTelemetry mockOpenTelemetry = mock(OpenTelemetry.class); Meter mockMeter = mock(Meter.class); DoubleCounter mockOTelDoubleCounter = mock(DoubleCounter.class); LongCounterBuilder mockOTelLongCounterBuilder = mock(LongCounterBuilder.class); @@ -66,7 +73,10 @@ 
public void testCounterNegativeValue() { MeterProvider meterProvider = mock(MeterProvider.class); when(meterProvider.get(OTelTelemetryPlugin.INSTRUMENTATION_SCOPE_NAME)).thenReturn(mockMeter); - MetricsTelemetry metricsTelemetry = new OTelMetricsTelemetry(meterProvider); + MetricsTelemetry metricsTelemetry = new OTelMetricsTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + meterProvider + ); when(mockMeter.counterBuilder(counterName)).thenReturn(mockOTelLongCounterBuilder); when(mockOTelLongCounterBuilder.setDescription(description)).thenReturn(mockOTelLongCounterBuilder); when(mockOTelLongCounterBuilder.setUnit(unit)).thenReturn(mockOTelLongCounterBuilder); @@ -83,6 +93,7 @@ public void testUpDownCounter() { String counterName = "test-counter"; String description = "test"; String unit = "1"; + OpenTelemetry mockOpenTelemetry = mock(OpenTelemetry.class); Meter mockMeter = mock(Meter.class); DoubleUpDownCounter mockOTelUpDownDoubleCounter = mock(DoubleUpDownCounter.class); LongUpDownCounterBuilder mockOTelLongUpDownCounterBuilder = mock(LongUpDownCounterBuilder.class); @@ -90,7 +101,10 @@ public void testUpDownCounter() { MeterProvider meterProvider = mock(MeterProvider.class); when(meterProvider.get(OTelTelemetryPlugin.INSTRUMENTATION_SCOPE_NAME)).thenReturn(mockMeter); - MetricsTelemetry metricsTelemetry = new OTelMetricsTelemetry(meterProvider); + MetricsTelemetry metricsTelemetry = new OTelMetricsTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + meterProvider + ); when(mockMeter.upDownCounterBuilder(counterName)).thenReturn(mockOTelLongUpDownCounterBuilder); when(mockOTelLongUpDownCounterBuilder.setDescription(description)).thenReturn(mockOTelLongUpDownCounterBuilder); when(mockOTelLongUpDownCounterBuilder.setUnit(unit)).thenReturn(mockOTelLongUpDownCounterBuilder); diff --git a/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/tracing/OTelTracingTelemetryTests.java b/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/tracing/OTelTracingTelemetryTests.java index 1a508ed252493..1f0c2f674e655 100644 --- a/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/tracing/OTelTracingTelemetryTests.java +++ b/plugins/telemetry-otel/src/test/java/org/opensearch/telemetry/tracing/OTelTracingTelemetryTests.java @@ -8,6 +8,7 @@ package org.opensearch.telemetry.tracing; +import org.opensearch.common.concurrent.RefCountedReleasable; import org.opensearch.telemetry.OTelTelemetryPlugin; import org.opensearch.telemetry.tracing.attributes.Attributes; import org.opensearch.test.OpenSearchTestCase; @@ -37,7 +38,10 @@ public void testCreateSpanWithoutParent() { when(mockSpanBuilder.startSpan()).thenReturn(mock(io.opentelemetry.api.trace.Span.class)); when(mockSpanBuilder.setSpanKind(any(io.opentelemetry.api.trace.SpanKind.class))).thenReturn(mockSpanBuilder); Attributes attributes = Attributes.create().addAttribute("name", "value"); - TracingTelemetry tracingTelemetry = new OTelTracingTelemetry(mockOpenTelemetry, mockTracerProvider); + TracingTelemetry tracingTelemetry = new OTelTracingTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + mockTracerProvider + ); Span span = tracingTelemetry.createSpan(SpanCreationContext.internal().name("span_name").attributes(attributes), null); verify(mockSpanBuilder, never()).setParent(any()); verify(mockSpanBuilder).setAllAttributes(createAttribute(attributes)); @@ -59,7 +63,10 @@ public void testCreateSpanWithParent() { Span parentSpan = new 
OTelSpan("parent_span", mock(io.opentelemetry.api.trace.Span.class), null); - TracingTelemetry tracingTelemetry = new OTelTracingTelemetry(mockOpenTelemetry, mockTracerProvider); + TracingTelemetry tracingTelemetry = new OTelTracingTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + mockTracerProvider + ); Attributes attributes = Attributes.create().addAttribute("name", 1l); Span span = tracingTelemetry.createSpan(SpanCreationContext.internal().name("span_name").attributes(attributes), parentSpan); @@ -85,7 +92,10 @@ public void testCreateSpanWithParentWithMultipleAttributes() { Span parentSpan = new OTelSpan("parent_span", mock(io.opentelemetry.api.trace.Span.class), null); - TracingTelemetry tracingTelemetry = new OTelTracingTelemetry(mockOpenTelemetry, mockTracerProvider); + TracingTelemetry tracingTelemetry = new OTelTracingTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + mockTracerProvider + ); Attributes attributes = Attributes.create() .addAttribute("key1", 1l) .addAttribute("key2", 2.0) @@ -125,7 +135,10 @@ public void testGetContextPropagator() { TracerProvider mockTracerProvider = mock(TracerProvider.class); when(mockTracerProvider.get(OTelTelemetryPlugin.INSTRUMENTATION_SCOPE_NAME)).thenReturn(mockTracer); - TracingTelemetry tracingTelemetry = new OTelTracingTelemetry(mockOpenTelemetry, mockTracerProvider); + TracingTelemetry tracingTelemetry = new OTelTracingTelemetry( + new RefCountedReleasable("telemetry", mockOpenTelemetry, () -> {}), + mockTracerProvider + ); assertTrue(tracingTelemetry.getContextPropagator() instanceof OTelTracingContextPropagator); } diff --git a/release-notes/opensearch.release-notes-2.11.0.md b/release-notes/opensearch.release-notes-2.11.0.md index d7e9182f2a656..7ebf1b433c7c6 100644 --- a/release-notes/opensearch.release-notes-2.11.0.md +++ b/release-notes/opensearch.release-notes-2.11.0.md @@ -8,13 +8,16 @@ - Async blob read support for S3 plugin ([#9694](https://github.com/opensearch-project/OpenSearch/pull/9694)) - [Telemetry-Otel] Added support for OtlpGrpcSpanExporter exporter ([#9666](https://github.com/opensearch-project/OpenSearch/pull/9666)) - Async blob read support for encrypted containers ([#10131](https://github.com/opensearch-project/OpenSearch/pull/10131)) +- Implement Visitor Design pattern in QueryBuilder to enable the capability to traverse through the complex QueryBuilder tree. 
([#10110](https://github.com/opensearch-project/OpenSearch/pull/10110)) - Add capability to restrict async durability mode for remote indexes ([#10189](https://github.com/opensearch-project/OpenSearch/pull/10189)) - Add Doc Status Counter for Indexing Engine ([#4562](https://github.com/opensearch-project/OpenSearch/issues/4562)) - Add unreferenced file cleanup count to merge stats ([#10204](https://github.com/opensearch-project/OpenSearch/pull/10204)) +- Configurable merge policy for index with an option to choose from LogByteSize and Tiered merge policy ([#9992](https://github.com/opensearch-project/OpenSearch/pull/9992)) - [Remote Store] Add support to restrict creation & deletion if system repository and mutation of immutable settings of system repository ([#9839](https://github.com/opensearch-project/OpenSearch/pull/9839)) - Improve compressed request handling ([#10261](https://github.com/opensearch-project/OpenSearch/pull/10261)) ### Dependencies +- Bump JNA version from 5.5 to 5.13 ([#9963](https://github.com/opensearch-project/OpenSearch/pull/9963)) - Bump `peter-evans/create-or-update-comment` from 2 to 3 ([#9575](https://github.com/opensearch-project/OpenSearch/pull/9575)) - Bump `actions/checkout` from 2 to 4 ([#9968](https://github.com/opensearch-project/OpenSearch/pull/9968)) - Bump OpenTelemetry from 1.26.0 to 1.30.1 ([#9950](https://github.com/opensearch-project/OpenSearch/pull/9950)) @@ -25,30 +28,22 @@ - Bump `com.zaxxer:SparseBitSet` from 1.2 to 1.3 ([#10098](https://github.com/opensearch-project/OpenSearch/pull/10098)) - Bump `tibdex/github-app-token` from 1.5.0 to 2.1.0 ([#10125](https://github.com/opensearch-project/OpenSearch/pull/10125)) - Bump `org.wiremock:wiremock-standalone` from 2.35.0 to 3.1.0 ([#9752](https://github.com/opensearch-project/OpenSearch/pull/9752)) +- Bump `org.eclipse.jgit` from 6.5.0 to 6.7.0 ([#10147](https://github.com/opensearch-project/OpenSearch/pull/10147)) +- Bump `codecov/codecov-action` from 2 to 3 ([#10209](https://github.com/opensearch-project/OpenSearch/pull/10209)) - Bump `com.google.http-client:google-http-client-jackson2` from 1.43.2 to 1.43.3 ([#10126](https://github.com/opensearch-project/OpenSearch/pull/10126)) - Bump `org.xerial.snappy:snappy-java` from 1.1.10.3 to 1.1.10.5 ([#10206](https://github.com/opensearch-project/OpenSearch/pull/10206), [#10299](https://github.com/opensearch-project/OpenSearch/pull/10299)) -- Bump `com.google.api.grpc:proto-google-common-protos` from 2.10.0 to 2.25.1 ([#10208](https://github.com/opensearch-project/OpenSearch/pull/10208), [#10298](https://github.com/opensearch-project/OpenSearch/pull/10298)) -- Bump `codecov/codecov-action` from 2 to 3 ([#10209](https://github.com/opensearch-project/OpenSearch/pull/10209)) - Bump `org.bouncycastle:bcpkix-jdk15to18` from 1.75 to 1.76 ([10219](https://github.com/opensearch-project/OpenSearch/pull/10219))` - Bump `org.bouncycastle:bcprov-jdk15to18` from 1.75 to 1.76 ([10219](https://github.com/opensearch-project/OpenSearch/pull/10219))` - Bump `org.bouncycastle:bcmail-jdk15to18` from 1.75 to 1.76 ([10219](https://github.com/opensearch-project/OpenSearch/pull/10219))` -- Bump Lucene from 9.7.0 to 9.8.0 ([10276](https://github.com/opensearch-project/OpenSearch/pull/10276)) -- Bump `com.netflix.nebula.ospackage-base` from 11.4.0 to 11.5.0 ([#10295](https://github.com/opensearch-project/OpenSearch/pull/10295)) - Bump asm from 9.5 to 9.6 ([#10302](https://github.com/opensearch-project/OpenSearch/pull/10302)) - Bump netty from 4.1.97.Final to 4.1.99.Final 
([#10303](https://github.com/opensearch-project/OpenSearch/pull/10303)) - Bump `peter-evans/create-pull-request` from 3 to 5 ([#10301](https://github.com/opensearch-project/OpenSearch/pull/10301)) - Bump `org.apache.avro:avro` from 1.11.2 to 1.11.3 ([#10210](https://github.com/opensearch-project/OpenSearch/pull/10210)) -- Bump `org.bouncycastle:bc-fips` from 1.0.2.3 to 1.0.2.4 ([#10297](https://github.com/opensearch-project/OpenSearch/pull/10297)) -- Bump `org.apache.zookeeper:zookeeper` from 3.9.0 to 3.9.1 ([#10506](https://github.com/opensearch-project/OpenSearch/pull/10506)) -- Bump `de.thetaphi:forbiddenapis` from 3.5.1 to 3.6 ([#10508](https://github.com/opensearch-project/OpenSearch/pull/10508)) -- Bump `commons-io:commons-io` from 2.13.0 to 2.14.0 ([#10294](https://github.com/opensearch-project/OpenSearch/pull/10294)) - Bump `netty` from 4.1.99.Final to 4.1.100.Final ([#10564](https://github.com/opensearch-project/OpenSearch/pull/10564)) ### Changed - Add instrumentation in rest and network layer. ([#9415](https://github.com/opensearch-project/OpenSearch/pull/9415)) - Allow parameterization of tests with OpenSearchIntegTestCase.SuiteScopeTestCase annotation ([#9916](https://github.com/opensearch-project/OpenSearch/pull/9916)) -- Mute the query profile IT with concurrent execution ([#9840](https://github.com/opensearch-project/OpenSearch/pull/9840)) -- Force merge with `only_expunge_deletes` honors max segment size ([#10036](https://github.com/opensearch-project/OpenSearch/pull/10036)) - Add instrumentation in transport service. ([#10042](https://github.com/opensearch-project/OpenSearch/pull/10042)) - [Tracing Framework] Add support for SpanKind. ([#10122](https://github.com/opensearch-project/OpenSearch/pull/10122)) - Pass parent filter to inner query in nested query ([#10246](https://github.com/opensearch-project/OpenSearch/pull/10246)) @@ -58,7 +53,6 @@ - [Remote Store] Add support to reload repository metadata inplace ([#9569](https://github.com/opensearch-project/OpenSearch/pull/9569)) - [Metrics Framework] Add Metrics framework. 
([#10241](https://github.com/opensearch-project/OpenSearch/pull/10241)) - Updating the separator for RemoteStoreLockManager since underscore is allowed in base64UUID url charset ([#10379](https://github.com/opensearch-project/OpenSearch/pull/10379)) -- Add the means to extract the contextual properties from HttpChannel, TcpCChannel and TrasportChannel without excessive typecasting ([#10562](https://github.com/opensearch-project/OpenSearch/pull/10562)) ### Removed - Remove spurious SGID bit on directories ([#9447](https://github.com/opensearch-project/OpenSearch/pull/9447)) @@ -68,8 +62,6 @@ - Fix broken backward compatibility from 2.7 for IndexSorted field indices ([#10045](https://github.com/opensearch-project/OpenSearch/pull/10045)) - Fix concurrent search NPE when track_total_hits, terminate_after and size=0 are used ([#10082](https://github.com/opensearch-project/OpenSearch/pull/10082)) - Fix remove ingest processor handing ignore_missing parameter not correctly ([10089](https://github.com/opensearch-project/OpenSearch/pull/10089)) -- Fix class_cast_exception when passing int to _version and other metadata fields in ingest simulate API ([#10101](https://github.com/opensearch-project/OpenSearch/pull/10101)) -- Fix circular dependency in Settings initialization ([10194](https://github.com/opensearch-project/OpenSearch/pull/10194)) - Fix registration and initialization of multiple extensions ([10256](https://github.com/opensearch-project/OpenSearch/pull/10256)) +- Fix circular dependency in Settings initialization ([10194](https://github.com/opensearch-project/OpenSearch/pull/10194)) - Fix Segment Replication ShardLockObtainFailedException bug during index corruption ([10370](https://github.com/opensearch-project/OpenSearch/pull/10370)) -- Fix some test methods in SimulatePipelineRequestParsingTests never run and fix test failure ([#10496](https://github.com/opensearch-project/OpenSearch/pull/10496)) diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java index 394d5cdfc26f3..6ce6ca40cbce4 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodeStats.java @@ -56,6 +56,7 @@ import org.opensearch.monitor.os.OsStats; import org.opensearch.monitor.process.ProcessStats; import org.opensearch.node.AdaptiveSelectionStats; +import org.opensearch.node.NodesResourceUsageStats; import org.opensearch.repositories.RepositoriesStats; import org.opensearch.script.ScriptCacheStats; import org.opensearch.script.ScriptStats; @@ -143,6 +144,9 @@ public class NodeStats extends BaseNodeResponse implements ToXContentFragment { @Nullable private SearchPipelineStats searchPipelineStats; + @Nullable + private NodesResourceUsageStats resourceUsageStats; + @Nullable private RepositoriesStats repositoriesStats; @@ -202,6 +206,11 @@ public NodeStats(StreamInput in) throws IOException { } else { searchPipelineStats = null; } + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { // make it 2.12 when we backport + resourceUsageStats = in.readOptionalWriteable(NodesResourceUsageStats::new); + } else { + resourceUsageStats = null; + } if (in.getVersion().onOrAfter(Version.V_3_0_0)) { repositoriesStats = in.readOptionalWriteable(RepositoriesStats::new); } else { @@ -225,6 +234,7 @@ public NodeStats( @Nullable DiscoveryStats discoveryStats, @Nullable IngestStats ingestStats, @Nullable 
AdaptiveSelectionStats adaptiveSelectionStats, + @Nullable NodesResourceUsageStats resourceUsageStats, @Nullable ScriptCacheStats scriptCacheStats, @Nullable IndexingPressureStats indexingPressureStats, @Nullable ShardIndexingPressureStats shardIndexingPressureStats, @@ -251,6 +261,7 @@ public NodeStats( this.discoveryStats = discoveryStats; this.ingestStats = ingestStats; this.adaptiveSelectionStats = adaptiveSelectionStats; + this.resourceUsageStats = resourceUsageStats; this.scriptCacheStats = scriptCacheStats; this.indexingPressureStats = indexingPressureStats; this.shardIndexingPressureStats = shardIndexingPressureStats; @@ -355,6 +366,11 @@ public AdaptiveSelectionStats getAdaptiveSelectionStats() { return adaptiveSelectionStats; } + @Nullable + public NodesResourceUsageStats getResourceUsageStats() { + return resourceUsageStats; + } + @Nullable public ScriptCacheStats getScriptCacheStats() { return scriptCacheStats; @@ -446,6 +462,9 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_9_0)) { out.writeOptionalWriteable(searchPipelineStats); } + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { // make it 2.12 when we backport + out.writeOptionalWriteable(resourceUsageStats); + } if (out.getVersion().onOrAfter(Version.V_3_0_0)) { out.writeOptionalWriteable(repositoriesStats); } @@ -539,10 +558,12 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws if (getSearchPipelineStats() != null) { getSearchPipelineStats().toXContent(builder, params); } + if (getResourceUsageStats() != null) { + getResourceUsageStats().toXContent(builder, params); + } if (getRepositoriesStats() != null) { getRepositoriesStats().toXContent(builder, params); } - return builder; } } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java index 896cb14afab37..88dff20354aa2 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/NodesStatsRequest.java @@ -214,6 +214,7 @@ public enum Metric { FILE_CACHE_STATS("file_cache"), TASK_CANCELLATION("task_cancellation"), SEARCH_PIPELINE("search_pipeline"), + RESOURCE_USAGE_STATS("resource_usage_stats"), REPOSITORIES("repositories"); private String metricName; diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java index 6ccbf83b0c915..aa02f8e580f4a 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/stats/TransportNodesStatsAction.java @@ -125,6 +125,7 @@ protected NodeStats nodeOperation(NodeStatsRequest nodeStatsRequest) { NodesStatsRequest.Metric.FILE_CACHE_STATS.containedIn(metrics), NodesStatsRequest.Metric.TASK_CANCELLATION.containedIn(metrics), NodesStatsRequest.Metric.SEARCH_PIPELINE.containedIn(metrics), + NodesStatsRequest.Metric.RESOURCE_USAGE_STATS.containedIn(metrics), NodesStatsRequest.Metric.REPOSITORIES.containedIn(metrics) ); } diff --git a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java index 1c0a1280ad550..14f57218ae1dc 
100644 --- a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java @@ -215,6 +215,7 @@ public final void start() { 0, 0, buildTookInMillis(), + timeProvider.getPhaseTook(), ShardSearchFailure.EMPTY_ARRAY, clusters, null @@ -662,6 +663,7 @@ protected final SearchResponse buildSearchResponse( successfulOps.get(), skippedOps.get(), buildTookInMillis(), + timeProvider.getPhaseTook(), failures, clusters, searchContextId diff --git a/server/src/main/java/org/opensearch/action/search/MultiSearchRequest.java b/server/src/main/java/org/opensearch/action/search/MultiSearchRequest.java index da8f8f144eaf2..00e0345062d1c 100644 --- a/server/src/main/java/org/opensearch/action/search/MultiSearchRequest.java +++ b/server/src/main/java/org/opensearch/action/search/MultiSearchRequest.java @@ -277,6 +277,8 @@ public static void readMultiLineFormat( } else if ("cancel_after_time_interval".equals(entry.getKey()) || "cancelAfterTimeInterval".equals(entry.getKey())) { searchRequest.setCancelAfterTimeInterval(nodeTimeValue(value, null)); + } else if ("phase_took".equals(entry.getKey())) { + searchRequest.setPhaseTook(nodeBooleanValue(value)); } else { throw new IllegalArgumentException("key [" + entry.getKey() + "] is not supported in the metadata section"); } @@ -374,6 +376,9 @@ public static void writeSearchRequestParams(SearchRequest request, XContentBuild if (request.getCancelAfterTimeInterval() != null) { xContentBuilder.field("cancel_after_time_interval", request.getCancelAfterTimeInterval().getStringRep()); } + if (request.isPhaseTook() != null) { + xContentBuilder.field("phase_took", request.isPhaseTook()); + } xContentBuilder.endObject(); } diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequest.java b/server/src/main/java/org/opensearch/action/search/SearchRequest.java index 21cf0ed97b9da..9e50213eab5f9 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchRequest.java +++ b/server/src/main/java/org/opensearch/action/search/SearchRequest.java @@ -117,6 +117,8 @@ public class SearchRequest extends ActionRequest implements IndicesRequest.Repla private String pipeline; + private Boolean phaseTook = null; + public SearchRequest() { this.localClusterAlias = null; this.absoluteStartMillis = DEFAULT_ABSOLUTE_START_MILLIS; @@ -209,6 +211,7 @@ private SearchRequest( this.absoluteStartMillis = absoluteStartMillis; this.finalReduce = finalReduce; this.cancelAfterTimeInterval = searchRequest.cancelAfterTimeInterval; + this.phaseTook = searchRequest.phaseTook; } /** @@ -253,6 +256,9 @@ public SearchRequest(StreamInput in) throws IOException { if (in.getVersion().onOrAfter(Version.V_2_7_0)) { pipeline = in.readOptionalString(); } + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + phaseTook = in.readOptionalBoolean(); + } } @Override @@ -284,6 +290,9 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_7_0)) { out.writeOptionalString(pipeline); } + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeOptionalBoolean(phaseTook); + } } @Override @@ -615,6 +624,20 @@ public void setPreFilterShardSize(int preFilterShardSize) { this.preFilterShardSize = preFilterShardSize; } + /** + * Returns value of user-provided phase_took query parameter for this search request. + */ + public Boolean isPhaseTook() { + return phaseTook; + } + + /** + * Sets value of phase_took query param if provided by user. 
Defaults to null. + */ + public void setPhaseTook(Boolean phaseTook) { + this.phaseTook = phaseTook; + } + /** * Returns a threshold that enforces a pre-filter roundtrip to pre-filter search shards based on query rewriting if the number of shards * the search request expands to exceeds the threshold, or null if the threshold is unspecified. @@ -719,7 +742,8 @@ public boolean equals(Object o) { && absoluteStartMillis == that.absoluteStartMillis && ccsMinimizeRoundtrips == that.ccsMinimizeRoundtrips && Objects.equals(cancelAfterTimeInterval, that.cancelAfterTimeInterval) - && Objects.equals(pipeline, that.pipeline); + && Objects.equals(pipeline, that.pipeline) + && Objects.equals(phaseTook, that.phaseTook); } @Override @@ -740,7 +764,8 @@ public int hashCode() { localClusterAlias, absoluteStartMillis, ccsMinimizeRoundtrips, - cancelAfterTimeInterval + cancelAfterTimeInterval, + phaseTook ); } @@ -783,6 +808,8 @@ public String toString() { + cancelAfterTimeInterval + ", pipeline=" + pipeline + + ", phaseTook=" + + phaseTook + "}"; } } diff --git a/server/src/main/java/org/opensearch/action/search/SearchResponse.java b/server/src/main/java/org/opensearch/action/search/SearchResponse.java index a546311a1f668..91f0dc0737637 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchResponse.java +++ b/server/src/main/java/org/opensearch/action/search/SearchResponse.java @@ -33,6 +33,7 @@ package org.opensearch.action.search; import org.apache.lucene.search.TotalHits; +import org.opensearch.Version; import org.opensearch.common.Nullable; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.xcontent.StatusToXContentObject; @@ -63,7 +64,9 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Objects; import java.util.function.Supplier; @@ -94,6 +97,7 @@ public class SearchResponse extends ActionResponse implements StatusToXContentOb private final ShardSearchFailure[] shardFailures; private final Clusters clusters; private final long tookInMillis; + private final PhaseTook phaseTook; public SearchResponse(StreamInput in) throws IOException { super(in); @@ -112,6 +116,11 @@ public SearchResponse(StreamInput in) throws IOException { clusters = new Clusters(in); scrollId = in.readOptionalString(); tookInMillis = in.readVLong(); + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + phaseTook = in.readOptionalWriteable(PhaseTook::new); + } else { + phaseTook = null; + } skippedShards = in.readVInt(); pointInTimeId = in.readOptionalString(); } @@ -126,7 +135,32 @@ public SearchResponse( ShardSearchFailure[] shardFailures, Clusters clusters ) { - this(internalResponse, scrollId, totalShards, successfulShards, skippedShards, tookInMillis, shardFailures, clusters, null); + this(internalResponse, scrollId, totalShards, successfulShards, skippedShards, tookInMillis, null, shardFailures, clusters, null); + } + + public SearchResponse( + SearchResponseSections internalResponse, + String scrollId, + int totalShards, + int successfulShards, + int skippedShards, + long tookInMillis, + ShardSearchFailure[] shardFailures, + Clusters clusters, + String pointInTimeId + ) { + this( + internalResponse, + scrollId, + totalShards, + successfulShards, + skippedShards, + tookInMillis, + null, + shardFailures, + clusters, + pointInTimeId + ); } public SearchResponse( @@ -136,6 +170,7 @@ public SearchResponse( int successfulShards, int skippedShards, long tookInMillis, + 
PhaseTook phaseTook, ShardSearchFailure[] shardFailures, Clusters clusters, String pointInTimeId @@ -148,6 +183,7 @@ public SearchResponse( this.successfulShards = successfulShards; this.skippedShards = skippedShards; this.tookInMillis = tookInMillis; + this.phaseTook = phaseTook; this.shardFailures = shardFailures; assert skippedShards <= totalShards : "skipped: " + skippedShards + " total: " + totalShards; assert scrollId == null || pointInTimeId == null : "SearchResponse can't have both scrollId [" @@ -210,6 +246,13 @@ public TimeValue getTook() { return new TimeValue(tookInMillis); } + /** + * How long the request took in each search phase. + */ + public PhaseTook getPhaseTook() { + return phaseTook; + } + /** * The total number of shards the search was executed on. */ @@ -298,6 +341,9 @@ public XContentBuilder innerToXContent(XContentBuilder builder, Params params) t builder.field(POINT_IN_TIME_ID.getPreferredName(), pointInTimeId); } builder.field(TOOK.getPreferredName(), tookInMillis); + if (phaseTook != null) { + phaseTook.toXContent(builder, params); + } builder.field(TIMED_OUT.getPreferredName(), isTimedOut()); if (isTerminatedEarly() != null) { builder.field(TERMINATED_EARLY.getPreferredName(), isTerminatedEarly()); @@ -337,6 +383,7 @@ public static SearchResponse innerFromXContent(XContentParser parser) throws IOE Boolean terminatedEarly = null; int numReducePhases = 1; long tookInMillis = -1; + PhaseTook phaseTook = null; int successfulShards = -1; int totalShards = -1; int skippedShards = 0; // 0 for BWC @@ -401,6 +448,24 @@ public static SearchResponse innerFromXContent(XContentParser parser) throws IOE parser.skipChildren(); } } + } else if (PhaseTook.PHASE_TOOK.match(currentFieldName, parser.getDeprecationHandler())) { + Map phaseTookMap = new HashMap<>(); + + while ((token = parser.nextToken()) != Token.END_OBJECT) { + if (token == Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token.isValue()) { + try { + SearchPhaseName.valueOf(currentFieldName.toUpperCase(Locale.ROOT)); + phaseTookMap.put(currentFieldName, parser.longValue()); + } catch (final IllegalArgumentException ex) { + parser.skipChildren(); + } + } else { + parser.skipChildren(); + } + } + phaseTook = new PhaseTook(phaseTookMap); } else if (Clusters._CLUSTERS_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { int successful = -1; int total = -1; @@ -472,6 +537,7 @@ public static SearchResponse innerFromXContent(XContentParser parser) throws IOE successfulShards, skippedShards, tookInMillis, + phaseTook, failures.toArray(ShardSearchFailure.EMPTY_ARRAY), clusters, searchContextId @@ -491,6 +557,9 @@ public void writeTo(StreamOutput out) throws IOException { clusters.writeTo(out); out.writeOptionalString(scrollId); out.writeVLong(tookInMillis); + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeOptionalWriteable(phaseTook); + } out.writeVInt(skippedShards); out.writeOptionalString(pointInTimeId); } @@ -604,6 +673,67 @@ public String toString() { } } + /** + * Holds info about the clusters that the search was executed on: how many in total, how many of them were successful + * and how many of them were skipped. 
+ * + * @opensearch.internal + */ + public static class PhaseTook implements ToXContentFragment, Writeable { + static final ParseField PHASE_TOOK = new ParseField("phase_took"); + private final Map phaseTookMap; + + public PhaseTook(Map phaseTookMap) { + this.phaseTookMap = phaseTookMap; + } + + private PhaseTook(StreamInput in) throws IOException { + this(in.readMap(StreamInput::readString, StreamInput::readLong)); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeMap(phaseTookMap, StreamOutput::writeString, StreamOutput::writeLong); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(PHASE_TOOK.getPreferredName()); + + for (SearchPhaseName searchPhaseName : SearchPhaseName.values()) { + if (phaseTookMap.containsKey(searchPhaseName.getName())) { + builder.field(searchPhaseName.getName(), phaseTookMap.get(searchPhaseName.getName())); + } else { + builder.field(searchPhaseName.getName(), 0); + } + } + builder.endObject(); + return builder; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + PhaseTook phaseTook = (PhaseTook) o; + + if (phaseTook.phaseTookMap.equals(phaseTookMap)) { + return true; + } else { + return false; + } + } + + @Override + public int hashCode() { + return Objects.hash(phaseTookMap); + } + } + static SearchResponse empty(Supplier tookInMillisSupplier, Clusters clusters) { SearchHits searchHits = new SearchHits(new SearchHit[0], new TotalHits(0L, TotalHits.Relation.EQUAL_TO), Float.NaN); InternalSearchResponse internalSearchResponse = new InternalSearchResponse( diff --git a/server/src/main/java/org/opensearch/action/search/SearchResponseMerger.java b/server/src/main/java/org/opensearch/action/search/SearchResponseMerger.java index f90e98106f93f..054bd578cc56c 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchResponseMerger.java +++ b/server/src/main/java/org/opensearch/action/search/SearchResponseMerger.java @@ -236,6 +236,7 @@ SearchResponse getMergedResponse(SearchResponse.Clusters clusters) { successfulShards, skippedShards, tookInMillis, + searchTimeProvider.getPhaseTook(), shardFailures, clusters, null diff --git a/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java b/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java index cff1005beff27..284f71bd9da62 100644 --- a/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java +++ b/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java @@ -98,6 +98,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.EnumMap; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -154,6 +155,14 @@ public class TransportSearchAction extends HandledTransportAction SEARCH_PHASE_TOOK_ENABLED = Setting.boolSetting( + SEARCH_PHASE_TOOK_ENABLED_KEY, + false, + Property.Dynamic, + Property.NodeScope + ); + private final NodeClient client; private final ThreadPool threadPool; private final ClusterService clusterService; @@ -252,6 +261,8 @@ private Map resolveIndexBoosts(SearchRequest searchRequest, Clust } /** + * Listener to track request-level tookTime and phase tookTimes from the coordinator. + * * Search operations need two clocks. One clock is to fulfill real clock needs (e.g., resolving * "now" to an index name). 
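Editor's note: the `SearchTimeProvider` changes in the surrounding hunks record per-phase durations by listening for phase end events and attach them to the response as the `phase_took` section when enabled. A rough, self-contained sketch of that bookkeeping (the names here are illustrative stand-ins, not the real `SearchPhaseName` or listener types):

```java
import java.util.EnumMap;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.TimeUnit;

// Illustrative sketch only: note when each search phase starts, record elapsed millis
// when it ends, and expose the result as a name -> millis map for "phase_took".
enum SearchPhase { QUERY, FETCH }

final class PhaseTookRecorder {
    private final Map<SearchPhase, Long> startNanos = new EnumMap<>(SearchPhase.class);
    private final Map<SearchPhase, Long> tookMillis = new EnumMap<>(SearchPhase.class);

    void onPhaseStart(SearchPhase phase) {
        startNanos.put(phase, System.nanoTime());
    }

    void onPhaseEnd(SearchPhase phase) {
        Long start = startNanos.get(phase);
        if (start != null) {
            tookMillis.put(phase, TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start));
        }
    }

    // Shape consumed by SearchResponse.PhaseTook, e.g. {"query": 12, "fetch": 3}
    Map<String, Long> asPhaseTookMap() {
        Map<String, Long> out = new HashMap<>();
        tookMillis.forEach((phase, millis) -> out.put(phase.name().toLowerCase(Locale.ROOT), millis));
        return out;
    }
}
```

The actual implementation keys its map by `SearchPhaseName` and only adds the timings to the response when `phase_took` is requested per search or enabled through the new cluster setting.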
Another clock is needed for measuring how long a search operation * took. These two uses are at odds with each other. There are many issues with using a real @@ -261,11 +272,12 @@ private Map resolveIndexBoosts(SearchRequest searchRequest, Clust * * @opensearch.internal */ - static final class SearchTimeProvider { + static final class SearchTimeProvider implements SearchRequestOperationsListener { private final long absoluteStartMillis; private final long relativeStartNanos; private final LongSupplier relativeCurrentNanosProvider; + private boolean phaseTook = false; /** * Instantiates a new search time provider. The absolute start time is the real clock time @@ -291,6 +303,47 @@ long getAbsoluteStartMillis() { long buildTookInMillis() { return TimeUnit.NANOSECONDS.toMillis(relativeCurrentNanosProvider.getAsLong() - relativeStartNanos); } + + public void setPhaseTook(boolean phaseTook) { + this.phaseTook = phaseTook; + } + + public boolean isPhaseTook() { + return phaseTook; + } + + SearchResponse.PhaseTook getPhaseTook() { + if (phaseTook) { + Map phaseTookMap = new HashMap<>(); + // Convert Map to Map for SearchResponse() + for (SearchPhaseName searchPhaseName : phaseStatsMap.keySet()) { + phaseTookMap.put(searchPhaseName.getName(), phaseStatsMap.get(searchPhaseName)); + } + return new SearchResponse.PhaseTook(phaseTookMap); + } else { + return null; + } + } + + Map phaseStatsMap = new EnumMap<>(SearchPhaseName.class); + + @Override + public void onPhaseStart(SearchPhaseContext context) {} + + @Override + public void onPhaseEnd(SearchPhaseContext context) { + phaseStatsMap.put( + context.getCurrentPhase().getSearchPhaseName(), + TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - context.getCurrentPhase().getStartTimeInNanos()) + ); + } + + @Override + public void onPhaseFailure(SearchPhaseContext context) {} + + public Long getPhaseTookTime(SearchPhaseName searchPhaseName) { + return phaseStatsMap.get(searchPhaseName); + } } @Override @@ -332,13 +385,6 @@ public void executeRequest( SinglePhaseSearchAction phaseSearchAction, ActionListener listener ) { - final List searchListenersList = createSearchListenerList(); - final SearchRequestOperationsListener searchRequestOperationsListener; - if (!CollectionUtils.isEmpty(searchListenersList)) { - searchRequestOperationsListener = new SearchRequestOperationsListener.CompositeListener(searchListenersList, logger); - } else { - searchRequestOperationsListener = null; - } executeRequest(task, searchRequest, new SearchAsyncActionProvider() { @Override public AbstractSearchAsyncAction asyncSearchAction( @@ -355,7 +401,8 @@ public AbstractSearchAsyncAction asyncSearchAction( ActionListener listener, boolean preFilter, ThreadPool threadPool, - SearchResponse.Clusters clusters + SearchResponse.Clusters clusters, + SearchRequestOperationsListener searchRequestOperationsListener ) { return new AbstractSearchAsyncAction( actionName, @@ -419,6 +466,16 @@ private void executeRequest( relativeStartNanos, System::nanoTime ); + + final List searchListenersList = createSearchListenerList(originalSearchRequest, timeProvider); + + final SearchRequestOperationsListener searchRequestOperationsListener; + if (!CollectionUtils.isEmpty(searchListenersList)) { + searchRequestOperationsListener = new SearchRequestOperationsListener.CompositeListener(searchListenersList, logger); + } else { + searchRequestOperationsListener = null; + } + PipelinedRequest searchRequest; ActionListener listener; try { @@ -462,7 +519,8 @@ private void executeRequest( clusterState, listener, 
searchContext, - searchAsyncActionProvider + searchAsyncActionProvider, + searchRequestOperationsListener ); } else { if (shouldMinimizeRoundtrips(searchRequest)) { @@ -483,7 +541,8 @@ private void executeRequest( clusterState, l, searchContext, - searchAsyncActionProvider + searchAsyncActionProvider, + searchRequestOperationsListener ) ); } else { @@ -533,7 +592,8 @@ private void executeRequest( listener, new SearchResponse.Clusters(totalClusters, successfulClusters, skippedClusters.get()), searchContext, - searchAsyncActionProvider + searchAsyncActionProvider, + searchRequestOperationsListener ); }, listener::onFailure) ); @@ -622,6 +682,7 @@ public void onResponse(SearchResponse searchResponse) { searchResponse.getSuccessfulShards(), searchResponse.getSkippedShards(), timeProvider.buildTookInMillis(), + timeProvider.getPhaseTook(), searchResponse.getShardFailures(), new SearchResponse.Clusters(1, 1, 0), searchResponse.pointInTimeId() @@ -811,7 +872,8 @@ private void executeLocalSearch( ClusterState clusterState, ActionListener listener, SearchContextId searchContext, - SearchAsyncActionProvider searchAsyncActionProvider + SearchAsyncActionProvider searchAsyncActionProvider, + SearchRequestOperationsListener searchRequestOperationsListener ) { executeSearch( (SearchTask) task, @@ -825,7 +887,8 @@ private void executeLocalSearch( listener, SearchResponse.Clusters.EMPTY, searchContext, - searchAsyncActionProvider + searchAsyncActionProvider, + searchRequestOperationsListener ); } @@ -943,7 +1006,8 @@ private void executeSearch( ActionListener listener, SearchResponse.Clusters clusters, @Nullable SearchContextId searchContext, - SearchAsyncActionProvider searchAsyncActionProvider + SearchAsyncActionProvider searchAsyncActionProvider, + SearchRequestOperationsListener searchRequestOperationsListener ) { clusterState.blocks().globalBlockedRaiseException(ClusterBlockLevel.READ); // TODO: I think startTime() should become part of ActionRequest and that should be used both for index name @@ -1044,7 +1108,8 @@ private void executeSearch( listener, preFilterSearchShards, threadPool, - clusters + clusters, + searchRequestOperationsListener ).start(); } @@ -1127,15 +1192,30 @@ AbstractSearchAsyncAction asyncSearchAction( ActionListener listener, boolean preFilter, ThreadPool threadPool, - SearchResponse.Clusters clusters + SearchResponse.Clusters clusters, + SearchRequestOperationsListener searchRequestOperationsListener ); } - private List createSearchListenerList() { + private List createSearchListenerList(SearchRequest searchRequest, SearchTimeProvider timeProvider) { final List searchListenersList = new ArrayList<>(); + if (isRequestStatsEnabled) { searchListenersList.add(searchRequestStats); } + + // phase_took is enabled with request param and/or cluster setting + Boolean phaseTookRequestParam = searchRequest.isPhaseTook(); + if (phaseTookRequestParam == null) { // check cluster setting only when request param is undefined + if (clusterService.getClusterSettings().get(TransportSearchAction.SEARCH_PHASE_TOOK_ENABLED)) { + timeProvider.setPhaseTook(true); + searchListenersList.add(timeProvider); + } + } else if (phaseTookRequestParam == true) { + timeProvider.setPhaseTook(true); + searchListenersList.add(timeProvider); + } + return searchListenersList; } @@ -1153,15 +1233,9 @@ private AbstractSearchAsyncAction searchAsyncAction ActionListener listener, boolean preFilter, ThreadPool threadPool, - SearchResponse.Clusters clusters + SearchResponse.Clusters clusters, + 
SearchRequestOperationsListener searchRequestOperationsListener ) { - final List searchListenersList = createSearchListenerList(); - final SearchRequestOperationsListener searchRequestOperationsListener; - if (!CollectionUtils.isEmpty(searchListenersList)) { - searchRequestOperationsListener = new SearchRequestOperationsListener.CompositeListener(searchListenersList, logger); - } else { - searchRequestOperationsListener = null; - } if (preFilter) { return new CanMatchPreFilterSearchPhase( logger, @@ -1192,7 +1266,8 @@ private AbstractSearchAsyncAction searchAsyncAction listener, false, threadPool, - clusters + clusters, + searchRequestOperationsListener ); return new SearchPhase(action.getName()) { @Override diff --git a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java index 4cd3490cffb4c..90f91dcb7c553 100644 --- a/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/ClusterSettings.java @@ -130,6 +130,7 @@ import org.opensearch.node.Node.DiscoverySettings; import org.opensearch.node.NodeRoleSettings; import org.opensearch.node.remotestore.RemoteStoreNodeService; +import org.opensearch.node.resource.tracker.ResourceTrackerSettings; import org.opensearch.persistent.PersistentTasksClusterService; import org.opensearch.persistent.decider.EnableAssignmentDecider; import org.opensearch.plugins.PluginsService; @@ -375,6 +376,7 @@ public void apply(Settings value, Settings current, Settings previous) { TransportSearchAction.SHARD_COUNT_LIMIT_SETTING, TransportSearchAction.SEARCH_CANCEL_AFTER_TIME_INTERVAL_SETTING, TransportSearchAction.SEARCH_REQUEST_STATS_ENABLED, + TransportSearchAction.SEARCH_PHASE_TOOK_ENABLED, RemoteClusterService.REMOTE_CLUSTER_SKIP_UNAVAILABLE, SniffConnectionStrategy.REMOTE_CONNECTIONS_PER_CLUSTER, RemoteClusterService.REMOTE_INITIAL_CONNECTION_TIMEOUT_SETTING, @@ -654,6 +656,10 @@ public void apply(Settings value, Settings current, Settings previous) { SegmentReplicationPressureService.MAX_REPLICATION_LIMIT_STALE_REPLICA_SETTING, SegmentReplicationPressureService.MAX_ALLOWED_STALE_SHARDS, + // Settings related to resource trackers + ResourceTrackerSettings.GLOBAL_CPU_USAGE_AC_WINDOW_DURATION_SETTING, + ResourceTrackerSettings.GLOBAL_JVM_USAGE_AC_WINDOW_DURATION_SETTING, + // Settings related to Searchable Snapshots Node.NODE_SEARCH_CACHE_SIZE_SETTING, FileCache.DATA_TO_FILE_CACHE_SIZE_RATIO_SETTING, @@ -694,6 +700,12 @@ public void apply(Settings value, Settings current, Settings previous) { SearchService.CONCURRENT_SEGMENT_SEARCH_TARGET_MAX_SLICE_COUNT_SETTING ), List.of(FeatureFlags.TELEMETRY), - List.of(TelemetrySettings.TRACER_ENABLED_SETTING, TelemetrySettings.TRACER_SAMPLER_PROBABILITY) + List.of( + TelemetrySettings.TRACER_ENABLED_SETTING, + TelemetrySettings.TRACER_SAMPLER_PROBABILITY, + TelemetrySettings.METRICS_PUBLISH_INTERVAL_SETTING, + TelemetrySettings.TRACER_FEATURE_ENABLED_SETTING, + TelemetrySettings.METRICS_FEATURE_ENABLED_SETTING + ) ); } diff --git a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java index 4a8a0618ffa60..0cf97de53d5f3 100644 --- a/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java +++ b/server/src/main/java/org/opensearch/gateway/remote/RemoteClusterStateService.java @@ -120,6 +120,9 @@ public class 
RemoteClusterStateService implements Closeable { private final AtomicBoolean deleteStaleMetadataRunning = new AtomicBoolean(false); + public static final int INDEX_METADATA_CURRENT_CODEC_VERSION = 1; + public static final int MANIFEST_CURRENT_CODEC_VERSION = 1; + public RemoteClusterStateService( String nodeId, Supplier repositoriesService, @@ -426,7 +429,7 @@ private ClusterMetadataManifest uploadManifest( boolean committed ) throws IOException { synchronized (this) { - final String manifestFileName = getManifestFileName(clusterState.term(), clusterState.version()); + final String manifestFileName = getManifestFileName(clusterState.term(), clusterState.version(), committed); final ClusterMetadataManifest manifest = new ClusterMetadataManifest( clusterState.term(), clusterState.getVersion(), @@ -488,22 +491,30 @@ private void setSlowWriteLoggingThreshold(TimeValue slowWriteLoggingThreshold) { this.slowWriteLoggingThreshold = slowWriteLoggingThreshold; } - private static String getManifestFileName(long term, long version) { - // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest/manifest_2147483642_2147483637_456536447 - return String.join(DELIMITER, getManifestFileNamePrefix(term, version), RemoteStoreUtils.invertLong(System.currentTimeMillis())); - } - - private static String getManifestFileNamePrefix(long term, long version) { - // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest/manifest_2147483642_2147483637 - return String.join(DELIMITER, MANIFEST_PATH_TOKEN, RemoteStoreUtils.invertLong(term), RemoteStoreUtils.invertLong(version)); + static String getManifestFileName(long term, long version, boolean committed) { + // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/manifest/manifest______C/P____ + return String.join( + DELIMITER, + MANIFEST_PATH_TOKEN, + RemoteStoreUtils.invertLong(term), + RemoteStoreUtils.invertLong(version), + (committed ? "C" : "P"), // C for committed and P for published + RemoteStoreUtils.invertLong(System.currentTimeMillis()), + String.valueOf(MANIFEST_CURRENT_CODEC_VERSION) // Keep the codec version at last place only, during read we reads last place to + // determine codec version. + ); } - private static String indexMetadataFileName(IndexMetadata indexMetadata) { + static String indexMetadataFileName(IndexMetadata indexMetadata) { + // 123456789012_test-cluster/cluster-state/dsgYj10Nkso7/index//metadata______ return String.join( DELIMITER, INDEX_METADATA_FILE_PREFIX, - String.valueOf(indexMetadata.getVersion()), - String.valueOf(System.currentTimeMillis()) + RemoteStoreUtils.invertLong(indexMetadata.getVersion()), + RemoteStoreUtils.invertLong(System.currentTimeMillis()), + String.valueOf(INDEX_METADATA_CURRENT_CODEC_VERSION) // Keep the codec version at last place only, during read we reads last + // place to determine codec version. 
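Editor's note: the file-name helpers in this hunk encode the term, version, and timestamp as "inverted" longs so that plain lexicographic ordering of blob names lists the newest manifest first. A small illustration of the trick, assuming `invertLong` behaves roughly like `Long.MAX_VALUE` minus the value, zero-padded (the real helper is `RemoteStoreUtils.invertLong`, and the `__` delimiter is illustrative):

```java
import java.util.Locale;

// Hedged sketch of why inverted, fixed-width longs make "list latest" cheap:
// larger terms/versions and newer timestamps produce lexicographically smaller names.
final class InvertedLongDemo {
    static String invertLong(long value) {
        return String.format(Locale.ROOT, "%019d", Long.MAX_VALUE - value);
    }

    public static void main(String[] args) {
        String older = String.join("__", "manifest", invertLong(5), invertLong(100));
        String newer = String.join("__", "manifest", invertLong(5), invertLong(101));
        // The newer manifest sorts before the older one, so the first listed blob is the latest.
        System.out.println(newer.compareTo(older) < 0); // true
    }
}
```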
); } @@ -916,8 +927,7 @@ private void deleteClusterMetadata( if (filesToKeep.contains(uploadedIndexMetadata.getUploadedFilename()) == false) { staleIndexMetadataPaths.add( new BlobPath().add(INDEX_PATH_TOKEN).add(uploadedIndexMetadata.getIndexUUID()).buildAsString() - + uploadedIndexMetadata.getUploadedFilename() - + ".dat" + + INDEX_METADATA_FORMAT.blobName(uploadedIndexMetadata.getUploadedFilename()) ); } }); diff --git a/server/src/main/java/org/opensearch/identity/noop/NoopTokenManager.java b/server/src/main/java/org/opensearch/identity/noop/NoopTokenManager.java index 1dc3a58916b5c..fa6643b7447dc 100644 --- a/server/src/main/java/org/opensearch/identity/noop/NoopTokenManager.java +++ b/server/src/main/java/org/opensearch/identity/noop/NoopTokenManager.java @@ -50,9 +50,4 @@ public String asAuthHeaderValue() { } }; } - - @Override - public Subject authenticateToken(AuthToken authToken) { - return null; - } } diff --git a/server/src/main/java/org/opensearch/identity/tokens/TokenManager.java b/server/src/main/java/org/opensearch/identity/tokens/TokenManager.java index b4048251a06a2..972a9a1080955 100644 --- a/server/src/main/java/org/opensearch/identity/tokens/TokenManager.java +++ b/server/src/main/java/org/opensearch/identity/tokens/TokenManager.java @@ -30,11 +30,4 @@ public interface TokenManager { * @return a new auth token */ public AuthToken issueServiceAccountToken(final String audience); - - /** - * Authenticates a provided authToken - * @param authToken: The authToken to authenticate - * @return The authenticated subject - */ - public Subject authenticateToken(AuthToken authToken); } diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteSegmentStats.java b/server/src/main/java/org/opensearch/index/remote/RemoteSegmentStats.java index c7863536adf20..4228ec60c4524 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteSegmentStats.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteSegmentStats.java @@ -8,6 +8,7 @@ package org.opensearch.index.remote; +import org.opensearch.Version; import org.opensearch.action.admin.cluster.remotestore.stats.RemoteStoreStats; import org.opensearch.common.unit.TimeValue; import org.opensearch.core.common.io.stream.StreamInput; @@ -75,6 +76,10 @@ public class RemoteSegmentStats implements Writeable, ToXContentFragment { * Total time spent in downloading segments from remote store */ private long totalDownloadTime; + /** + * Total rejections due to remote store upload backpressure + */ + private long totalRejections; public RemoteSegmentStats() {} @@ -90,6 +95,9 @@ public RemoteSegmentStats(StreamInput in) throws IOException { totalRefreshBytesLag = in.readLong(); totalUploadTime = in.readLong(); totalDownloadTime = in.readLong(); + if (in.getVersion().onOrAfter(Version.V_2_12_0)) { + totalRejections = in.readVLong(); + } } /** @@ -115,6 +123,7 @@ public RemoteSegmentStats(RemoteSegmentTransferTracker.Stats trackerStats) { this.totalRefreshBytesLag = trackerStats.bytesLag; this.totalUploadTime = trackerStats.totalUploadTimeInMs; this.totalDownloadTime = trackerStats.directoryFileTransferTrackerStats.totalTransferTimeInMs; + this.totalRejections = trackerStats.rejectionCount; } // Getter and setters. 
All are visible for testing @@ -207,6 +216,14 @@ public void addTotalDownloadTime(long totalDownloadTime) { this.totalDownloadTime += totalDownloadTime; } + public long getTotalRejections() { + return totalRejections; + } + + public void addTotalRejections(long totalRejections) { + this.totalRejections += totalRejections; + } + /** * Adds existing stats. Used for stats roll-ups at index or node level * @@ -225,6 +242,7 @@ public void add(RemoteSegmentStats existingStats) { this.totalRefreshBytesLag += existingStats.getTotalRefreshBytesLag(); this.totalUploadTime += existingStats.getTotalUploadTime(); this.totalDownloadTime += existingStats.getTotalDownloadTime(); + this.totalRejections += existingStats.totalRejections; } } @@ -241,18 +259,25 @@ public void writeTo(StreamOutput out) throws IOException { out.writeLong(totalRefreshBytesLag); out.writeLong(totalUploadTime); out.writeLong(totalDownloadTime); + if (out.getVersion().onOrAfter(Version.V_2_12_0)) { + out.writeVLong(totalRejections); + } } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(Fields.REMOTE_STORE); + builder.startObject(Fields.UPLOAD); buildUploadStats(builder); - builder.endObject(); + builder.endObject(); // UPLOAD + builder.startObject(Fields.DOWNLOAD); buildDownloadStats(builder); - builder.endObject(); - builder.endObject(); + builder.endObject(); // DOWNLOAD + + builder.endObject(); // REMOTE_STORE + return builder; } @@ -261,13 +286,19 @@ private void buildUploadStats(XContentBuilder builder) throws IOException { builder.humanReadableField(Fields.STARTED_BYTES, Fields.STARTED, new ByteSizeValue(uploadBytesStarted)); builder.humanReadableField(Fields.SUCCEEDED_BYTES, Fields.SUCCEEDED, new ByteSizeValue(uploadBytesSucceeded)); builder.humanReadableField(Fields.FAILED_BYTES, Fields.FAILED, new ByteSizeValue(uploadBytesFailed)); - builder.endObject(); + builder.endObject(); // TOTAL_UPLOAD_SIZE + builder.startObject(Fields.REFRESH_SIZE_LAG); builder.humanReadableField(Fields.TOTAL_BYTES, Fields.TOTAL, new ByteSizeValue(totalRefreshBytesLag)); builder.humanReadableField(Fields.MAX_BYTES, Fields.MAX, new ByteSizeValue(maxRefreshBytesLag)); - builder.endObject(); + builder.endObject(); // REFRESH_SIZE_LAG + builder.humanReadableField(Fields.MAX_REFRESH_TIME_LAG_IN_MILLIS, Fields.MAX_REFRESH_TIME_LAG, new TimeValue(maxRefreshTimeLag)); builder.humanReadableField(Fields.TOTAL_TIME_SPENT_IN_MILLIS, Fields.TOTAL_TIME_SPENT, new TimeValue(totalUploadTime)); + + builder.startObject(Fields.PRESSURE); + builder.field(Fields.TOTAL_REJECTIONS, totalRejections); + builder.endObject(); // PRESSURE } private void buildDownloadStats(XContentBuilder builder) throws IOException { @@ -300,6 +331,8 @@ static final class Fields { static final String MAX_BYTES = "max_bytes"; static final String TOTAL_TIME_SPENT = "total_time_spent"; static final String TOTAL_TIME_SPENT_IN_MILLIS = "total_time_spent_in_millis"; + static final String PRESSURE = "pressure"; + static final String TOTAL_REJECTIONS = "total_rejections"; } @Override @@ -318,7 +351,8 @@ public boolean equals(Object o) { && maxRefreshBytesLag == that.maxRefreshBytesLag && totalRefreshBytesLag == that.totalRefreshBytesLag && totalUploadTime == that.totalUploadTime - && totalDownloadTime == that.totalDownloadTime; + && totalDownloadTime == that.totalDownloadTime + && totalRejections == that.totalRejections; } @Override @@ -334,7 +368,8 @@ public int hashCode() { maxRefreshBytesLag, totalRefreshBytesLag, 
totalUploadTime, - totalDownloadTime + totalDownloadTime, + totalRejections ); } } diff --git a/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java b/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java index 05081180bb179..2a703f17aa953 100644 --- a/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java +++ b/server/src/main/java/org/opensearch/index/remote/RemoteSegmentTransferTracker.java @@ -267,7 +267,8 @@ public long getRejectionCount() { return rejectionCount.get(); } - void incrementRejectionCount() { + /** public only for testing **/ + public void incrementRejectionCount() { rejectionCount.incrementAndGet(); } diff --git a/server/src/main/java/org/opensearch/monitor/fs/FsInfo.java b/server/src/main/java/org/opensearch/monitor/fs/FsInfo.java index 114702ff0d351..4e2e9f280d765 100644 --- a/server/src/main/java/org/opensearch/monitor/fs/FsInfo.java +++ b/server/src/main/java/org/opensearch/monitor/fs/FsInfo.java @@ -235,6 +235,14 @@ public static class DeviceStats implements Writeable, ToXContentFragment { final long previousWritesCompleted; final long currentSectorsWritten; final long previousSectorsWritten; + final long currentReadTime; + final long previousReadTime; + final long currentWriteTime; + final long previousWriteTime; + final long currentQueueSize; + final long previousQueueSize; + final long currentIOTime; + final long previousIOTime; public DeviceStats( final int majorDeviceNumber, @@ -244,6 +252,10 @@ public DeviceStats( final long currentSectorsRead, final long currentWritesCompleted, final long currentSectorsWritten, + final long currentReadTime, + final long currentWriteTime, + final long currrentQueueSize, + final long currentIOTime, final DeviceStats previousDeviceStats ) { this( @@ -257,7 +269,15 @@ public DeviceStats( currentSectorsRead, previousDeviceStats != null ? previousDeviceStats.currentSectorsRead : -1, currentWritesCompleted, - previousDeviceStats != null ? previousDeviceStats.currentWritesCompleted : -1 + previousDeviceStats != null ? previousDeviceStats.currentWritesCompleted : -1, + currentReadTime, + previousDeviceStats != null ? previousDeviceStats.currentReadTime : -1, + currentWriteTime, + previousDeviceStats != null ? previousDeviceStats.currentWriteTime : -1, + currrentQueueSize, + previousDeviceStats != null ? previousDeviceStats.currentQueueSize : -1, + currentIOTime, + previousDeviceStats != null ? 
previousDeviceStats.currentIOTime : -1 ); } @@ -272,7 +292,15 @@ private DeviceStats( final long currentSectorsRead, final long previousSectorsRead, final long currentWritesCompleted, - final long previousWritesCompleted + final long previousWritesCompleted, + final long currentReadTime, + final long previousReadTime, + final long currentWriteTime, + final long previousWriteTime, + final long currentQueueSize, + final long previousQueueSize, + final long currentIOTime, + final long previousIOTime ) { this.majorDeviceNumber = majorDeviceNumber; this.minorDeviceNumber = minorDeviceNumber; @@ -285,6 +313,14 @@ private DeviceStats( this.previousSectorsRead = previousSectorsRead; this.currentSectorsWritten = currentSectorsWritten; this.previousSectorsWritten = previousSectorsWritten; + this.currentReadTime = currentReadTime; + this.previousReadTime = previousReadTime; + this.currentWriteTime = currentWriteTime; + this.previousWriteTime = previousWriteTime; + this.currentQueueSize = currentQueueSize; + this.previousQueueSize = previousQueueSize; + this.currentIOTime = currentIOTime; + this.previousIOTime = previousIOTime; } public DeviceStats(StreamInput in) throws IOException { @@ -299,6 +335,25 @@ public DeviceStats(StreamInput in) throws IOException { previousSectorsRead = in.readLong(); currentSectorsWritten = in.readLong(); previousSectorsWritten = in.readLong(); + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + currentReadTime = in.readLong(); + previousReadTime = in.readLong(); + currentWriteTime = in.readLong(); + previousWriteTime = in.readLong(); + currentQueueSize = in.readLong(); + previousQueueSize = in.readLong(); + currentIOTime = in.readLong(); + previousIOTime = in.readLong(); + } else { + currentReadTime = 0; + previousReadTime = 0; + currentWriteTime = 0; + previousWriteTime = 0; + currentQueueSize = 0; + previousQueueSize = 0; + currentIOTime = 0; + previousIOTime = 0; + } } @Override @@ -314,6 +369,16 @@ public void writeTo(StreamOutput out) throws IOException { out.writeLong(previousSectorsRead); out.writeLong(currentSectorsWritten); out.writeLong(previousSectorsWritten); + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeLong(currentReadTime); + out.writeLong(previousReadTime); + out.writeLong(currentWriteTime); + out.writeLong(previousWriteTime); + out.writeLong(currentQueueSize); + out.writeLong(previousQueueSize); + out.writeLong(currentIOTime); + out.writeLong(previousIOTime); + } } public long operations() { @@ -346,6 +411,39 @@ public long writeKilobytes() { return (currentSectorsWritten - previousSectorsWritten) / 2; } + /** + * Total time taken for all read operations + */ + public long readTime() { + if (previousReadTime == -1) return -1; + return currentReadTime - previousReadTime; + } + + /** + * Total time taken for all write operations + */ + public long writeTime() { + if (previousWriteTime == -1) return -1; + return currentWriteTime - previousWriteTime; + } + + /** + * Queue size based on weighted time spent doing I/Os + */ + public long queueSize() { + if (previousQueueSize == -1) return -1; + return currentQueueSize - previousQueueSize; + } + + /** + * Total time spent doing I/Os + */ + public long ioTimeInMillis() { + if (previousIOTime == -1) return -1; + + return (currentIOTime - previousIOTime); + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.field("device_name", deviceName); @@ -354,9 +452,12 @@ public XContentBuilder toXContent(XContentBuilder builder, Params 
params) throws builder.field(IoStats.WRITE_OPERATIONS, writeOperations()); builder.field(IoStats.READ_KILOBYTES, readKilobytes()); builder.field(IoStats.WRITE_KILOBYTES, writeKilobytes()); + builder.field(IoStats.READ_TIME, readTime()); + builder.field(IoStats.WRITE_TIME, writeTime()); + builder.field(IoStats.QUEUE_SIZE, queueSize()); + builder.field(IoStats.IO_TIME_MS, ioTimeInMillis()); return builder; } - } /** @@ -371,6 +472,10 @@ public static class IoStats implements Writeable, ToXContentFragment { private static final String WRITE_OPERATIONS = "write_operations"; private static final String READ_KILOBYTES = "read_kilobytes"; private static final String WRITE_KILOBYTES = "write_kilobytes"; + private static final String READ_TIME = "read_time"; + private static final String WRITE_TIME = "write_time"; + private static final String QUEUE_SIZE = "queue_size"; + private static final String IO_TIME_MS = "io_time_in_millis"; final DeviceStats[] devicesStats; final long totalOperations; @@ -378,6 +483,10 @@ public static class IoStats implements Writeable, ToXContentFragment { final long totalWriteOperations; final long totalReadKilobytes; final long totalWriteKilobytes; + final long totalReadTime; + final long totalWriteTime; + final long totalQueueSize; + final long totalIOTimeInMillis; public IoStats(final DeviceStats[] devicesStats) { this.devicesStats = devicesStats; @@ -386,18 +495,30 @@ public IoStats(final DeviceStats[] devicesStats) { long totalWriteOperations = 0; long totalReadKilobytes = 0; long totalWriteKilobytes = 0; + long totalReadTime = 0; + long totalWriteTime = 0; + long totalQueueSize = 0; + long totalIOTimeInMillis = 0; for (DeviceStats deviceStats : devicesStats) { totalOperations += deviceStats.operations() != -1 ? deviceStats.operations() : 0; totalReadOperations += deviceStats.readOperations() != -1 ? deviceStats.readOperations() : 0; totalWriteOperations += deviceStats.writeOperations() != -1 ? deviceStats.writeOperations() : 0; totalReadKilobytes += deviceStats.readKilobytes() != -1 ? deviceStats.readKilobytes() : 0; totalWriteKilobytes += deviceStats.writeKilobytes() != -1 ? deviceStats.writeKilobytes() : 0; + totalReadTime += deviceStats.readTime() != -1 ? deviceStats.readTime() : 0; + totalWriteTime += deviceStats.writeTime() != -1 ? deviceStats.writeTime() : 0; + totalQueueSize += deviceStats.queueSize() != -1 ? deviceStats.queueSize() : 0; + totalIOTimeInMillis += deviceStats.ioTimeInMillis() != -1 ? 
deviceStats.ioTimeInMillis() : 0; } this.totalOperations = totalOperations; this.totalReadOperations = totalReadOperations; this.totalWriteOperations = totalWriteOperations; this.totalReadKilobytes = totalReadKilobytes; this.totalWriteKilobytes = totalWriteKilobytes; + this.totalReadTime = totalReadTime; + this.totalWriteTime = totalWriteTime; + this.totalQueueSize = totalQueueSize; + this.totalIOTimeInMillis = totalIOTimeInMillis; } public IoStats(StreamInput in) throws IOException { @@ -412,6 +533,17 @@ public IoStats(StreamInput in) throws IOException { this.totalWriteOperations = in.readLong(); this.totalReadKilobytes = in.readLong(); this.totalWriteKilobytes = in.readLong(); + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + this.totalReadTime = in.readLong(); + this.totalWriteTime = in.readLong(); + this.totalQueueSize = in.readLong(); + this.totalIOTimeInMillis = in.readLong(); + } else { + this.totalReadTime = 0; + this.totalWriteTime = 0; + this.totalQueueSize = 0; + this.totalIOTimeInMillis = 0; + } } @Override @@ -425,6 +557,12 @@ public void writeTo(StreamOutput out) throws IOException { out.writeLong(totalWriteOperations); out.writeLong(totalReadKilobytes); out.writeLong(totalWriteKilobytes); + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeLong(totalReadTime); + out.writeLong(totalWriteTime); + out.writeLong(totalQueueSize); + out.writeLong(totalIOTimeInMillis); + } } public DeviceStats[] getDevicesStats() { @@ -451,6 +589,34 @@ public long getTotalWriteKilobytes() { return totalWriteKilobytes; } + /** + * Sum of read time across all devices + */ + public long getTotalReadTime() { + return totalReadTime; + } + + /** + * Sum of write time across all devices + */ + public long getTotalWriteTime() { + return totalWriteTime; + } + + /** + * Sum of queue size across all devices + */ + public long getTotalQueueSize() { + return totalQueueSize; + } + + /** + * Sum of IO time across all devices + */ + public long getTotalIOTimeMillis() { + return totalIOTimeInMillis; + } + @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { if (devicesStats.length > 0) { @@ -468,11 +634,15 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(WRITE_OPERATIONS, totalWriteOperations); builder.field(READ_KILOBYTES, totalReadKilobytes); builder.field(WRITE_KILOBYTES, totalWriteKilobytes); + + builder.field(READ_TIME, totalReadTime); + builder.field(WRITE_TIME, totalWriteTime); + builder.field(QUEUE_SIZE, totalQueueSize); + builder.field(IO_TIME_MS, totalIOTimeInMillis); builder.endObject(); } return builder; } - } private final long timestamp; diff --git a/server/src/main/java/org/opensearch/monitor/fs/FsProbe.java b/server/src/main/java/org/opensearch/monitor/fs/FsProbe.java index e20d84cd9763e..f4731a4a34373 100644 --- a/server/src/main/java/org/opensearch/monitor/fs/FsProbe.java +++ b/server/src/main/java/org/opensearch/monitor/fs/FsProbe.java @@ -109,6 +109,25 @@ final FsInfo.IoStats ioStats(final Set> devicesNumbers, List devicesStats = new ArrayList<>(); + /** + * The /proc/diskstats file displays the I/O statistics of block devices. 
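The new DeviceStats accessors above report the change since the previous sample of a cumulative kernel counter, using -1 as a "no previous sample" sentinel, and the IoStats totals then sum those per-device deltas while treating -1 as zero. A minimal standalone sketch of that pattern follows; the class and field names are illustrative only, not the OpenSearch types.

// Standalone sketch: cumulative-counter deltas with a -1 "no previous sample" sentinel.
final class CounterDelta {
    private final long current;
    private final long previous; // -1 means there was no earlier observation

    CounterDelta(long current, long previous) {
        this.current = current;
        this.previous = previous;
    }

    /** Delta since the previous sample, or -1 when no previous sample exists. */
    long delta() {
        return previous == -1 ? -1 : current - previous;
    }

    /** Sums deltas across devices, counting unknown (-1) deltas as 0, mirroring the totals logic above. */
    static long total(CounterDelta... perDevice) {
        long total = 0;
        for (CounterDelta d : perDevice) {
            total += d.delta() != -1 ? d.delta() : 0;
        }
        return total;
    }

    public static void main(String[] args) {
        CounterDelta first = new CounterDelta(120, -1);   // first poll: no baseline yet
        CounterDelta second = new CounterDelta(180, 120); // later poll: 60 ms of new read time
        System.out.println(first.delta());                // -1
        System.out.println(second.delta());               // 60
        System.out.println(CounterDelta.total(first, second)); // 60, the unknown delta is ignored
    }
}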
+ * Each line contains the following 14 fields: ( + additional fields ) + * + * 1 major number + * 2 minor number + * 3 device name + * 4 reads completed successfully + * 5 reads merged + * 6 sectors read + * 7 time spent reading (ms) + * 8 writes completed + * 9 writes merged + * 10 sectors written + * 11 time spent writing (ms) + * 12 I/Os currently in progress + * 13 time spent doing I/Os (ms) ---- IO use percent + * 14 weighted time spent doing I/Os (ms) ---- Queue size + */ List lines = readProcDiskStats(); if (!lines.isEmpty()) { for (String line : lines) { @@ -123,6 +142,12 @@ final FsInfo.IoStats ioStats(final Set> devicesNumbers, final long sectorsRead = Long.parseLong(fields[5]); final long writesCompleted = Long.parseLong(fields[7]); final long sectorsWritten = Long.parseLong(fields[9]); + // readTime and writeTime calculates the total read/write time taken for each request to complete + // ioTime calculates actual time queue and disks are busy + final long readTime = Long.parseLong(fields[6]); + final long writeTime = Long.parseLong(fields[10]); + final long ioTime = fields.length > 12 ? Long.parseLong(fields[12]) : 0; + final long queueSize = fields.length > 13 ? Long.parseLong(fields[13]) : 0; final FsInfo.DeviceStats deviceStats = new FsInfo.DeviceStats( majorDeviceNumber, minorDeviceNumber, @@ -131,6 +156,10 @@ final FsInfo.IoStats ioStats(final Set> devicesNumbers, sectorsRead, writesCompleted, sectorsWritten, + readTime, + writeTime, + queueSize, + ioTime, deviceMap.get(Tuple.tuple(majorDeviceNumber, minorDeviceNumber)) ); devicesStats.add(deviceStats); diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index 66c0eeee8160f..c9148f382a028 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -167,6 +167,7 @@ import org.opensearch.monitor.fs.FsProbe; import org.opensearch.monitor.jvm.JvmInfo; import org.opensearch.node.remotestore.RemoteStoreNodeService; +import org.opensearch.node.resource.tracker.NodeResourceUsageTracker; import org.opensearch.persistent.PersistentTasksClusterService; import org.opensearch.persistent.PersistentTasksExecutor; import org.opensearch.persistent.PersistentTasksExecutorRegistry; @@ -599,10 +600,23 @@ protected Node( MetricsRegistryFactory metricsRegistryFactory; if (FeatureFlags.isEnabled(TELEMETRY)) { final TelemetrySettings telemetrySettings = new TelemetrySettings(settings, clusterService.getClusterSettings()); - List telemetryPlugins = pluginsService.filterPlugins(TelemetryPlugin.class); - TelemetryModule telemetryModule = new TelemetryModule(telemetryPlugins, telemetrySettings); - tracerFactory = new TracerFactory(telemetrySettings, telemetryModule.getTelemetry(), threadPool.getThreadContext()); - metricsRegistryFactory = new MetricsRegistryFactory(telemetrySettings, telemetryModule.getTelemetry()); + if (telemetrySettings.isTracingFeatureEnabled() || telemetrySettings.isMetricsFeatureEnabled()) { + List telemetryPlugins = pluginsService.filterPlugins(TelemetryPlugin.class); + TelemetryModule telemetryModule = new TelemetryModule(telemetryPlugins, telemetrySettings); + if (telemetrySettings.isTracingFeatureEnabled()) { + tracerFactory = new TracerFactory(telemetrySettings, telemetryModule.getTelemetry(), threadPool.getThreadContext()); + } else { + tracerFactory = new NoopTracerFactory(); + } + if (telemetrySettings.isMetricsFeatureEnabled()) { + metricsRegistryFactory = new 
MetricsRegistryFactory(telemetrySettings, telemetryModule.getTelemetry()); + } else { + metricsRegistryFactory = new NoopMetricsRegistryFactory(); + } + } else { + tracerFactory = new NoopTracerFactory(); + metricsRegistryFactory = new NoopMetricsRegistryFactory(); + } } else { tracerFactory = new NoopTracerFactory(); metricsRegistryFactory = new NoopMetricsRegistryFactory(); @@ -805,7 +819,6 @@ protected Node( remoteStoreStatsTrackerFactory, recoverySettings ); - final AliasValidator aliasValidator = new AliasValidator(); final ShardLimitValidator shardLimitValidator = new ShardLimitValidator(settings, clusterService, systemIndices); @@ -1070,6 +1083,16 @@ protected Node( transportService.getTaskManager(), taskCancellationMonitoringSettings ); + final NodeResourceUsageTracker nodeResourceUsageTracker = new NodeResourceUsageTracker( + threadPool, + settings, + clusterService.getClusterSettings() + ); + final ResourceUsageCollectorService resourceUsageCollectorService = new ResourceUsageCollectorService( + nodeResourceUsageTracker, + clusterService, + threadPool + ); this.nodeService = new NodeService( settings, threadPool, @@ -1092,6 +1115,7 @@ protected Node( searchPipelineService, fileCache, taskCancellationMonitoringService, + resourceUsageCollectorService, repositoryService ); @@ -1213,6 +1237,8 @@ protected Node( b.bind(RerouteService.class).toInstance(rerouteService); b.bind(ShardLimitValidator.class).toInstance(shardLimitValidator); b.bind(FsHealthService.class).toInstance(fsHealthService); + b.bind(NodeResourceUsageTracker.class).toInstance(nodeResourceUsageTracker); + b.bind(ResourceUsageCollectorService.class).toInstance(resourceUsageCollectorService); b.bind(SystemIndices.class).toInstance(systemIndices); b.bind(IdentityService.class).toInstance(identityService); b.bind(Tracer.class).toInstance(tracer); @@ -1329,6 +1355,8 @@ public Node start() throws NodeValidationException { injector.getInstance(RepositoriesService.class).start(); injector.getInstance(SearchService.class).start(); injector.getInstance(FsHealthService.class).start(); + injector.getInstance(NodeResourceUsageTracker.class).start(); + injector.getInstance(ResourceUsageCollectorService.class).start(); nodeService.getMonitorService().start(); nodeService.getSearchBackpressureService().start(); nodeService.getTaskCancellationMonitoringService().start(); @@ -1491,6 +1519,8 @@ private Node stop() { injector.getInstance(ClusterService.class).stop(); injector.getInstance(NodeConnectionsService.class).stop(); injector.getInstance(FsHealthService.class).stop(); + injector.getInstance(NodeResourceUsageTracker.class).stop(); + injector.getInstance(ResourceUsageCollectorService.class).stop(); nodeService.getMonitorService().stop(); nodeService.getSearchBackpressureService().stop(); injector.getInstance(GatewayService.class).stop(); @@ -1554,6 +1584,10 @@ public synchronized void close() throws IOException { toClose.add(nodeService.getSearchBackpressureService()); toClose.add(() -> stopWatch.stop().start("fsHealth")); toClose.add(injector.getInstance(FsHealthService.class)); + toClose.add(() -> stopWatch.stop().start("resource_usage_tracker")); + toClose.add(injector.getInstance(NodeResourceUsageTracker.class)); + toClose.add(() -> stopWatch.stop().start("resource_usage_collector")); + toClose.add(injector.getInstance(ResourceUsageCollectorService.class)); toClose.add(() -> stopWatch.stop().start("gateway")); toClose.add(injector.getInstance(GatewayService.class)); toClose.add(() -> stopWatch.stop().start("search")); diff 
--git a/server/src/main/java/org/opensearch/node/NodeResourceUsageStats.java b/server/src/main/java/org/opensearch/node/NodeResourceUsageStats.java new file mode 100644 index 0000000000000..6ef66d4ac1914 --- /dev/null +++ b/server/src/main/java/org/opensearch/node/NodeResourceUsageStats.java @@ -0,0 +1,81 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.node; + +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; + +import java.io.IOException; +import java.util.Locale; + +/** + * This represents the resource usage stats of a node along with the timestamp at which the stats object was created + * in the respective node + */ +public class NodeResourceUsageStats implements Writeable { + final String nodeId; + long timestamp; + double cpuUtilizationPercent; + double memoryUtilizationPercent; + + public NodeResourceUsageStats(String nodeId, long timestamp, double memoryUtilizationPercent, double cpuUtilizationPercent) { + this.nodeId = nodeId; + this.timestamp = timestamp; + this.cpuUtilizationPercent = cpuUtilizationPercent; + this.memoryUtilizationPercent = memoryUtilizationPercent; + } + + public NodeResourceUsageStats(StreamInput in) throws IOException { + this.nodeId = in.readString(); + this.timestamp = in.readLong(); + this.cpuUtilizationPercent = in.readDouble(); + this.memoryUtilizationPercent = in.readDouble(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(this.nodeId); + out.writeLong(this.timestamp); + out.writeDouble(this.cpuUtilizationPercent); + out.writeDouble(this.memoryUtilizationPercent); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("NodeResourceUsageStats["); + sb.append(nodeId).append("]("); + sb.append("Timestamp: ").append(timestamp); + sb.append(", CPU utilization percent: ").append(String.format(Locale.ROOT, "%.1f", cpuUtilizationPercent)); + sb.append(", Memory utilization percent: ").append(String.format(Locale.ROOT, "%.1f", memoryUtilizationPercent)); + sb.append(")"); + return sb.toString(); + } + + NodeResourceUsageStats(NodeResourceUsageStats nodeResourceUsageStats) { + this( + nodeResourceUsageStats.nodeId, + nodeResourceUsageStats.timestamp, + nodeResourceUsageStats.memoryUtilizationPercent, + nodeResourceUsageStats.cpuUtilizationPercent + ); + } + + public double getMemoryUtilizationPercent() { + return memoryUtilizationPercent; + } + + public double getCpuUtilizationPercent() { + return cpuUtilizationPercent; + } + + public long getTimestamp() { + return timestamp; + } +} diff --git a/server/src/main/java/org/opensearch/node/NodeService.java b/server/src/main/java/org/opensearch/node/NodeService.java index affa293390c31..e2d7bc2c86ba3 100644 --- a/server/src/main/java/org/opensearch/node/NodeService.java +++ b/server/src/main/java/org/opensearch/node/NodeService.java @@ -84,6 +84,7 @@ public class NodeService implements Closeable { private final ScriptService scriptService; private final HttpServerTransport httpServerTransport; private final ResponseCollectorService responseCollectorService; + private final ResourceUsageCollectorService resourceUsageCollectorService; private final SearchTransportService searchTransportService; private final IndexingPressureService 
indexingPressureService; private final AggregationUsageService aggregationUsageService; @@ -117,6 +118,7 @@ public class NodeService implements Closeable { SearchPipelineService searchPipelineService, FileCache fileCache, TaskCancellationMonitoringService taskCancellationMonitoringService, + ResourceUsageCollectorService resourceUsageCollectorService, RepositoriesService repositoriesService ) { this.settings = settings; @@ -140,6 +142,7 @@ public class NodeService implements Closeable { this.clusterService = clusterService; this.fileCache = fileCache; this.taskCancellationMonitoringService = taskCancellationMonitoringService; + this.resourceUsageCollectorService = resourceUsageCollectorService; this.repositoriesService = repositoriesService; clusterService.addStateApplier(ingestService); clusterService.addStateApplier(searchPipelineService); @@ -222,6 +225,7 @@ public NodeStats stats( boolean fileCacheStats, boolean taskCancellation, boolean searchPipelineStats, + boolean resourceUsageStats, boolean repositoriesStats ) { // for indices stats we want to include previous allocated shards stats as well (it will @@ -242,6 +246,7 @@ public NodeStats stats( discoveryStats ? discovery.stats() : null, ingest ? ingestService.stats() : null, adaptiveSelection ? responseCollectorService.getAdaptiveStats(searchTransportService.getPendingSearchRequests()) : null, + resourceUsageStats ? resourceUsageCollectorService.stats() : null, scriptCache ? scriptService.cacheStats() : null, indexingPressure ? this.indexingPressureService.nodeStats() : null, shardIndexingPressure ? this.indexingPressureService.shardStats(indices) : null, diff --git a/server/src/main/java/org/opensearch/node/NodesResourceUsageStats.java b/server/src/main/java/org/opensearch/node/NodesResourceUsageStats.java new file mode 100644 index 0000000000000..3dff9a27f71a8 --- /dev/null +++ b/server/src/main/java/org/opensearch/node/NodesResourceUsageStats.java @@ -0,0 +1,69 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.node; + +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ToXContentFragment; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Locale; +import java.util.Map; + +/** + * This class represents resource usage stats such as CPU, Memory and IO resource usage of each node along with the + * timestamp of the stats recorded. + */ +public class NodesResourceUsageStats implements Writeable, ToXContentFragment { + + // Map of node id to resource usage stats of the corresponding node. + private final Map nodeIdToResourceUsageStatsMap; + + public NodesResourceUsageStats(Map nodeIdToResourceUsageStatsMap) { + this.nodeIdToResourceUsageStatsMap = nodeIdToResourceUsageStatsMap; + } + + public NodesResourceUsageStats(StreamInput in) throws IOException { + this.nodeIdToResourceUsageStatsMap = in.readMap(StreamInput::readString, NodeResourceUsageStats::new); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeMap(this.nodeIdToResourceUsageStatsMap, StreamOutput::writeString, (stream, stats) -> stats.writeTo(stream)); + } + + /** + * Returns map of node id to resource usage stats of the corresponding node. 
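The FsProbe change earlier in this diff reads additional columns from /proc/diskstats (time spent reading, time spent writing, I/O time, and weighted I/O time). Below is a rough standalone sketch of that column extraction, assuming the field layout documented in the added comment; it is Linux-only and is not the FsProbe implementation itself. Note that it simply skips lines with fewer than 14 columns, whereas the probe defaults the missing values to 0.

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;

// Standalone sketch: pull the timing-related columns out of /proc/diskstats.
// 0-based indexes after splitting on whitespace: 6 = time spent reading (ms),
// 10 = time spent writing (ms), 12 = time doing I/O (ms), 13 = weighted time doing I/O (ms).
public class DiskStatsReader {
    public static void main(String[] args) throws IOException {
        List<String> lines = Files.readAllLines(Path.of("/proc/diskstats"));
        for (String line : lines) {
            String[] fields = line.trim().split("\\s+");
            if (fields.length < 14) {
                continue; // very old kernels may expose fewer columns
            }
            String device = fields[2];
            long readTimeMs = Long.parseLong(fields[6]);
            long writeTimeMs = Long.parseLong(fields[10]);
            long ioTimeMs = Long.parseLong(fields[12]);
            long weightedIoTimeMs = Long.parseLong(fields[13]);
            System.out.printf("%s read=%dms write=%dms io=%dms weighted=%dms%n",
                device, readTimeMs, writeTimeMs, ioTimeMs, weightedIoTimeMs);
        }
    }
}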
+ */ + public Map getNodeIdToResourceUsageStatsMap() { + return nodeIdToResourceUsageStatsMap; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject("resource_usage_stats"); + for (String nodeId : nodeIdToResourceUsageStatsMap.keySet()) { + builder.startObject(nodeId); + NodeResourceUsageStats resourceUsageStats = nodeIdToResourceUsageStatsMap.get(nodeId); + if (resourceUsageStats != null) { + builder.field("timestamp", resourceUsageStats.timestamp); + builder.field("cpu_utilization_percent", String.format(Locale.ROOT, "%.1f", resourceUsageStats.cpuUtilizationPercent)); + builder.field( + "memory_utilization_percent", + String.format(Locale.ROOT, "%.1f", resourceUsageStats.memoryUtilizationPercent) + ); + } + builder.endObject(); + } + builder.endObject(); + return builder; + } +} diff --git a/server/src/main/java/org/opensearch/node/ResourceUsageCollectorService.java b/server/src/main/java/org/opensearch/node/ResourceUsageCollectorService.java new file mode 100644 index 0000000000000..f1c763e09f147 --- /dev/null +++ b/server/src/main/java/org/opensearch/node/ResourceUsageCollectorService.java @@ -0,0 +1,160 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.node; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.cluster.ClusterChangedEvent; +import org.opensearch.cluster.ClusterStateListener; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.lifecycle.AbstractLifecycleComponent; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.concurrent.ConcurrentCollections; +import org.opensearch.node.resource.tracker.NodeResourceUsageTracker; +import org.opensearch.threadpool.Scheduler; +import org.opensearch.threadpool.ThreadPool; + +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.ConcurrentMap; + +/** + * This collects node level resource usage statistics such as cpu, memory, IO of each node and makes it available for + * coordinator node to aid in throttling, ranking etc + */ +public class ResourceUsageCollectorService extends AbstractLifecycleComponent implements ClusterStateListener { + + /** + * This refresh interval denotes the polling interval of ResourceUsageCollectorService to refresh the resource usage + * stats from local node + */ + private static long REFRESH_INTERVAL_IN_MILLIS = 1000; + + private static final Logger logger = LogManager.getLogger(ResourceUsageCollectorService.class); + private final ConcurrentMap nodeIdToResourceUsageStats = ConcurrentCollections.newConcurrentMap(); + + private ThreadPool threadPool; + private volatile Scheduler.Cancellable scheduledFuture; + + private NodeResourceUsageTracker nodeResourceUsageTracker; + private ClusterService clusterService; + + public ResourceUsageCollectorService( + NodeResourceUsageTracker nodeResourceUsageTracker, + ClusterService clusterService, + ThreadPool threadPool + ) { + this.threadPool = threadPool; + this.nodeResourceUsageTracker = nodeResourceUsageTracker; + this.clusterService = clusterService; + clusterService.addListener(this); + } + + @Override + public void clusterChanged(ClusterChangedEvent event) { + if 
(event.nodesRemoved()) { + for (DiscoveryNode removedNode : event.nodesDelta().removedNodes()) { + removeNodeResourceUsageStats(removedNode.getId()); + } + } + } + + void removeNodeResourceUsageStats(String nodeId) { + nodeIdToResourceUsageStats.remove(nodeId); + } + + /** + * Collect node resource usage stats along with the timestamp + */ + public void collectNodeResourceUsageStats( + String nodeId, + long timestamp, + double memoryUtilizationPercent, + double cpuUtilizationPercent + ) { + nodeIdToResourceUsageStats.compute(nodeId, (id, resourceUsageStats) -> { + if (resourceUsageStats == null) { + return new NodeResourceUsageStats(nodeId, timestamp, memoryUtilizationPercent, cpuUtilizationPercent); + } else { + resourceUsageStats.cpuUtilizationPercent = cpuUtilizationPercent; + resourceUsageStats.memoryUtilizationPercent = memoryUtilizationPercent; + resourceUsageStats.timestamp = timestamp; + return resourceUsageStats; + } + }); + } + + /** + * Get all node resource usage statistics which will be used for node stats + */ + public Map getAllNodeStatistics() { + Map nodeStats = new HashMap<>(nodeIdToResourceUsageStats.size()); + nodeIdToResourceUsageStats.forEach((nodeId, resourceUsageStats) -> { + nodeStats.put(nodeId, new NodeResourceUsageStats(resourceUsageStats)); + }); + return nodeStats; + } + + /** + * Optionally return a {@code NodeResourceUsageStats} for the given nodeid, if + * resource usage stats information exists for the given node. Returns an empty + * {@code Optional} if the node was not found. + */ + public Optional getNodeStatistics(final String nodeId) { + return Optional.ofNullable(nodeIdToResourceUsageStats.get(nodeId)) + .map(resourceUsageStats -> new NodeResourceUsageStats(resourceUsageStats)); + } + + /** + * Returns collected resource usage statistics of all nodes + */ + public NodesResourceUsageStats stats() { + return new NodesResourceUsageStats(getAllNodeStatistics()); + } + + /** + * Fetch local node resource usage statistics and add it to store along with the current timestamp + */ + private void collectLocalNodeResourceUsageStats() { + if (nodeResourceUsageTracker.isReady() && clusterService.state() != null) { + collectNodeResourceUsageStats( + clusterService.state().nodes().getLocalNodeId(), + System.currentTimeMillis(), + nodeResourceUsageTracker.getMemoryUtilizationPercent(), + nodeResourceUsageTracker.getCpuUtilizationPercent() + ); + } + } + + @Override + protected void doStart() { + /** + * Fetch local node resource usage statistics every second + */ + scheduledFuture = threadPool.scheduleWithFixedDelay(() -> { + try { + collectLocalNodeResourceUsageStats(); + } catch (Exception e) { + logger.warn("failure in ResourceUsageCollectorService", e); + } + }, new TimeValue(REFRESH_INTERVAL_IN_MILLIS), ThreadPool.Names.GENERIC); + } + + @Override + protected void doStop() { + if (scheduledFuture != null) { + scheduledFuture.cancel(); + } + } + + @Override + protected void doClose() {} +} diff --git a/server/src/main/java/org/opensearch/node/resource/tracker/AbstractAverageUsageTracker.java b/server/src/main/java/org/opensearch/node/resource/tracker/AbstractAverageUsageTracker.java new file mode 100644 index 0000000000000..f83a1b7f9fc05 --- /dev/null +++ b/server/src/main/java/org/opensearch/node/resource/tracker/AbstractAverageUsageTracker.java @@ -0,0 +1,100 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source 
license. + */ + +package org.opensearch.node.resource.tracker; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.common.lifecycle.AbstractLifecycleComponent; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.MovingAverage; +import org.opensearch.threadpool.Scheduler; +import org.opensearch.threadpool.ThreadPool; + +import java.util.concurrent.atomic.AtomicReference; + +/** + * Base class for sliding window resource usage trackers + */ +public abstract class AbstractAverageUsageTracker extends AbstractLifecycleComponent { + private static final Logger LOGGER = LogManager.getLogger(AbstractAverageUsageTracker.class); + + private final ThreadPool threadPool; + private final TimeValue pollingInterval; + private TimeValue windowDuration; + private final AtomicReference observations = new AtomicReference<>(); + + private volatile Scheduler.Cancellable scheduledFuture; + + public AbstractAverageUsageTracker(ThreadPool threadPool, TimeValue pollingInterval, TimeValue windowDuration) { + this.threadPool = threadPool; + this.pollingInterval = pollingInterval; + this.windowDuration = windowDuration; + this.setWindowSize(windowDuration); + } + + public abstract long getUsage(); + + /** + * Returns the moving average of the datapoints + */ + public double getAverage() { + return observations.get().getAverage(); + } + + /** + * Checks if we have datapoints more than or equal to the window size + */ + public boolean isReady() { + return observations.get().isReady(); + } + + /** + * Creates a new instance of MovingAverage with a new window size based on WindowDuration + */ + public void setWindowSize(TimeValue windowDuration) { + this.windowDuration = windowDuration; + int windowSize = (int) (windowDuration.nanos() / pollingInterval.nanos()); + LOGGER.debug("updated window size: {}", windowSize); + observations.set(new MovingAverage(windowSize)); + } + + public TimeValue getPollingInterval() { + return pollingInterval; + } + + public TimeValue getWindowDuration() { + return windowDuration; + } + + public long getWindowSize() { + return observations.get().getCount(); + } + + public void recordUsage(long usage) { + observations.get().record(usage); + } + + @Override + protected void doStart() { + scheduledFuture = threadPool.scheduleWithFixedDelay(() -> { + long usage = getUsage(); + recordUsage(usage); + }, pollingInterval, ThreadPool.Names.GENERIC); + } + + @Override + protected void doStop() { + if (scheduledFuture != null) { + scheduledFuture.cancel(); + } + } + + @Override + protected void doClose() {} +} diff --git a/server/src/main/java/org/opensearch/node/resource/tracker/AverageCpuUsageTracker.java b/server/src/main/java/org/opensearch/node/resource/tracker/AverageCpuUsageTracker.java new file mode 100644 index 0000000000000..160d385762eb0 --- /dev/null +++ b/server/src/main/java/org/opensearch/node/resource/tracker/AverageCpuUsageTracker.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
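The tracker above keeps a rolling average over windowDuration / pollingInterval samples and only reports once the window is full. A small self-contained sketch of that sliding-window average follows; the ring buffer here is a stand-in for OpenSearch's MovingAverage, whose internals are not shown in this diff.

// Standalone sketch: fixed-size sliding-window average, "ready" once the window is full.
public class SlidingWindowAverage {
    private final long[] window;
    private int count; // total samples recorded so far
    private long sum;  // sum of the samples currently inside the window

    public SlidingWindowAverage(int windowSize) {
        this.window = new long[windowSize];
    }

    public void record(long sample) {
        int slot = count % window.length;
        sum += sample - window[slot]; // overwrite the oldest sample
        window[slot] = sample;
        count++;
    }

    /** True once at least windowSize samples have been observed. */
    public boolean isReady() {
        return count >= window.length;
    }

    public double average() {
        int samples = Math.min(count, window.length);
        return samples == 0 ? 0.0 : (double) sum / samples;
    }

    public static void main(String[] args) {
        // e.g. a 30 s window sampled every 500 ms gives a 60-sample window
        SlidingWindowAverage cpu = new SlidingWindowAverage(60);
        for (int i = 0; i < 60; i++) {
            cpu.record(40 + (i % 5)); // simulated CPU readings between 40% and 44%
        }
        System.out.println(cpu.isReady()); // true
        System.out.println(cpu.average()); // 42.0
    }
}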
+ */ + +package org.opensearch.node.resource.tracker; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.monitor.process.ProcessProbe; +import org.opensearch.threadpool.ThreadPool; + +/** + * AverageCpuUsageTracker tracks the average CPU usage by polling the CPU usage every (pollingInterval) + * and keeping track of the rolling average over a defined time window (windowDuration). + */ +public class AverageCpuUsageTracker extends AbstractAverageUsageTracker { + private static final Logger LOGGER = LogManager.getLogger(AverageCpuUsageTracker.class); + + public AverageCpuUsageTracker(ThreadPool threadPool, TimeValue pollingInterval, TimeValue windowDuration) { + super(threadPool, pollingInterval, windowDuration); + } + + /** + * Returns the process CPU usage in percent + */ + @Override + public long getUsage() { + long usage = ProcessProbe.getInstance().getProcessCpuPercent(); + LOGGER.debug("Recording cpu usage: {}%", usage); + return usage; + } + +} diff --git a/server/src/main/java/org/opensearch/node/resource/tracker/AverageMemoryUsageTracker.java b/server/src/main/java/org/opensearch/node/resource/tracker/AverageMemoryUsageTracker.java new file mode 100644 index 0000000000000..c1d1c83656859 --- /dev/null +++ b/server/src/main/java/org/opensearch/node/resource/tracker/AverageMemoryUsageTracker.java @@ -0,0 +1,42 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.node.resource.tracker; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.threadpool.ThreadPool; + +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryMXBean; + +/** + * AverageMemoryUsageTracker tracks the average JVM usage by polling the JVM usage every (pollingInterval) + * and keeping track of the rolling average over a defined time window (windowDuration). + */ +public class AverageMemoryUsageTracker extends AbstractAverageUsageTracker { + + private static final Logger LOGGER = LogManager.getLogger(AverageMemoryUsageTracker.class); + + private static final MemoryMXBean MEMORY_MX_BEAN = ManagementFactory.getMemoryMXBean(); + + public AverageMemoryUsageTracker(ThreadPool threadPool, TimeValue pollingInterval, TimeValue windowDuration) { + super(threadPool, pollingInterval, windowDuration); + } + + /** + * Get current memory usage percentage calculated against max heap memory + */ + @Override + public long getUsage() { + long usage = MEMORY_MX_BEAN.getHeapMemoryUsage().getUsed() * 100 / MEMORY_MX_BEAN.getHeapMemoryUsage().getMax(); + LOGGER.debug("Recording memory usage: {}%", usage); + return usage; + } +} diff --git a/server/src/main/java/org/opensearch/node/resource/tracker/NodeResourceUsageTracker.java b/server/src/main/java/org/opensearch/node/resource/tracker/NodeResourceUsageTracker.java new file mode 100644 index 0000000000000..cf5f38c1b004c --- /dev/null +++ b/server/src/main/java/org/opensearch/node/resource/tracker/NodeResourceUsageTracker.java @@ -0,0 +1,118 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
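The memory tracker above records heap used as a percentage of the maximum heap on every poll. A minimal standalone sketch of that calculation using the same MemoryMXBean API is shown below; unlike the tracker, this sketch also guards against a JVM reporting an undefined maximum heap.

import java.lang.management.ManagementFactory;
import java.lang.management.MemoryMXBean;
import java.lang.management.MemoryUsage;

// Standalone sketch: heap utilization as a percentage of the maximum heap.
public class HeapUtilization {
    public static void main(String[] args) {
        MemoryMXBean memoryBean = ManagementFactory.getMemoryMXBean();
        MemoryUsage heap = memoryBean.getHeapMemoryUsage();
        long usedBytes = heap.getUsed();
        long maxBytes = heap.getMax(); // -1 if the maximum is undefined for this JVM
        if (maxBytes > 0) {
            long usedPercent = usedBytes * 100 / maxBytes;
            System.out.println("heap utilization: " + usedPercent + "%");
        } else {
            System.out.println("max heap size is undefined; cannot compute a percentage");
        }
    }
}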
+ */ + +package org.opensearch.node.resource.tracker; + +import org.opensearch.common.lifecycle.AbstractLifecycleComponent; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.threadpool.ThreadPool; + +/** + * This tracks the usage of node resources such as CPU, IO and memory + */ +public class NodeResourceUsageTracker extends AbstractLifecycleComponent { + private ThreadPool threadPool; + private final ClusterSettings clusterSettings; + private AverageCpuUsageTracker cpuUsageTracker; + private AverageMemoryUsageTracker memoryUsageTracker; + + private ResourceTrackerSettings resourceTrackerSettings; + + public NodeResourceUsageTracker(ThreadPool threadPool, Settings settings, ClusterSettings clusterSettings) { + this.threadPool = threadPool; + this.clusterSettings = clusterSettings; + this.resourceTrackerSettings = new ResourceTrackerSettings(settings); + initialize(); + } + + /** + * Return CPU utilization average if we have enough datapoints, otherwise return 0 + */ + public double getCpuUtilizationPercent() { + if (cpuUsageTracker.isReady()) { + return cpuUsageTracker.getAverage(); + } + return 0.0; + } + + /** + * Return memory utilization average if we have enough datapoints, otherwise return 0 + */ + public double getMemoryUtilizationPercent() { + if (memoryUsageTracker.isReady()) { + return memoryUsageTracker.getAverage(); + } + return 0.0; + } + + /** + * Checks if all of the resource usage trackers are ready + */ + public boolean isReady() { + return memoryUsageTracker.isReady() && cpuUsageTracker.isReady(); + } + + void initialize() { + cpuUsageTracker = new AverageCpuUsageTracker( + threadPool, + resourceTrackerSettings.getCpuPollingInterval(), + resourceTrackerSettings.getCpuWindowDuration() + ); + clusterSettings.addSettingsUpdateConsumer( + ResourceTrackerSettings.GLOBAL_CPU_USAGE_AC_WINDOW_DURATION_SETTING, + this::setCpuWindowDuration + ); + + memoryUsageTracker = new AverageMemoryUsageTracker( + threadPool, + resourceTrackerSettings.getMemoryPollingInterval(), + resourceTrackerSettings.getMemoryWindowDuration() + ); + clusterSettings.addSettingsUpdateConsumer( + ResourceTrackerSettings.GLOBAL_JVM_USAGE_AC_WINDOW_DURATION_SETTING, + this::setMemoryWindowDuration + ); + } + + private void setMemoryWindowDuration(TimeValue windowDuration) { + memoryUsageTracker.setWindowSize(windowDuration); + resourceTrackerSettings.setMemoryWindowDuration(windowDuration); + } + + private void setCpuWindowDuration(TimeValue windowDuration) { + cpuUsageTracker.setWindowSize(windowDuration); + resourceTrackerSettings.setCpuWindowDuration(windowDuration); + } + + /** + * Visible for testing + */ + ResourceTrackerSettings getResourceTrackerSettings() { + return resourceTrackerSettings; + } + + @Override + protected void doStart() { + cpuUsageTracker.doStart(); + memoryUsageTracker.doStart(); + } + + @Override + protected void doStop() { + cpuUsageTracker.doStop(); + memoryUsageTracker.doStop(); + } + + @Override + protected void doClose() { + cpuUsageTracker.doClose(); + memoryUsageTracker.doClose(); + } +} diff --git a/server/src/main/java/org/opensearch/node/resource/tracker/ResourceTrackerSettings.java b/server/src/main/java/org/opensearch/node/resource/tracker/ResourceTrackerSettings.java new file mode 100644 index 0000000000000..f81b008ba7e8b --- /dev/null +++ b/server/src/main/java/org/opensearch/node/resource/tracker/ResourceTrackerSettings.java @@ -0,0 +1,90 @@ +/* + 
* SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.node.resource.tracker; + +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; + +/** + * Settings related to resource usage trackers such as polling interval, window duration etc + */ +public class ResourceTrackerSettings { + + private static class Defaults { + /** + * This is the default polling interval of usage trackers to get the resource utilization data + */ + private static final long POLLING_INTERVAL_IN_MILLIS = 500; + /** + * This is the default window duration on which the average resource utilization values will be calculated + */ + private static final long WINDOW_DURATION_IN_SECONDS = 30; + } + + public static final Setting GLOBAL_CPU_USAGE_AC_POLLING_INTERVAL_SETTING = Setting.positiveTimeSetting( + "node.resource.tracker.global_cpu_usage.polling_interval", + TimeValue.timeValueMillis(Defaults.POLLING_INTERVAL_IN_MILLIS), + Setting.Property.NodeScope + ); + public static final Setting GLOBAL_CPU_USAGE_AC_WINDOW_DURATION_SETTING = Setting.positiveTimeSetting( + "node.resource.tracker.global_cpu_usage.window_duration", + TimeValue.timeValueSeconds(Defaults.WINDOW_DURATION_IN_SECONDS), + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + + public static final Setting GLOBAL_JVM_USAGE_AC_POLLING_INTERVAL_SETTING = Setting.positiveTimeSetting( + "node.resource.tracker.global_jvmmp.polling_interval", + TimeValue.timeValueMillis(Defaults.POLLING_INTERVAL_IN_MILLIS), + Setting.Property.NodeScope + ); + + public static final Setting GLOBAL_JVM_USAGE_AC_WINDOW_DURATION_SETTING = Setting.positiveTimeSetting( + "node.resource.tracker.global_jvmmp.window_duration", + TimeValue.timeValueSeconds(Defaults.WINDOW_DURATION_IN_SECONDS), + Setting.Property.Dynamic, + Setting.Property.NodeScope + ); + private volatile TimeValue cpuWindowDuration; + private volatile TimeValue cpuPollingInterval; + private volatile TimeValue memoryWindowDuration; + private volatile TimeValue memoryPollingInterval; + + public ResourceTrackerSettings(Settings settings) { + this.cpuPollingInterval = GLOBAL_CPU_USAGE_AC_POLLING_INTERVAL_SETTING.get(settings); + this.cpuWindowDuration = GLOBAL_CPU_USAGE_AC_WINDOW_DURATION_SETTING.get(settings); + this.memoryPollingInterval = GLOBAL_JVM_USAGE_AC_POLLING_INTERVAL_SETTING.get(settings); + this.memoryWindowDuration = GLOBAL_JVM_USAGE_AC_WINDOW_DURATION_SETTING.get(settings); + } + + public TimeValue getCpuWindowDuration() { + return this.cpuWindowDuration; + } + + public TimeValue getCpuPollingInterval() { + return cpuPollingInterval; + } + + public TimeValue getMemoryPollingInterval() { + return memoryPollingInterval; + } + + public TimeValue getMemoryWindowDuration() { + return memoryWindowDuration; + } + + public void setCpuWindowDuration(TimeValue cpuWindowDuration) { + this.cpuWindowDuration = cpuWindowDuration; + } + + public void setMemoryWindowDuration(TimeValue memoryWindowDuration) { + this.memoryWindowDuration = memoryWindowDuration; + } +} diff --git a/server/src/main/java/org/opensearch/node/resource/tracker/package-info.java b/server/src/main/java/org/opensearch/node/resource/tracker/package-info.java new file mode 100644 index 0000000000000..aace2a019973e --- /dev/null +++ 
b/server/src/main/java/org/opensearch/node/resource/tracker/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Node level resource usage stats tracker package + */ +package org.opensearch.node.resource.tracker; diff --git a/server/src/main/java/org/opensearch/rest/action/search/RestSearchAction.java b/server/src/main/java/org/opensearch/rest/action/search/RestSearchAction.java index ebfd082d974fd..080366e536da1 100644 --- a/server/src/main/java/org/opensearch/rest/action/search/RestSearchAction.java +++ b/server/src/main/java/org/opensearch/rest/action/search/RestSearchAction.java @@ -180,6 +180,12 @@ public static void parseSearchRequest( searchRequest.allowPartialSearchResults(request.paramAsBoolean("allow_partial_search_results", null)); } + if (request.hasParam("phase_took")) { + // only set if we have the parameter passed to override the cluster-level default + // else phaseTook = null + searchRequest.setPhaseTook(request.paramAsBoolean("phase_took", true)); + } + // do not allow 'query_and_fetch' or 'dfs_query_and_fetch' search types // from the REST layer. these modes are an internal optimization and should // not be specified explicitly by the user. diff --git a/server/src/main/java/org/opensearch/telemetry/TelemetrySettings.java b/server/src/main/java/org/opensearch/telemetry/TelemetrySettings.java index edb20cfa9dfc5..24dcab98c8870 100644 --- a/server/src/main/java/org/opensearch/telemetry/TelemetrySettings.java +++ b/server/src/main/java/org/opensearch/telemetry/TelemetrySettings.java @@ -28,6 +28,20 @@ public class TelemetrySettings { Setting.Property.Dynamic ); + public static final Setting TRACER_FEATURE_ENABLED_SETTING = Setting.boolSetting( + "telemetry.feature.tracer.enabled", + false, + Setting.Property.NodeScope, + Setting.Property.Final + ); + + public static final Setting METRICS_FEATURE_ENABLED_SETTING = Setting.boolSetting( + "telemetry.feature.metrics.enabled", + false, + Setting.Property.NodeScope, + Setting.Property.Final + ); + /** * Probability of sampler */ @@ -53,9 +67,14 @@ public class TelemetrySettings { private volatile boolean tracingEnabled; private volatile double samplingProbability; + private final boolean tracingFeatureEnabled; + private final boolean metricsFeatureEnabled; + public TelemetrySettings(Settings settings, ClusterSettings clusterSettings) { this.tracingEnabled = TRACER_ENABLED_SETTING.get(settings); this.samplingProbability = TRACER_SAMPLER_PROBABILITY.get(settings); + this.tracingFeatureEnabled = TRACER_FEATURE_ENABLED_SETTING.get(settings); + this.metricsFeatureEnabled = METRICS_FEATURE_ENABLED_SETTING.get(settings); clusterSettings.addSettingsUpdateConsumer(TRACER_ENABLED_SETTING, this::setTracingEnabled); clusterSettings.addSettingsUpdateConsumer(TRACER_SAMPLER_PROBABILITY, this::setSamplingProbability); @@ -83,4 +102,12 @@ public void setSamplingProbability(double samplingProbability) { public double getSamplingProbability() { return samplingProbability; } + + public boolean isTracingFeatureEnabled() { + return tracingFeatureEnabled; + } + + public boolean isMetricsFeatureEnabled() { + return metricsFeatureEnabled; + } } diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/WrappedTracer.java b/server/src/main/java/org/opensearch/telemetry/tracing/WrappedTracer.java index b2308402379ac..631fb8242d78e 100644 --- 
a/server/src/main/java/org/opensearch/telemetry/tracing/WrappedTracer.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/WrappedTracer.java @@ -59,6 +59,11 @@ public SpanScope withSpanInScope(Span span) { return getDelegateTracer().withSpanInScope(span); } + @Override + public boolean isRecording() { + return getDelegateTracer().isRecording(); + } + @Override public void close() throws IOException { defaultTracer.close(); diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableHttpChannel.java b/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableHttpChannel.java index 0a9757310fe8b..e0fb690bd29be 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableHttpChannel.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableHttpChannel.java @@ -8,7 +8,6 @@ package org.opensearch.telemetry.tracing.channels; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.action.ActionListener; import org.opensearch.http.HttpChannel; import org.opensearch.http.HttpResponse; @@ -50,7 +49,7 @@ private TraceableHttpChannel(HttpChannel delegate, Span span, Tracer tracer) { * @return http channel */ public static HttpChannel create(HttpChannel delegate, Span span, Tracer tracer) { - if (FeatureFlags.isEnabled(FeatureFlags.TELEMETRY) == true) { + if (tracer.isRecording() == true) { return new TraceableHttpChannel(delegate, span, tracer); } else { return delegate; diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableRestChannel.java b/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableRestChannel.java index d256c9d4d0e53..32769dd1d848d 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableRestChannel.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableRestChannel.java @@ -9,7 +9,6 @@ package org.opensearch.telemetry.tracing.channels; import org.opensearch.common.io.stream.BytesStreamOutput; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.xcontent.MediaType; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.rest.RestChannel; @@ -53,7 +52,7 @@ private TraceableRestChannel(RestChannel delegate, Span span, Tracer tracer) { * @return rest channel */ public static RestChannel create(RestChannel delegate, Span span, Tracer tracer) { - if (FeatureFlags.isEnabled(FeatureFlags.TELEMETRY) == true) { + if (tracer.isRecording() == true) { return new TraceableRestChannel(delegate, span, tracer); } else { return delegate; diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableTcpTransportChannel.java b/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableTcpTransportChannel.java index bd60c35c3baac..45268b4807cd9 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableTcpTransportChannel.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/channels/TraceableTcpTransportChannel.java @@ -9,7 +9,6 @@ package org.opensearch.telemetry.tracing.channels; import org.opensearch.Version; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.action.ActionListener; import org.opensearch.core.transport.TransportResponse; import org.opensearch.telemetry.tracing.Span; @@ -53,7 +52,7 @@ public TraceableTcpTransportChannel(TcpTransportChannel delegate, Span span, Tra * @return transport channel */ public static TransportChannel 
create(TcpTransportChannel delegate, final Span span, final Tracer tracer) { - if (FeatureFlags.isEnabled(FeatureFlags.TELEMETRY) == true) { + if (tracer.isRecording() == true) { delegate.getChannel().addCloseListener(new ActionListener() { @Override public void onResponse(Void unused) { diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/handler/TraceableTransportResponseHandler.java b/server/src/main/java/org/opensearch/telemetry/tracing/handler/TraceableTransportResponseHandler.java index 538bf82a1dbec..eb9d53d2df51b 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/handler/TraceableTransportResponseHandler.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/handler/TraceableTransportResponseHandler.java @@ -8,7 +8,6 @@ package org.opensearch.telemetry.tracing.handler; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.transport.TransportResponse; import org.opensearch.telemetry.tracing.Span; @@ -55,7 +54,7 @@ public static TransportResponseHandler create( Span span, Tracer tracer ) { - if (FeatureFlags.isEnabled(FeatureFlags.TELEMETRY) == true) { + if (tracer.isRecording() == true) { return new TraceableTransportResponseHandler(delegate, span, tracer); } else { return delegate; diff --git a/server/src/main/java/org/opensearch/telemetry/tracing/listener/TraceableActionListener.java b/server/src/main/java/org/opensearch/telemetry/tracing/listener/TraceableActionListener.java index 3e201641a529b..0cb4ce71d05f8 100644 --- a/server/src/main/java/org/opensearch/telemetry/tracing/listener/TraceableActionListener.java +++ b/server/src/main/java/org/opensearch/telemetry/tracing/listener/TraceableActionListener.java @@ -8,7 +8,6 @@ package org.opensearch.telemetry.tracing.listener; -import org.opensearch.common.util.FeatureFlags; import org.opensearch.core.action.ActionListener; import org.opensearch.telemetry.tracing.Span; import org.opensearch.telemetry.tracing.SpanScope; @@ -47,7 +46,7 @@ private TraceableActionListener(ActionListener delegate, Span span, Tr * @return action listener */ public static ActionListener create(ActionListener delegate, Span span, Tracer tracer) { - if (FeatureFlags.isEnabled(FeatureFlags.TELEMETRY) == true) { + if (tracer.isRecording() == true) { return new TraceableActionListener(delegate, span, tracer); } else { return delegate; diff --git a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java index 1654933806d3d..3491f18da9550 100644 --- a/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java +++ b/server/src/test/java/org/opensearch/action/admin/cluster/node/stats/NodeStatsTests.java @@ -59,6 +59,8 @@ import org.opensearch.monitor.os.OsStats; import org.opensearch.monitor.process.ProcessStats; import org.opensearch.node.AdaptiveSelectionStats; +import org.opensearch.node.NodeResourceUsageStats; +import org.opensearch.node.NodesResourceUsageStats; import org.opensearch.node.ResponseCollectorService; import org.opensearch.script.ScriptCacheStats; import org.opensearch.script.ScriptStats; @@ -282,6 +284,10 @@ public void testSerialization() throws IOException { assertEquals(ioStats.getTotalReadOperations(), deserializedIoStats.getTotalReadOperations()); assertEquals(ioStats.getTotalWriteKilobytes(), deserializedIoStats.getTotalWriteKilobytes()); 
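The Traceable* factories above now consult tracer.isRecording() instead of the TELEMETRY feature flag, returning the raw delegate untouched when tracing is off. A generic standalone sketch of that "wrap only when recording" decorator pattern follows; the Tracer and Channel interfaces here are simplified stand-ins, not the OpenSearch ones.

// Standalone sketch: decorate a delegate only when tracing is actually recording,
// so the no-tracing path pays no wrapping cost.
public class ConditionalDecoration {

    interface Tracer {
        boolean isRecording();
    }

    interface Channel {
        void send(String message);
    }

    static final class TracedChannel implements Channel {
        private final Channel delegate;

        TracedChannel(Channel delegate) {
            this.delegate = delegate;
        }

        @Override
        public void send(String message) {
            System.out.println("span: sending " + message);
            delegate.send(message);
        }
    }

    /** Mirrors the factory shape above: wrap when recording, otherwise return the delegate. */
    static Channel create(Channel delegate, Tracer tracer) {
        return tracer.isRecording() ? new TracedChannel(delegate) : delegate;
    }

    public static void main(String[] args) {
        Channel plain = message -> System.out.println("sent " + message);
        Channel withTracing = create(plain, () -> true);
        Channel withoutTracing = create(plain, () -> false);
        withTracing.send("hello");    // logged by the tracing wrapper, then sent
        withoutTracing.send("hello"); // sent directly; no wrapper allocated
    }
}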
assertEquals(ioStats.getTotalWriteOperations(), deserializedIoStats.getTotalWriteOperations()); + assertEquals(ioStats.getTotalReadTime(), deserializedIoStats.getTotalReadTime()); + assertEquals(ioStats.getTotalWriteTime(), deserializedIoStats.getTotalWriteTime()); + assertEquals(ioStats.getTotalQueueSize(), deserializedIoStats.getTotalQueueSize()); + assertEquals(ioStats.getTotalIOTimeMillis(), deserializedIoStats.getTotalIOTimeMillis()); assertEquals(ioStats.getDevicesStats().length, deserializedIoStats.getDevicesStats().length); for (int i = 0; i < ioStats.getDevicesStats().length; i++) { FsInfo.DeviceStats deviceStats = ioStats.getDevicesStats()[i]; @@ -393,6 +399,24 @@ public void testSerialization() throws IOException { assertEquals(aStats.responseTime, bStats.responseTime, 0.01); }); } + NodesResourceUsageStats resourceUsageStats = nodeStats.getResourceUsageStats(); + NodesResourceUsageStats deserializedResourceUsageStats = deserializedNodeStats.getResourceUsageStats(); + if (resourceUsageStats == null) { + assertNull(deserializedResourceUsageStats); + } else { + resourceUsageStats.getNodeIdToResourceUsageStatsMap().forEach((k, v) -> { + NodeResourceUsageStats aResourceUsageStats = resourceUsageStats.getNodeIdToResourceUsageStatsMap().get(k); + NodeResourceUsageStats bResourceUsageStats = deserializedResourceUsageStats.getNodeIdToResourceUsageStatsMap() + .get(k); + assertEquals( + aResourceUsageStats.getMemoryUtilizationPercent(), + bResourceUsageStats.getMemoryUtilizationPercent(), + 0.0 + ); + assertEquals(aResourceUsageStats.getCpuUtilizationPercent(), bResourceUsageStats.getCpuUtilizationPercent(), 0.0); + assertEquals(aResourceUsageStats.getTimestamp(), bResourceUsageStats.getTimestamp()); + }); + } ScriptCacheStats scriptCacheStats = nodeStats.getScriptCacheStats(); ScriptCacheStats deserializedScriptCacheStats = deserializedNodeStats.getScriptCacheStats(); if (scriptCacheStats == null) { @@ -625,6 +649,10 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), null ); deviceStatsArray[i] = new FsInfo.DeviceStats( @@ -635,6 +663,10 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { randomNonNegativeLong(), randomNonNegativeLong(), randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), + randomNonNegativeLong(), previousDeviceStats ); } @@ -756,6 +788,30 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { } adaptiveSelectionStats = new AdaptiveSelectionStats(nodeConnections, nodeStats); } + NodesResourceUsageStats nodesResourceUsageStats = null; + if (frequently()) { + int numNodes = randomIntBetween(0, 10); + Map nodeConnections = new HashMap<>(); + Map resourceUsageStatsMap = new HashMap<>(); + for (int i = 0; i < numNodes; i++) { + String nodeId = randomAlphaOfLengthBetween(3, 10); + // add outgoing connection info + if (frequently()) { + nodeConnections.put(nodeId, randomLongBetween(0, 100)); + } + // add node calculations + if (frequently()) { + NodeResourceUsageStats stats = new NodeResourceUsageStats( + nodeId, + System.currentTimeMillis(), + randomDoubleBetween(1.0, 100.0, true), + randomDoubleBetween(1.0, 100.0, true) + ); + resourceUsageStatsMap.put(nodeId, stats); + } + } + nodesResourceUsageStats = new NodesResourceUsageStats(resourceUsageStatsMap); + } ClusterManagerThrottlingStats 
clusterManagerThrottlingStats = null; if (frequently()) { clusterManagerThrottlingStats = new ClusterManagerThrottlingStats(); @@ -787,6 +843,7 @@ public static NodeStats createNodeStats(boolean remoteStoreStats) { discoveryStats, ingestStats, adaptiveSelectionStats, + nodesResourceUsageStats, scriptCacheStats, null, null, @@ -816,6 +873,7 @@ private static NodeIndicesStats getNodeIndicesStats(boolean remoteStoreStats) { remoteSegmentStats.setMaxRefreshTimeLag(2L); remoteSegmentStats.addTotalUploadTime(20L); remoteSegmentStats.addTotalDownloadTime(20L); + remoteSegmentStats.addTotalRejections(5L); RemoteTranslogStats remoteTranslogStats = indicesStats.getTranslog().getRemoteTranslogStats(); RemoteTranslogStats otherRemoteTranslogStats = new RemoteTranslogStats(getRandomRemoteTranslogTransferTrackerStats()); diff --git a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java index f628bb3201452..edac50813e191 100644 --- a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java +++ b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java @@ -688,7 +688,11 @@ private SearchDfsQueryThenFetchAsyncAction createSearchDfsQueryThenFetchAsyncAct ); AtomicReference exception = new AtomicReference<>(); ActionListener listener = ActionListener.wrap(response -> fail("onResponse should not be called"), exception::set); - + TransportSearchAction.SearchTimeProvider timeProvider = new TransportSearchAction.SearchTimeProvider( + 0, + System.nanoTime(), + System::nanoTime + ); return new SearchDfsQueryThenFetchAsyncAction( logger, null, @@ -702,7 +706,7 @@ private SearchDfsQueryThenFetchAsyncAction createSearchDfsQueryThenFetchAsyncAct searchRequest, listener, shardsIter, - null, + timeProvider, null, task, SearchResponse.Clusters.EMPTY, @@ -734,6 +738,11 @@ private SearchQueryThenFetchAsyncAction createSearchQueryThenFetchAsyncAction( ); AtomicReference exception = new AtomicReference<>(); ActionListener listener = ActionListener.wrap(response -> fail("onResponse should not be called"), exception::set); + TransportSearchAction.SearchTimeProvider timeProvider = new TransportSearchAction.SearchTimeProvider( + 0, + System.nanoTime(), + System::nanoTime + ); return new SearchQueryThenFetchAsyncAction( logger, null, @@ -747,7 +756,7 @@ private SearchQueryThenFetchAsyncAction createSearchQueryThenFetchAsyncAction( searchRequest, listener, shardsIter, - null, + timeProvider, null, task, SearchResponse.Clusters.EMPTY, diff --git a/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java b/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java index 25d8c5551880f..f025e3a63b9bf 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchRequestTests.java @@ -244,6 +244,9 @@ private SearchRequest mutate(SearchRequest searchRequest) { ); mutators.add(() -> mutation.source(randomValueOtherThan(searchRequest.source(), this::createSearchSourceBuilder))); mutators.add(() -> mutation.setCcsMinimizeRoundtrips(searchRequest.isCcsMinimizeRoundtrips() == false)); + mutators.add( + () -> mutation.setPhaseTook(searchRequest.isPhaseTook() == null ? 
randomBoolean() : searchRequest.isPhaseTook() == false) + ); mutators.add( () -> mutation.setCancelAfterTimeInterval( searchRequest.getCancelAfterTimeInterval() != null diff --git a/server/src/test/java/org/opensearch/action/search/SearchResponseTests.java b/server/src/test/java/org/opensearch/action/search/SearchResponseTests.java index 097e922147698..c9e59ab4ea04d 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchResponseTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchResponseTests.java @@ -74,7 +74,9 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collections; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.UUID; import static java.util.Collections.singletonMap; @@ -152,6 +154,11 @@ public SearchResponse createTestItem( Boolean terminatedEarly = randomBoolean() ? null : randomBoolean(); int numReducePhases = randomIntBetween(1, 10); long tookInMillis = randomNonNegativeLong(); + Map phaseTookMap = new HashMap<>(); + for (SearchPhaseName searchPhaseName : SearchPhaseName.values()) { + phaseTookMap.put(searchPhaseName.getName(), randomNonNegativeLong()); + } + SearchResponse.PhaseTook phaseTook = new SearchResponse.PhaseTook(phaseTookMap); int totalShards = randomIntBetween(1, Integer.MAX_VALUE); int successfulShards = randomIntBetween(0, totalShards); int skippedShards = randomIntBetween(0, totalShards); @@ -182,6 +189,7 @@ public SearchResponse createTestItem( successfulShards, skippedShards, tookInMillis, + phaseTook, shardSearchFailures, randomBoolean() ? randomClusters() : SearchResponse.Clusters.EMPTY, null @@ -353,6 +361,14 @@ public void testToXContent() { assertEquals(1, searchExtBuilders.size()); } { + Map phaseTookMap = new HashMap<>(); + for (SearchPhaseName searchPhaseName : SearchPhaseName.values()) { + phaseTookMap.put(searchPhaseName.getName(), 0L); + } + phaseTookMap.put(SearchPhaseName.QUERY.getName(), 50L); + phaseTookMap.put(SearchPhaseName.FETCH.getName(), 25L); + phaseTookMap.put(SearchPhaseName.EXPAND.getName(), 30L); + SearchResponse.PhaseTook phaseTook = new SearchResponse.PhaseTook(phaseTookMap); SearchResponse response = new SearchResponse( new InternalSearchResponse( new SearchHits(hits, new TotalHits(100, TotalHits.Relation.EQUAL_TO), 1.5f), @@ -368,13 +384,24 @@ public void testToXContent() { 0, 0, 0, + phaseTook, ShardSearchFailure.EMPTY_ARRAY, - new SearchResponse.Clusters(5, 3, 2) + new SearchResponse.Clusters(5, 3, 2), + null ); StringBuilder expectedString = new StringBuilder(); expectedString.append("{"); { expectedString.append("\"took\":0,"); + expectedString.append("\"phase_took\":"); + { + expectedString.append("{\"dfs_pre_query\":0,"); + expectedString.append("\"query\":50,"); + expectedString.append("\"fetch\":25,"); + expectedString.append("\"dfs_query\":0,"); + expectedString.append("\"expand\":30,"); + expectedString.append("\"can_match\":0},"); + } expectedString.append("\"timed_out\":false,"); expectedString.append("\"_shards\":"); { @@ -477,6 +504,24 @@ public void testToXContentEmptyClusters() throws IOException { assertEquals(0, builder.toString().length()); } + public void testSearchResponsePhaseTookEquals() throws IOException { + SearchResponse.PhaseTook phaseTookA = new SearchResponse.PhaseTook(Map.of("foo", 0L, "bar", 1L)); + SearchResponse.PhaseTook phaseTookB = new SearchResponse.PhaseTook(Map.of("foo", 1L, "bar", 1L)); + SearchResponse.PhaseTook phaseTookC = new SearchResponse.PhaseTook(Map.of("foo", 0L)); + 
SearchResponse.PhaseTook phaseTookD = new SearchResponse.PhaseTook(Map.of()); + + assertNotEquals(phaseTookA, phaseTookB); + assertNotEquals(phaseTookB, phaseTookA); + assertNotEquals(phaseTookA, phaseTookC); + assertNotEquals(phaseTookC, phaseTookA); + assertNotEquals(phaseTookA, phaseTookD); + assertNotEquals(phaseTookD, phaseTookA); + assertEquals(phaseTookA, phaseTookA); + assertEquals(phaseTookB, phaseTookB); + assertEquals(phaseTookC, phaseTookC); + assertEquals(phaseTookD, phaseTookD); + } + static class DummySearchExtBuilder extends SearchExtBuilder { static ParseField DUMMY_FIELD = new ParseField("dummy"); diff --git a/server/src/test/java/org/opensearch/action/search/SearchTimeProviderTests.java b/server/src/test/java/org/opensearch/action/search/SearchTimeProviderTests.java new file mode 100644 index 0000000000000..f0f1a43e6c21e --- /dev/null +++ b/server/src/test/java/org/opensearch/action/search/SearchTimeProviderTests.java @@ -0,0 +1,54 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.action.search; + +import org.opensearch.test.OpenSearchTestCase; + +import java.util.concurrent.TimeUnit; + +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class SearchTimeProviderTests extends OpenSearchTestCase { + + public void testSearchTimeProviderPhaseFailure() { + TransportSearchAction.SearchTimeProvider testTimeProvider = new TransportSearchAction.SearchTimeProvider(0, 0, () -> 0); + SearchPhaseContext ctx = mock(SearchPhaseContext.class); + SearchPhase mockSearchPhase = mock(SearchPhase.class); + when(ctx.getCurrentPhase()).thenReturn(mockSearchPhase); + + for (SearchPhaseName searchPhaseName : SearchPhaseName.values()) { + when(mockSearchPhase.getSearchPhaseName()).thenReturn(searchPhaseName); + testTimeProvider.onPhaseStart(ctx); + assertNull(testTimeProvider.getPhaseTookTime(searchPhaseName)); + testTimeProvider.onPhaseFailure(ctx); + assertNull(testTimeProvider.getPhaseTookTime(searchPhaseName)); + } + } + + public void testSearchTimeProviderPhaseEnd() { + TransportSearchAction.SearchTimeProvider testTimeProvider = new TransportSearchAction.SearchTimeProvider(0, 0, () -> 0); + + SearchPhaseContext ctx = mock(SearchPhaseContext.class); + SearchPhase mockSearchPhase = mock(SearchPhase.class); + when(ctx.getCurrentPhase()).thenReturn(mockSearchPhase); + + for (SearchPhaseName searchPhaseName : SearchPhaseName.values()) { + when(mockSearchPhase.getSearchPhaseName()).thenReturn(searchPhaseName); + long tookTimeInMillis = randomIntBetween(1, 100); + testTimeProvider.onPhaseStart(ctx); + long startTime = System.nanoTime() - TimeUnit.MILLISECONDS.toNanos(tookTimeInMillis); + when(mockSearchPhase.getStartTimeInNanos()).thenReturn(startTime); + assertNull(testTimeProvider.getPhaseTookTime(searchPhaseName)); + testTimeProvider.onPhaseEnd(ctx); + assertThat(testTimeProvider.getPhaseTookTime(searchPhaseName), greaterThanOrEqualTo(tookTimeInMillis)); + } + } +} diff --git a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java index 6ecbc23f75bee..119d19cc34981 100644 --- a/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java +++ 
b/server/src/test/java/org/opensearch/gateway/remote/RemoteClusterStateServiceTests.java @@ -34,6 +34,7 @@ import org.opensearch.core.index.Index; import org.opensearch.core.xcontent.NamedXContentRegistry; import org.opensearch.gateway.remote.ClusterMetadataManifest.UploadedIndexMetadata; +import org.opensearch.index.remote.RemoteStoreUtils; import org.opensearch.repositories.FilterRepository; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.RepositoryMissingException; @@ -65,6 +66,9 @@ import org.mockito.ArgumentMatchers; import static org.opensearch.gateway.remote.RemoteClusterStateService.DELIMITER; +import static org.opensearch.gateway.remote.RemoteClusterStateService.INDEX_METADATA_CURRENT_CODEC_VERSION; +import static org.opensearch.gateway.remote.RemoteClusterStateService.INDEX_METADATA_FILE_PREFIX; +import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_CURRENT_CODEC_VERSION; import static org.opensearch.gateway.remote.RemoteClusterStateService.MANIFEST_FILE_PREFIX; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_CLUSTER_STATE_REPOSITORY_NAME_ATTRIBUTE_KEY; import static org.opensearch.node.remotestore.RemoteStoreNodeAttribute.REMOTE_STORE_REPOSITORY_SETTINGS_ATTRIBUTE_KEY_PREFIX; @@ -673,6 +677,40 @@ public void testDeleteStaleClusterUUIDs() throws IOException { } } + public void testFileNames() { + final Index index = new Index("test-index", "index-uuid"); + final Settings idxSettings = Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT) + .put(IndexMetadata.SETTING_INDEX_UUID, index.getUUID()) + .build(); + final IndexMetadata indexMetadata = new IndexMetadata.Builder(index.getName()).settings(idxSettings) + .numberOfShards(1) + .numberOfReplicas(0) + .build(); + + String indexMetadataFileName = RemoteClusterStateService.indexMetadataFileName(indexMetadata); + String[] splittedIndexMetadataFileName = indexMetadataFileName.split(DELIMITER); + assertThat(indexMetadataFileName.split(DELIMITER).length, is(4)); + assertThat(splittedIndexMetadataFileName[0], is(INDEX_METADATA_FILE_PREFIX)); + assertThat(splittedIndexMetadataFileName[1], is(RemoteStoreUtils.invertLong(indexMetadata.getVersion()))); + assertThat(splittedIndexMetadataFileName[3], is(String.valueOf(INDEX_METADATA_CURRENT_CODEC_VERSION))); + + int term = randomIntBetween(5, 10); + int version = randomIntBetween(5, 10); + String manifestFileName = RemoteClusterStateService.getManifestFileName(term, version, true); + assertThat(manifestFileName.split(DELIMITER).length, is(6)); + String[] splittedName = manifestFileName.split(DELIMITER); + assertThat(splittedName[0], is(MANIFEST_FILE_PREFIX)); + assertThat(splittedName[1], is(RemoteStoreUtils.invertLong(term))); + assertThat(splittedName[2], is(RemoteStoreUtils.invertLong(version))); + assertThat(splittedName[3], is("C")); + assertThat(splittedName[5], is(String.valueOf(MANIFEST_CURRENT_CODEC_VERSION))); + + manifestFileName = RemoteClusterStateService.getManifestFileName(term, version, false); + splittedName = manifestFileName.split(DELIMITER); + assertThat(splittedName[3], is("P")); + } + private void mockObjectsForGettingPreviousClusterUUID(Map clusterUUIDsPointers) throws IOException { final BlobPath blobPath = mock(BlobPath.class); when((blobStoreRepository.basePath())).thenReturn(blobPath); diff --git a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java 
b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java index ee25d3789fb13..57509c5daa2b1 100644 --- a/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java +++ b/server/src/test/java/org/opensearch/index/engine/NRTReplicationEngineTests.java @@ -35,6 +35,7 @@ import java.util.Collection; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.atomic.AtomicLong; @@ -591,7 +592,9 @@ public void testCommitOnCloseThrowsException_decRefStore() throws Exception { indexOperations(nrtEngine, operations); // wipe the nrt directory initially so we can sync with primary. cleanAndCopySegmentsFromPrimary(nrtEngine); - nrtEngineStore.directory().deleteFile("_0.si"); + final Optional toDelete = Set.of(nrtEngineStore.directory().listAll()).stream().filter(f -> f.endsWith(".si")).findAny(); + assertTrue(toDelete.isPresent()); + nrtEngineStore.directory().deleteFile(toDelete.get()); assertEquals(2, nrtEngineStore.refCount()); nrtEngine.close(); assertEquals(1, nrtEngineStore.refCount()); @@ -614,11 +617,18 @@ public void testFlushThrowsFlushFailedExceptionOnCorruption() throws Exception { indexOperations(nrtEngine, operations); // wipe the nrt directory initially so we can sync with primary. cleanAndCopySegmentsFromPrimary(nrtEngine); - nrtEngineStore.directory().deleteFile("_0.si"); + final Optional toDelete = Set.of(nrtEngineStore.directory().listAll()).stream().filter(f -> f.endsWith(".si")).findAny(); + assertTrue(toDelete.isPresent()); + nrtEngineStore.directory().deleteFile(toDelete.get()); assertThrows(FlushFailedEngineException.class, nrtEngine::flush); - assertTrue(nrtEngineStore.isMarkedCorrupted()); - // store will throw when eventually closed, not handled here. - assertThrows(RuntimeException.class, nrtEngineStore::close); + nrtEngine.close(); + if (nrtEngineStore.isMarkedCorrupted()) { + assertThrows(RuntimeException.class, nrtEngineStore::close); + } else { + // With certain mock directories a NoSuchFileException is thrown which is not treated as a + // corruption Exception. In these cases we don't expect any issue on store close. 
+ nrtEngineStore.close(); + } } private void copySegments(Collection latestPrimaryFiles, Engine nrtEngine) throws IOException { diff --git a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java index b2eb41828a4df..9ef9bec01cb38 100644 --- a/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java +++ b/server/src/test/java/org/opensearch/index/shard/IndexShardTests.java @@ -4910,6 +4910,8 @@ private void populateSampleRemoteSegmentStats(RemoteSegmentTransferTracker track tracker.addUploadBytesStarted(30L); tracker.addUploadBytesSucceeded(10L); tracker.addUploadBytesFailed(10L); + tracker.incrementRejectionCount(); + tracker.incrementRejectionCount(); } private void populateSampleRemoteTranslogStats(RemoteTranslogTransferTracker tracker) { @@ -4943,5 +4945,7 @@ private static void assertRemoteSegmentStats( assertEquals(remoteSegmentTransferTracker.getUploadBytesStarted(), remoteSegmentStats.getUploadBytesStarted()); assertEquals(remoteSegmentTransferTracker.getUploadBytesSucceeded(), remoteSegmentStats.getUploadBytesSucceeded()); assertEquals(remoteSegmentTransferTracker.getUploadBytesFailed(), remoteSegmentStats.getUploadBytesFailed()); + assertTrue(remoteSegmentStats.getTotalRejections() > 0); + assertEquals(remoteSegmentTransferTracker.getRejectionCount(), remoteSegmentStats.getTotalRejections()); } } diff --git a/server/src/test/java/org/opensearch/monitor/fs/DeviceStatsTests.java b/server/src/test/java/org/opensearch/monitor/fs/DeviceStatsTests.java index 0fd039b84e887..0059f8e215f2e 100644 --- a/server/src/test/java/org/opensearch/monitor/fs/DeviceStatsTests.java +++ b/server/src/test/java/org/opensearch/monitor/fs/DeviceStatsTests.java @@ -46,7 +46,10 @@ public void testDeviceStats() { final int sectorsRead = randomIntBetween(8 * readsCompleted, 16 * readsCompleted); final int writesCompleted = randomIntBetween(1, 1 << 16); final int sectorsWritten = randomIntBetween(8 * writesCompleted, 16 * writesCompleted); - + final int readTime = randomIntBetween(1, 1 << 16); + final int writeTime = randomIntBetween(1, 1 << 16); + final int queueSize = randomIntBetween(1, 1 << 16); + final int ioTime = randomIntBetween(1, 1 << 16); FsInfo.DeviceStats previous = new FsInfo.DeviceStats( majorDeviceNumber, minorDeviceNumber, @@ -55,6 +58,10 @@ public void testDeviceStats() { sectorsRead, writesCompleted, sectorsWritten, + readTime, + writeTime, + queueSize, + ioTime, null ); FsInfo.DeviceStats current = new FsInfo.DeviceStats( @@ -65,6 +72,10 @@ public void testDeviceStats() { sectorsRead + 16384, writesCompleted + 2048, sectorsWritten + 32768, + readTime + 500, + writeTime + 100, + queueSize + 20, + ioTime + 8192, previous ); assertThat(current.operations(), equalTo(1024L + 2048L)); @@ -72,6 +83,10 @@ public void testDeviceStats() { assertThat(current.writeOperations(), equalTo(2048L)); assertThat(current.readKilobytes(), equalTo(16384L / 2)); assertThat(current.writeKilobytes(), equalTo(32768L / 2)); + assertEquals(500, current.readTime()); + assertEquals(100, current.writeTime()); + assertEquals(20, current.queueSize()); + assertEquals(8192, current.ioTimeInMillis()); } } diff --git a/server/src/test/java/org/opensearch/monitor/fs/FsProbeTests.java b/server/src/test/java/org/opensearch/monitor/fs/FsProbeTests.java index 686a624d988d7..59a888c665be7 100644 --- a/server/src/test/java/org/opensearch/monitor/fs/FsProbeTests.java +++
b/server/src/test/java/org/opensearch/monitor/fs/FsProbeTests.java @@ -91,6 +91,14 @@ public void testFsInfo() throws IOException { assertThat(deviceStats.previousWritesCompleted, equalTo(-1L)); assertThat(deviceStats.currentSectorsWritten, greaterThanOrEqualTo(0L)); assertThat(deviceStats.previousSectorsWritten, equalTo(-1L)); + assertThat(deviceStats.currentReadTime, greaterThanOrEqualTo(0L)); + assertThat(deviceStats.previousReadTime, greaterThanOrEqualTo(-1L)); + assertThat(deviceStats.currentWriteTime, greaterThanOrEqualTo(0L)); + assertThat(deviceStats.previousWriteTime, greaterThanOrEqualTo(-1L)); + assertThat(deviceStats.currentQueueSize, greaterThanOrEqualTo(0L)); + assertThat(deviceStats.previousQueueSize, greaterThanOrEqualTo(-1L)); + assertThat(deviceStats.currentIOTime, greaterThanOrEqualTo(0L)); + assertThat(deviceStats.previousIOTime, greaterThanOrEqualTo(-1L)); } } else { assertNull(stats.getIoStats()); @@ -243,6 +251,16 @@ List readProcDiskStats() throws IOException { assertThat(first.devicesStats[0].previousWritesCompleted, equalTo(-1L)); assertThat(first.devicesStats[0].currentSectorsWritten, equalTo(118857776L)); assertThat(first.devicesStats[0].previousSectorsWritten, equalTo(-1L)); + + assertEquals(33457, first.devicesStats[0].currentReadTime); + assertEquals(-1, first.devicesStats[0].previousReadTime); + assertEquals(18730966, first.devicesStats[0].currentWriteTime); + assertEquals(-1, first.devicesStats[0].previousWriteTime); + assertEquals(18767169, first.devicesStats[0].currentQueueSize); + assertEquals(-1, first.devicesStats[0].previousQueueSize); + assertEquals(1918440, first.devicesStats[0].currentIOTime); + assertEquals(-1, first.devicesStats[0].previousIOTime); + assertThat(first.devicesStats[1].majorDeviceNumber, equalTo(253)); assertThat(first.devicesStats[1].minorDeviceNumber, equalTo(2)); assertThat(first.devicesStats[1].deviceName, equalTo("dm-2")); @@ -255,6 +273,15 @@ List readProcDiskStats() throws IOException { assertThat(first.devicesStats[1].currentSectorsWritten, equalTo(64126096L)); assertThat(first.devicesStats[1].previousSectorsWritten, equalTo(-1L)); + assertEquals(49312, first.devicesStats[1].currentReadTime); + assertEquals(-1, first.devicesStats[1].previousReadTime); + assertEquals(33730596, first.devicesStats[1].currentWriteTime); + assertEquals(-1, first.devicesStats[1].previousWriteTime); + assertEquals(33781827, first.devicesStats[1].currentQueueSize); + assertEquals(-1, first.devicesStats[1].previousQueueSize); + assertEquals(1058193, first.devicesStats[1].currentIOTime); + assertEquals(-1, first.devicesStats[1].previousIOTime); + diskStats.set( Arrays.asList( " 259 0 nvme0n1 336870 0 7928397 82876 10264393 0 182986405 52451610 0 2971042 52536492", @@ -281,6 +308,16 @@ List readProcDiskStats() throws IOException { assertThat(second.devicesStats[0].previousWritesCompleted, equalTo(8398869L)); assertThat(second.devicesStats[0].currentSectorsWritten, equalTo(118857776L)); assertThat(second.devicesStats[0].previousSectorsWritten, equalTo(118857776L)); + + assertEquals(33464, second.devicesStats[0].currentReadTime); + assertEquals(33457, second.devicesStats[0].previousReadTime); + assertEquals(18730966, second.devicesStats[0].currentWriteTime); + assertEquals(18730966, second.devicesStats[0].previousWriteTime); + assertEquals(18767176, second.devicesStats[0].currentQueueSize); + assertEquals(18767169, second.devicesStats[0].previousQueueSize); + assertEquals(1918444, second.devicesStats[0].currentIOTime); + assertEquals(1918440, 
second.devicesStats[0].previousIOTime); + assertThat(second.devicesStats[1].majorDeviceNumber, equalTo(253)); assertThat(second.devicesStats[1].minorDeviceNumber, equalTo(2)); assertThat(second.devicesStats[1].deviceName, equalTo("dm-2")); @@ -293,11 +330,25 @@ List readProcDiskStats() throws IOException { assertThat(second.devicesStats[1].currentSectorsWritten, equalTo(64128568L)); assertThat(second.devicesStats[1].previousSectorsWritten, equalTo(64126096L)); + assertEquals(49369, second.devicesStats[1].currentReadTime); + assertEquals(49312, second.devicesStats[1].previousReadTime); + assertEquals(33730766, second.devicesStats[1].currentWriteTime); + assertEquals(33730596, second.devicesStats[1].previousWriteTime); + assertEquals(33781827, first.devicesStats[1].currentQueueSize); + assertEquals(-1L, first.devicesStats[1].previousQueueSize); + assertEquals(1058193, first.devicesStats[1].currentIOTime); + assertEquals(-1L, first.devicesStats[1].previousIOTime); + assertThat(second.totalOperations, equalTo(575L)); assertThat(second.totalReadOperations, equalTo(261L)); assertThat(second.totalWriteOperations, equalTo(314L)); assertThat(second.totalReadKilobytes, equalTo(2392L)); assertThat(second.totalWriteKilobytes, equalTo(1236L)); + + assertEquals(64, second.totalReadTime); + assertEquals(170, second.totalWriteTime); + assertEquals(236, second.totalQueueSize); + assertEquals(158, second.totalIOTimeInMillis); } public void testAdjustForHugeFilesystems() throws Exception { diff --git a/server/src/test/java/org/opensearch/node/ResourceUsageCollectorServiceTests.java b/server/src/test/java/org/opensearch/node/ResourceUsageCollectorServiceTests.java new file mode 100644 index 0000000000000..b2fa884afab69 --- /dev/null +++ b/server/src/test/java/org/opensearch/node/ResourceUsageCollectorServiceTests.java @@ -0,0 +1,190 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.node; + +import org.opensearch.cluster.ClusterChangedEvent; +import org.opensearch.cluster.ClusterName; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.core.common.transport.TransportAddress; +import org.opensearch.node.resource.tracker.NodeResourceUsageTracker; +import org.opensearch.node.resource.tracker.ResourceTrackerSettings; +import org.opensearch.test.OpenSearchSingleNodeTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.junit.After; +import org.junit.Before; + +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; + +import static org.opensearch.test.ClusterServiceUtils.createClusterService; +import static org.hamcrest.Matchers.greaterThan; + +/** + * Tests for ResourceUsageCollectorService where we test collect method, get method and whether schedulers + * are working as expected + */ +public class ResourceUsageCollectorServiceTests extends OpenSearchSingleNodeTestCase { + + private ClusterService clusterService; + private ResourceUsageCollectorService collector; + private ThreadPool threadpool; + NodeResourceUsageTracker tracker; + + @Before + public void setUp() throws Exception { + super.setUp(); + + threadpool = new TestThreadPool("resource_usage_collector_tests"); + + clusterService = createClusterService(threadpool); + + Settings settings = Settings.builder() + .put(ResourceTrackerSettings.GLOBAL_JVM_USAGE_AC_WINDOW_DURATION_SETTING.getKey(), new TimeValue(500, TimeUnit.MILLISECONDS)) + .build(); + tracker = new NodeResourceUsageTracker( + threadpool, + settings, + new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ); + collector = new ResourceUsageCollectorService(tracker, clusterService, threadpool); + tracker.start(); + collector.start(); + } + + @After + public void tearDown() throws Exception { + super.tearDown(); + threadpool.shutdownNow(); + clusterService.close(); + collector.stop(); + tracker.stop(); + collector.close(); + tracker.close(); + } + + public void testResourceUsageStats() { + collector.collectNodeResourceUsageStats("node1", System.currentTimeMillis(), 97, 99); + Map nodeStats = collector.getAllNodeStatistics(); + assertTrue(nodeStats.containsKey("node1")); + assertEquals(99.0, nodeStats.get("node1").cpuUtilizationPercent, 0.0); + assertEquals(97.0, nodeStats.get("node1").memoryUtilizationPercent, 0.0); + + Optional nodeResourceUsageStatsOptional = collector.getNodeStatistics("node1"); + + assertNotNull(nodeResourceUsageStatsOptional.get()); + assertEquals(99.0, nodeResourceUsageStatsOptional.get().cpuUtilizationPercent, 0.0); + assertEquals(97.0, nodeResourceUsageStatsOptional.get().memoryUtilizationPercent, 0.0); + + nodeResourceUsageStatsOptional = collector.getNodeStatistics("node2"); + assertTrue(nodeResourceUsageStatsOptional.isEmpty()); + } + + public void testScheduler() throws Exception { + /** + * Wait for cluster state to be ready so that localNode().getId() is ready and we add the values to the map + */ + assertBusy(() -> assertTrue(collector.getNodeStatistics(clusterService.localNode().getId()).isPresent()), 1, 
TimeUnit.MINUTES); + assertTrue(collector.getNodeStatistics(clusterService.localNode().getId()).isPresent()); + /** + * Wait for memory utilization to be reported greater than 0 + */ + assertBusy( + () -> assertThat( + collector.getNodeStatistics(clusterService.localNode().getId()).get().getMemoryUtilizationPercent(), + greaterThan(0.0) + ), + 5, + TimeUnit.SECONDS + ); + assertTrue(collector.getNodeStatistics("Invalid").isEmpty()); + } + + /* + * Test that concurrently adding values and removing nodes does not cause exceptions + */ + public void testConcurrentAddingAndRemovingNodes() throws Exception { + String[] nodes = new String[] { "a", "b", "c", "d" }; + + final CountDownLatch latch = new CountDownLatch(5); + + Runnable f = () -> { + latch.countDown(); + try { + latch.await(); + } catch (InterruptedException e) { + fail("should not be interrupted"); + } + for (int i = 0; i < randomIntBetween(100, 200); i++) { + if (randomBoolean()) { + collector.removeNodeResourceUsageStats(randomFrom(nodes)); + } + collector.collectNodeResourceUsageStats( + randomFrom(nodes), + System.currentTimeMillis(), + randomIntBetween(1, 100), + randomIntBetween(1, 100) + ); + } + }; + + Thread t1 = new Thread(f); + Thread t2 = new Thread(f); + Thread t3 = new Thread(f); + Thread t4 = new Thread(f); + + t1.start(); + t2.start(); + t3.start(); + t4.start(); + latch.countDown(); + t1.join(); + t2.join(); + t3.join(); + t4.join(); + + final Map nodeStats = collector.getAllNodeStatistics(); + for (String nodeId : nodes) { + if (nodeStats.containsKey(nodeId)) { + assertThat(nodeStats.get(nodeId).memoryUtilizationPercent, greaterThan(0.0)); + assertThat(nodeStats.get(nodeId).cpuUtilizationPercent, greaterThan(0.0)); + } + } + } + + public void testNodeRemoval() { + collector.collectNodeResourceUsageStats("node1", System.currentTimeMillis(), randomIntBetween(1, 100), randomIntBetween(1, 100)); + collector.collectNodeResourceUsageStats("node2", System.currentTimeMillis(), randomIntBetween(1, 100), randomIntBetween(1, 100)); + + ClusterState previousState = ClusterState.builder(new ClusterName("cluster")) + .nodes( + DiscoveryNodes.builder() + .add(DiscoveryNode.createLocal(Settings.EMPTY, new TransportAddress(TransportAddress.META_ADDRESS, 9200), "node1")) + .add(DiscoveryNode.createLocal(Settings.EMPTY, new TransportAddress(TransportAddress.META_ADDRESS, 9201), "node2")) + ) + .build(); + ClusterState newState = ClusterState.builder(previousState) + .nodes(DiscoveryNodes.builder(previousState.nodes()).remove("node2")) + .build(); + ClusterChangedEvent event = new ClusterChangedEvent("test", newState, previousState); + + collector.clusterChanged(event); + final Map nodeStats = collector.getAllNodeStatistics(); + assertTrue(nodeStats.containsKey("node1")); + assertFalse(nodeStats.containsKey("node2")); + } +} diff --git a/server/src/test/java/org/opensearch/node/resource/tracker/AverageUsageTrackerTests.java b/server/src/test/java/org/opensearch/node/resource/tracker/AverageUsageTrackerTests.java new file mode 100644 index 0000000000000..374c993a264d4 --- /dev/null +++ b/server/src/test/java/org/opensearch/node/resource/tracker/AverageUsageTrackerTests.java @@ -0,0 +1,99 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.node.resource.tracker; + +import org.opensearch.common.unit.TimeValue; +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.junit.After; +import org.junit.Before; + +import java.util.concurrent.TimeUnit; + +/** + * Tests to validate AverageMemoryUsageTracker and AverageCpuUsageTracker implementation + */ +public class AverageUsageTrackerTests extends OpenSearchTestCase { + ThreadPool threadPool; + AverageMemoryUsageTracker averageMemoryUsageTracker; + AverageCpuUsageTracker averageCpuUsageTracker; + + @Before + public void setup() { + threadPool = new TestThreadPool(getClass().getName()); + averageMemoryUsageTracker = new AverageMemoryUsageTracker( + threadPool, + new TimeValue(500, TimeUnit.MILLISECONDS), + new TimeValue(1000, TimeUnit.MILLISECONDS) + ); + averageCpuUsageTracker = new AverageCpuUsageTracker( + threadPool, + new TimeValue(500, TimeUnit.MILLISECONDS), + new TimeValue(1000, TimeUnit.MILLISECONDS) + ); + } + + @After + public void cleanup() { + ThreadPool.terminate(threadPool, 5, TimeUnit.SECONDS); + } + + public void testBasicUsage() { + + assertAverageUsageStats(averageMemoryUsageTracker); + assertAverageUsageStats(averageCpuUsageTracker); + } + + public void testUpdateWindowSize() { + assertUpdateWindowSize(averageMemoryUsageTracker); + assertUpdateWindowSize(averageCpuUsageTracker); + } + + private void assertAverageUsageStats(AbstractAverageUsageTracker usageTracker) { + usageTracker.recordUsage(1); + assertFalse(usageTracker.isReady()); + usageTracker.recordUsage(2); + assertTrue(usageTracker.isReady()); + assertEquals(2, usageTracker.getWindowSize()); + assertEquals(1.5, usageTracker.getAverage(), 0.0); + usageTracker.recordUsage(5); + // ( 2 + 5 ) / 2 = 3.5 + assertEquals(3.5, usageTracker.getAverage(), 0.0); + } + + private void assertUpdateWindowSize(AbstractAverageUsageTracker usageTracker) { + usageTracker.recordUsage(1); + usageTracker.recordUsage(2); + + assertEquals(2, usageTracker.getWindowSize()); + assertEquals(1.5, usageTracker.getAverage(), 0.0); + usageTracker.recordUsage(5); + // ( 2 + 5 ) / 2 = 3.5 + assertEquals(3.5, usageTracker.getAverage(), 0.0); + + usageTracker.setWindowSize(new TimeValue(2000, TimeUnit.MILLISECONDS)); + assertEquals(0, usageTracker.getWindowSize()); + assertEquals(0.0, usageTracker.getAverage(), 0.0); + // verify 2000/500 = 4 is the window size and average is calculated on window size of 4 + usageTracker.recordUsage(1); + usageTracker.recordUsage(2); + usageTracker.recordUsage(1); + assertFalse(usageTracker.isReady()); + usageTracker.recordUsage(2); + assertTrue(usageTracker.isReady()); + assertEquals(4, usageTracker.getWindowSize()); + // (1 + 2 + 1 + 2 ) / 4 = 1.5 + assertEquals(1.5, usageTracker.getAverage(), 0.0); + usageTracker.recordUsage(2); + assertTrue(usageTracker.isReady()); + // ( 2 + 1 + 2 + 2 ) / 4 = 1.75 + assertEquals(1.75, usageTracker.getAverage(), 0.0); + } +} diff --git a/server/src/test/java/org/opensearch/node/resource/tracker/NodeResourceUsageTrackerTests.java b/server/src/test/java/org/opensearch/node/resource/tracker/NodeResourceUsageTrackerTests.java new file mode 100644 index 0000000000000..1ce68b9f29062 --- /dev/null +++ b/server/src/test/java/org/opensearch/node/resource/tracker/NodeResourceUsageTrackerTests.java @@ -0,0 +1,96 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under 
the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.node.resource.tracker; + +import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsResponse; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.test.OpenSearchSingleNodeTestCase; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.junit.After; +import org.junit.Before; + +import java.util.concurrent.TimeUnit; + +import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked; +import static org.hamcrest.Matchers.greaterThan; + +/** + * Tests to assert resource usage trackers retrieving resource utilization averages + */ +public class NodeResourceUsageTrackerTests extends OpenSearchSingleNodeTestCase { + ThreadPool threadPool; + + @Before + public void setup() { + threadPool = new TestThreadPool(getClass().getName()); + } + + @After + public void cleanup() { + ThreadPool.terminate(threadPool, 5, TimeUnit.SECONDS); + assertAcked( + client().admin() + .cluster() + .prepareUpdateSettings() + .setPersistentSettings(Settings.builder().putNull("*")) + .setTransientSettings(Settings.builder().putNull("*")) + ); + } + + public void testStats() throws Exception { + Settings settings = Settings.builder() + .put(ResourceTrackerSettings.GLOBAL_JVM_USAGE_AC_WINDOW_DURATION_SETTING.getKey(), new TimeValue(500, TimeUnit.MILLISECONDS)) + .build(); + NodeResourceUsageTracker tracker = new NodeResourceUsageTracker( + threadPool, + settings, + new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ); + tracker.start(); + /** + * Asserting memory utilization to be greater than 0 + * cpu percent used is mostly 0, so skipping assertion for that + */ + assertBusy(() -> assertThat(tracker.getMemoryUtilizationPercent(), greaterThan(0.0)), 5, TimeUnit.SECONDS); + tracker.stop(); + tracker.close(); + } + + public void testUpdateSettings() { + NodeResourceUsageTracker tracker = new NodeResourceUsageTracker( + threadPool, + Settings.EMPTY, + new ClusterSettings(Settings.EMPTY, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS) + ); + + assertEquals(tracker.getResourceTrackerSettings().getCpuWindowDuration().getSeconds(), 30); + assertEquals(tracker.getResourceTrackerSettings().getMemoryWindowDuration().getSeconds(), 30); + + Settings settings = Settings.builder() + .put(ResourceTrackerSettings.GLOBAL_CPU_USAGE_AC_WINDOW_DURATION_SETTING.getKey(), "10s") + .build(); + ClusterUpdateSettingsResponse response = client().admin().cluster().prepareUpdateSettings().setPersistentSettings(settings).get(); + assertEquals( + "10s", + response.getPersistentSettings().get(ResourceTrackerSettings.GLOBAL_CPU_USAGE_AC_WINDOW_DURATION_SETTING.getKey()) + ); + + Settings jvmsettings = Settings.builder() + .put(ResourceTrackerSettings.GLOBAL_JVM_USAGE_AC_WINDOW_DURATION_SETTING.getKey(), "5s") + .build(); + response = client().admin().cluster().prepareUpdateSettings().setPersistentSettings(jvmsettings).get(); + assertEquals( + "5s", + response.getPersistentSettings().get(ResourceTrackerSettings.GLOBAL_JVM_USAGE_AC_WINDOW_DURATION_SETTING.getKey()) + ); + } +} diff --git a/server/src/test/java/org/opensearch/telemetry/metrics/MetricsRegistryFactoryTests.java b/server/src/test/java/org/opensearch/telemetry/metrics/MetricsRegistryFactoryTests.java index 5d5ea62dd161e..80942123fd4fd 100644 --- 
a/server/src/test/java/org/opensearch/telemetry/metrics/MetricsRegistryFactoryTests.java +++ b/server/src/test/java/org/opensearch/telemetry/metrics/MetricsRegistryFactoryTests.java @@ -63,6 +63,18 @@ public void testGetMetricsWithAvailableMetricsTelemetry() { } + public void testNullMetricsTelemetry() { + Settings settings = Settings.builder().put(TelemetrySettings.METRICS_FEATURE_ENABLED_SETTING.getKey(), false).build(); + TelemetrySettings telemetrySettings = new TelemetrySettings(settings, new ClusterSettings(settings, getClusterSettings())); + Telemetry mockTelemetry = mock(Telemetry.class); + when(mockTelemetry.getMetricsTelemetry()).thenReturn(null); + metricsRegistryFactory = new MetricsRegistryFactory(telemetrySettings, Optional.of(mockTelemetry)); + + MetricsRegistry metricsRegistry = metricsRegistryFactory.getMetricsRegistry(); + assertTrue(metricsRegistry instanceof NoopMetricsRegistry); + + } + private Set> getClusterSettings() { Set> allTracerSettings = new HashSet<>(); ClusterSettings.FEATURE_FLAGGED_CLUSTER_SETTINGS.get(List.of(FeatureFlags.TELEMETRY)).stream().forEach((allTracerSettings::add)); diff --git a/server/src/test/java/org/opensearch/telemetry/tracing/TracerFactoryTests.java b/server/src/test/java/org/opensearch/telemetry/tracing/TracerFactoryTests.java index b27f888eaf502..3a388be22445e 100644 --- a/server/src/test/java/org/opensearch/telemetry/tracing/TracerFactoryTests.java +++ b/server/src/test/java/org/opensearch/telemetry/tracing/TracerFactoryTests.java @@ -83,6 +83,18 @@ public void testGetTracerWithAvailableTracingTelemetryReturnsWrappedTracer() { } + public void testNullTracer() { + Settings settings = Settings.builder().put(TelemetrySettings.TRACER_FEATURE_ENABLED_SETTING.getKey(), false).build(); + TelemetrySettings telemetrySettings = new TelemetrySettings(settings, new ClusterSettings(settings, getClusterSettings())); + Telemetry mockTelemetry = mock(Telemetry.class); + when(mockTelemetry.getTracingTelemetry()).thenReturn(null); + tracerFactory = new TracerFactory(telemetrySettings, Optional.of(mockTelemetry), new ThreadContext(Settings.EMPTY)); + + Tracer tracer = tracerFactory.getTracer(); + assertTrue(tracer instanceof NoopTracer); + + } + private Set> getClusterSettings() { Set> allTracerSettings = new HashSet<>(); ClusterSettings.FEATURE_FLAGGED_CLUSTER_SETTINGS.get(List.of(FeatureFlags.TELEMETRY)).stream().forEach((allTracerSettings::add)); diff --git a/server/src/test/java/org/opensearch/telemetry/tracing/WrappedTracerTests.java b/server/src/test/java/org/opensearch/telemetry/tracing/WrappedTracerTests.java index 43e0cb8e44439..8606104d26103 100644 --- a/server/src/test/java/org/opensearch/telemetry/tracing/WrappedTracerTests.java +++ b/server/src/test/java/org/opensearch/telemetry/tracing/WrappedTracerTests.java @@ -26,6 +26,7 @@ import static org.mockito.Mockito.mock; import static org.mockito.Mockito.never; import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; public class WrappedTracerTests extends OpenSearchTestCase { @@ -38,6 +39,7 @@ public void testStartSpanWithTracingDisabledInvokesNoopTracer() throws Exception SpanCreationContext spanCreationContext = SpanCreationContext.internal().name("foo"); wrappedTracer.startSpan(spanCreationContext); assertTrue(wrappedTracer.getDelegateTracer() instanceof NoopTracer); + assertFalse(wrappedTracer.isRecording()); verify(mockDefaultTracer, never()).startSpan(SpanCreationContext.internal().name("foo")); } } @@ -46,12 +48,13 @@ public void 
testStartSpanWithTracingEnabledInvokesDefaultTracer() throws Excepti Settings settings = Settings.builder().put(TelemetrySettings.TRACER_ENABLED_SETTING.getKey(), true).build(); TelemetrySettings telemetrySettings = new TelemetrySettings(settings, new ClusterSettings(settings, getClusterSettings())); DefaultTracer mockDefaultTracer = mock(DefaultTracer.class); - + when(mockDefaultTracer.isRecording()).thenReturn(true); try (WrappedTracer wrappedTracer = new WrappedTracer(telemetrySettings, mockDefaultTracer)) { SpanCreationContext spanCreationContext = SpanCreationContext.internal().name("foo"); wrappedTracer.startSpan(spanCreationContext); assertTrue(wrappedTracer.getDelegateTracer() instanceof DefaultTracer); + assertTrue(wrappedTracer.isRecording()); verify(mockDefaultTracer).startSpan(eq(spanCreationContext)); } } diff --git a/test/fixtures/hdfs-fixture/build.gradle b/test/fixtures/hdfs-fixture/build.gradle index 7de98c0986b87..13345fcb20de1 100644 --- a/test/fixtures/hdfs-fixture/build.gradle +++ b/test/fixtures/hdfs-fixture/build.gradle @@ -71,6 +71,6 @@ dependencies { runtimeOnly("com.squareup.okhttp3:okhttp:4.11.0") { exclude group: "com.squareup.okio" } - runtimeOnly "com.squareup.okio:okio:3.5.0" + runtimeOnly "com.squareup.okio:okio:3.6.0" runtimeOnly "org.xerial.snappy:snappy-java:1.1.10.5" } diff --git a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java index 0c67d0b242b58..60a54110fd0b4 100644 --- a/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java +++ b/test/framework/src/main/java/org/opensearch/cluster/MockInternalClusterInfoService.java @@ -112,6 +112,7 @@ List adjustNodesStats(List nodesStats) { nodeStats.getDiscoveryStats(), nodeStats.getIngestStats(), nodeStats.getAdaptiveSelectionStats(), + nodeStats.getResourceUsageStats(), nodeStats.getScriptCacheStats(), nodeStats.getIndexingPressureStats(), nodeStats.getShardIndexingPressureStats(), diff --git a/test/framework/src/main/java/org/opensearch/search/RandomSearchRequestGenerator.java b/test/framework/src/main/java/org/opensearch/search/RandomSearchRequestGenerator.java index b942136e1f1e2..74de1e6d96d93 100644 --- a/test/framework/src/main/java/org/opensearch/search/RandomSearchRequestGenerator.java +++ b/test/framework/src/main/java/org/opensearch/search/RandomSearchRequestGenerator.java @@ -131,6 +131,9 @@ public static SearchRequest randomSearchRequest(Supplier ra if (randomBoolean()) { searchRequest.setCancelAfterTimeInterval(TimeValue.parseTimeValue(randomTimeValue(), null, "cancel_after_time_interval")); } + if (randomBoolean()) { + searchRequest.setPhaseTook(randomBoolean()); + } return searchRequest; } diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java index da829a3bc5225..c16cc1d2a5fba 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchIntegTestCase.java @@ -1928,6 +1928,7 @@ protected Settings nodeSettings(int nodeOrdinal) { // Enable tracer only when Telemetry Setting is enabled if (featureFlagSettings().getAsBoolean(FeatureFlags.TELEMETRY_SETTING.getKey(), false)) { + builder.put(TelemetrySettings.TRACER_FEATURE_ENABLED_SETTING.getKey(), true); builder.put(TelemetrySettings.TRACER_ENABLED_SETTING.getKey(), true); } if 
(FeatureFlags.CONCURRENT_SEGMENT_SEARCH_SETTING.get(featureFlagSettings)) { diff --git a/test/framework/src/main/java/org/opensearch/test/OpenSearchSingleNodeTestCase.java b/test/framework/src/main/java/org/opensearch/test/OpenSearchSingleNodeTestCase.java index f14fe3bf3961c..efc29d1c254e6 100644 --- a/test/framework/src/main/java/org/opensearch/test/OpenSearchSingleNodeTestCase.java +++ b/test/framework/src/main/java/org/opensearch/test/OpenSearchSingleNodeTestCase.java @@ -254,6 +254,7 @@ private Node newNode() { .putList(INITIAL_CLUSTER_MANAGER_NODES_SETTING.getKey(), nodeName) .put(FeatureFlags.TELEMETRY_SETTING.getKey(), true) .put(TelemetrySettings.TRACER_ENABLED_SETTING.getKey(), true) + .put(TelemetrySettings.TRACER_FEATURE_ENABLED_SETTING.getKey(), true) .put(nodeSettings()) // allow test cases to provide their own settings or override these .put(featureFlagSettings); if (FeatureFlags.CONCURRENT_SEGMENT_SEARCH_SETTING.get(featureFlagSettings)) { @@ -271,6 +272,7 @@ private Node newNode() { plugins.add(MockHttpTransport.TestPlugin.class); } plugins.add(MockScriptService.TestPlugin.class); + plugins.add(MockTelemetryPlugin.class); Node node = new MockNode(settingsBuilder.build(), plugins, forbidPrivateIndexSettings()); try { diff --git a/test/framework/src/main/java/org/opensearch/test/telemetry/MockTelemetry.java b/test/framework/src/main/java/org/opensearch/test/telemetry/MockTelemetry.java index 95321a7009be9..dda413ce2818e 100644 --- a/test/framework/src/main/java/org/opensearch/test/telemetry/MockTelemetry.java +++ b/test/framework/src/main/java/org/opensearch/test/telemetry/MockTelemetry.java @@ -12,6 +12,7 @@ import org.opensearch.telemetry.TelemetrySettings; import org.opensearch.telemetry.metrics.Counter; import org.opensearch.telemetry.metrics.MetricsTelemetry; +import org.opensearch.telemetry.metrics.noop.NoopCounter; import org.opensearch.telemetry.tracing.TracingTelemetry; import org.opensearch.test.telemetry.tracing.MockTracingTelemetry; @@ -37,12 +38,12 @@ public MetricsTelemetry getMetricsTelemetry() { return new MetricsTelemetry() { @Override public Counter createCounter(String name, String description, String unit) { - return null; + return NoopCounter.INSTANCE; } @Override public Counter createUpDownCounter(String name, String description, String unit) { - return null; + return NoopCounter.INSTANCE; } @Override
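
The window arithmetic asserted in AverageUsageTrackerTests above follows a simple rule: the window holds windowDuration / pollingInterval samples, so a 1000 ms window polled every 500 ms keeps two samples and recording 1, 2, 5 averages (2 + 5) / 2 = 3.5, while widening the window to 2000 ms clears the buffer and averages over four samples. The sketch below is a minimal stand-in for that behaviour only; the class name SlidingWindowAverage and its shape are assumptions for illustration, not the OpenSearch usage trackers, which additionally poll JVM and CPU usage on a thread pool.

import java.util.ArrayDeque;
import java.util.Deque;

/**
 * Illustrative sliding-window average matching the assertions in
 * AverageUsageTrackerTests; not the actual OpenSearch implementation.
 */
final class SlidingWindowAverage {
    private final long pollIntervalMillis;
    private Deque<Long> window;
    private int capacity;

    SlidingWindowAverage(long windowDurationMillis, long pollIntervalMillis) {
        this.pollIntervalMillis = pollIntervalMillis;
        setWindowDuration(windowDurationMillis);
    }

    /** Capacity is duration / polling interval, e.g. 1000 ms / 500 ms = 2 samples. */
    void setWindowDuration(long windowDurationMillis) {
        this.capacity = (int) (windowDurationMillis / pollIntervalMillis);
        this.window = new ArrayDeque<>(capacity); // resizing discards recorded samples
    }

    void recordUsage(long value) {
        if (window.size() == capacity) {
            window.pollFirst(); // evict the oldest sample so only the latest window counts
        }
        window.addLast(value);
    }

    /** Number of samples currently held; 0 right after a resize. */
    int getWindowSize() {
        return window.size();
    }

    /** Ready once a full window of samples has been recorded. */
    boolean isReady() {
        return window.size() == capacity;
    }

    double getAverage() {
        return window.stream().mapToLong(Long::longValue).average().orElse(0.0);
    }

    public static void main(String[] args) {
        SlidingWindowAverage tracker = new SlidingWindowAverage(1000, 500); // capacity 2
        tracker.recordUsage(1);
        tracker.recordUsage(2);
        tracker.recordUsage(5);
        System.out.println(tracker.getAverage()); // (2 + 5) / 2 = 3.5, as asserted above
    }
}

A bounded deque keeps eviction of the oldest sample O(1), which is why the averages in the tests always cover exactly the most recent window of recordings.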