[ML] Validate at least one feature is available for DF analytics #55876

Merged
3 changes: 2 additions & 1 deletion x-pack/plugin/ml/qa/ml-with-security/build.gradle
@@ -187,7 +187,8 @@ integTest.runner {
'ml/preview_datafeed/Test preview missing datafeed',
'ml/revert_model_snapshot/Test revert model with invalid snapshotId',
'ml/start_data_frame_analytics/Test start given missing source index',
'ml/start_data_frame_analytics/Test start given source index has no compatible fields',
'ml/start_data_frame_analytics/Test start outlier_detection given source index has no fields',
'ml/start_data_frame_analytics/Test start regression given source index only has dependent variable',
'ml/start_data_frame_analytics/Test start with inconsistent body/param ids',
'ml/start_data_frame_analytics/Test start given dest index is not empty',
'ml/start_data_frame_analytics/Test start with compatible fields but no data',
@@ -61,6 +61,7 @@
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsState;
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsTaskState;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.RequiredField;
import org.elasticsearch.xpack.core.ml.job.messages.Messages;
import org.elasticsearch.xpack.core.ml.job.persistence.AnomalyDetectorsIndex;
import org.elasticsearch.xpack.core.ml.utils.ExceptionsHelper;
@@ -83,7 +84,9 @@
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;

import static org.elasticsearch.xpack.core.ClientHelper.ML_ORIGIN;
import static org.elasticsearch.xpack.core.ClientHelper.executeAsyncWithOrigin;
@@ -244,7 +247,7 @@ private void getStartContext(String id, Task task, ActionListener<StartContext>
ParentTaskAssigningClient parentTaskClient = new ParentTaskAssigningClient(client, task.getParentTaskId());
// Step 7. Validate that there are analyzable data in the source index
ActionListener<StartContext> validateMappingsMergeListener = ActionListener.wrap(
startContext -> validateSourceIndexHasRows(startContext, finalListener),
startContext -> validateSourceIndexHasAnalyzableData(startContext, finalListener),
finalListener::onFailure
);

@@ -319,6 +322,37 @@ private void getStartContext(String id, Task task, ActionListener<StartContext>
configProvider.get(id, getConfigListener);
}

private void validateSourceIndexHasAnalyzableData(StartContext startContext, ActionListener<StartContext> listener) {
ActionListener<Void> validateAtLeastOneAnalyzedFieldListener = ActionListener.wrap(
aVoid -> validateSourceIndexHasRows(startContext, listener),
listener::onFailure
);

validateSourceIndexHasAtLeastOneAnalyzedField(startContext, validateAtLeastOneAnalyzedFieldListener);
}

private void validateSourceIndexHasAtLeastOneAnalyzedField(StartContext startContext, ActionListener<Void> listener) {
Set<String> requiredFields = startContext.config.getAnalysis().getRequiredFields().stream()
.map(RequiredField::getName)
.collect(Collectors.toSet());

// We assume here that required fields are not features
long nonRequiredFieldsCount = startContext.extractedFields.getAllFields().stream()
.filter(extractedField -> requiredFields.contains(extractedField.getName()) == false)
.count();
if (nonRequiredFieldsCount == 0) {
StringBuilder msgBuilder = new StringBuilder("at least one field must be included in the analysis");
if (requiredFields.isEmpty() == false) {
msgBuilder.append(" (excluding fields ")
.append(requiredFields)
.append(")");
}
listener.onFailure(ExceptionsHelper.badRequestException(msgBuilder.toString()));
} else {
listener.onResponse(null);
}
}

private void validateSourceIndexHasRows(StartContext startContext, ActionListener<StartContext> listener) {
DataFrameDataExtractorFactory extractorFactory = DataFrameDataExtractorFactory.createForSourceIndices(client,
"validate_source_index_has_rows-" + startContext.config.getId(),
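For reference, a minimal self-contained sketch of the check introduced above. It substitutes plain strings and an IllegalArgumentException for the real ExtractedFields/RequiredField/ExceptionsHelper plumbing, so the class and method names below are illustrative only, not part of the change:

import java.util.List;
import java.util.Set;

// Stand-in sketch of the "at least one feature" validation added above.
public class FeatureAvailabilityCheckSketch {

    static void validateAtLeastOneFeature(List<String> extractedFieldNames, Set<String> requiredFieldNames) {
        // Required fields (e.g. a regression dependent_variable) are assumed not to be features.
        long featureCount = extractedFieldNames.stream()
            .filter(name -> requiredFieldNames.contains(name) == false)
            .count();
        if (featureCount == 0) {
            StringBuilder msg = new StringBuilder("at least one field must be included in the analysis");
            if (requiredFieldNames.isEmpty() == false) {
                msg.append(" (excluding fields ").append(requiredFieldNames).append(")");
            }
            throw new IllegalArgumentException(msg.toString());
        }
    }

    public static void main(String[] args) {
        // Mirrors the new YAML test further down: the only extracted field is the
        // dependent variable, so the job has no features and validation fails.
        validateAtLeastOneFeature(List.of("my_dep_var"), Set.of("my_dep_var"));
    }
}
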
@@ -95,12 +95,6 @@ private Set<String> getIncludedFields(Set<FieldSelection> fieldSelection) {
}
includeAndExcludeFields(fields, fieldSelection);

if (fields.isEmpty()) {
throw ExceptionsHelper.badRequestException("No compatible fields could be detected in index {}. Supported types are {}.",
Arrays.toString(index),
getSupportedTypes());
}

return fields;
}

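The "No compatible fields could be detected" failure removed here effectively moves up into the start-action validation shown earlier; the tests below confirm that an empty result now comes back with per-field selection reasons rather than an exception. A rough before/after sketch with assumed names, not the real detector API:

import java.util.Collections;
import java.util.Set;

// Hypothetical illustration of the behavioural change in getIncludedFields.
class DetectorBehaviourSketch {

    // Before: an empty result was an error at field-detection time.
    static Set<String> includedFieldsBefore(Set<String> fields) {
        if (fields.isEmpty()) {
            throw new IllegalArgumentException("No compatible fields could be detected");
        }
        return fields;
    }

    // After: an empty set is a legitimate result; callers such as the start
    // action decide whether to reject it (see the sketch above).
    static Set<String> includedFieldsAfter(Set<String> fields) {
        return fields;
    }

    public static void main(String[] args) {
        System.out.println(includedFieldsAfter(Collections.emptySet())); // prints []
    }
}
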
@@ -37,6 +37,7 @@
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.is;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

@@ -91,10 +92,14 @@ public void testDetect_GivenOutlierDetectionAndNonNumericField() {

ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildOutlierDetectionConfig(), 100, fieldCapabilities, Collections.emptyMap());
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, extractedFieldsDetector::detect);
Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

assertThat(e.getMessage(), equalTo("No compatible fields could be detected in index [source_index]." +
" Supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]."));
assertThat(fieldExtraction.v1().getAllFields().isEmpty(), is(true));
assertThat(fieldExtraction.v2().size(), equalTo(1));
assertThat(fieldExtraction.v2().get(0).getName(), equalTo("some_keyword"));
assertThat(fieldExtraction.v2().get(0).isIncluded(), is(false));
assertThat(fieldExtraction.v2().get(0).getReason(), equalTo("unsupported type; supported types are " +
"[boolean, byte, double, float, half_float, integer, long, scaled_float, short]"));
}

public void testDetect_GivenOutlierDetectionAndFieldWithNumericAndNonNumericTypes() {
@@ -103,10 +108,14 @@ public void testDetect_GivenOutlierDetectionAndFieldWithNumericAndNonNumericTypes() {

ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildOutlierDetectionConfig(), 100, fieldCapabilities, Collections.emptyMap());
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, extractedFieldsDetector::detect);
Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

assertThat(e.getMessage(), equalTo("No compatible fields could be detected in index [source_index]. " +
"Supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]."));
assertThat(fieldExtraction.v1().getAllFields().isEmpty(), is(true));
assertThat(fieldExtraction.v2().size(), equalTo(1));
assertThat(fieldExtraction.v2().get(0).getName(), equalTo("indecisive_field"));
assertThat(fieldExtraction.v2().get(0).isIncluded(), is(false));
assertThat(fieldExtraction.v2().get(0).getReason(), equalTo("unsupported type; supported types are " +
"[boolean, byte, double, float, half_float, integer, long, scaled_float, short]"));
}

public void testDetect_GivenOutlierDetectionAndMultipleFields() {
@@ -306,10 +315,10 @@ public void testDetect_GivenIgnoredField() {

ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildOutlierDetectionConfig(), 100, fieldCapabilities, Collections.emptyMap());
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, extractedFieldsDetector::detect);
Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

assertThat(e.getMessage(), equalTo("No compatible fields could be detected in index [source_index]. " +
"Supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]."));
assertThat(fieldExtraction.v1().getAllFields().isEmpty(), is(true));
assertThat(fieldExtraction.v2().isEmpty(), is(true));
}

public void testDetect_GivenIncludedIgnoredField() {
@@ -410,9 +419,11 @@ public void testDetect_GivenExcludeAllValidFields() {

ExtractedFieldsDetector extractedFieldsDetector = new ExtractedFieldsDetector(
SOURCE_INDEX, buildOutlierDetectionConfig(), 100, fieldCapabilities, Collections.emptyMap());
ElasticsearchStatusException e = expectThrows(ElasticsearchStatusException.class, extractedFieldsDetector::detect);
assertThat(e.getMessage(), equalTo("No compatible fields could be detected in index [source_index]. " +
"Supported types are [boolean, byte, double, float, half_float, integer, long, scaled_float, short]."));
Tuple<ExtractedFields, List<FieldSelection>> fieldExtraction = extractedFieldsDetector.detect();

assertThat(fieldExtraction.v1().getAllFields().isEmpty(), is(true));
assertThat(fieldExtraction.v2().size(), equalTo(2));
assertThat(fieldExtraction.v2().stream().filter(FieldSelection::isIncluded).findAny().isPresent(), is(false));
}

public void testDetect_GivenInclusionsAndExclusions() {
@@ -36,7 +36,7 @@
id: "missing_index"

---
"Test start given source index has no compatible fields":
"Test start outlier_detection given source index has no fields":

- do:
indices.create:
@@ -57,7 +57,41 @@
}

- do:
catch: /No compatible fields could be detected in index \[empty-index\]/
catch: /at least one field must be included in the analysis/
ml.start_data_frame_analytics:
id: "foo"

---
"Test start regression given source index only has dependent variable":

- do:
indices.create:
index: index-with-dep-var-only
body:
mappings:
properties:
my_dep_var: { type: "long" }

- do:
ml.put_data_frame_analytics:
id: "foo"
body: >
{
"source": {
"index": "index-with-dep-var-only"
},
"dest": {
"index": "results"
},
"analysis": {
"regression":{
"dependent_variable": "my_dep_var"
}
}
}

- do:
catch: /at least one field must be included in the analysis \(excluding fields \[my_dep_var\]\)/
ml.start_data_frame_analytics:
id: "foo"
