Ensure LTR models are cached when used as a rescorer. (elastic#106161)

stefnestor · Mar 11, 2024 · ad161a1 · ad161a1
1 parent 5e14aca
commit ad161a1
Show file tree

Hide file tree

Showing 2 changed files with 39 additions and 4 deletions.
diff --git a/...rc/main/java/org/elasticsearch/xpack/ml/inference/loadingservice/ModelLoadingService.java b/...rc/main/java/org/elasticsearch/xpack/ml/inference/loadingservice/ModelLoadingService.java
@@ -647,7 +647,7 @@ private void handleLoadSuccess(
             // Also, if the consumer is a search consumer, we should always cache it
             if (referencedModels.contains(modelId)
                 || Sets.haveNonEmptyIntersection(modelIdToModelAliases.getOrDefault(modelId, new HashSet<>()), referencedModels)
-                || consumer.equals(Consumer.SEARCH_AGGS)) {
+                || consumer.isAnyOf(Consumer.SEARCH_AGGS, Consumer.SEARCH_RESCORER)) {
                 try {
                     // The local model may already be in cache. If it is, we don't bother adding it to cache.
                     // If it isn't, we flip an `isLoaded` flag, and increment the model counter to make sure if it is evicted
@@ -810,7 +810,8 @@ public void clusterChanged(ClusterChangedEvent event) {
                 );
                 if (oldModelAliasesNotReferenced && newModelAliasesNotReferenced && modelIsNotReferenced) {
                     ModelAndConsumer modelAndConsumer = localModelCache.get(modelId);
-                    if (modelAndConsumer != null && modelAndConsumer.consumers.contains(Consumer.SEARCH_AGGS) == false) {
+                    if (modelAndConsumer != null
+                        && modelAndConsumer.consumers.stream().noneMatch(c -> c.isAnyOf(Consumer.SEARCH_AGGS, Consumer.SEARCH_RESCORER))) {
                         logger.trace("[{} ({})] invalidated from cache", modelId, modelAliasOrId);
                         localModelCache.invalidate(modelId);
                     }

diff --git a/...st/java/org/elasticsearch/xpack/ml/inference/loadingservice/ModelLoadingServiceTests.java b/...st/java/org/elasticsearch/xpack/ml/inference/loadingservice/ModelLoadingServiceTests.java
@@ -43,7 +43,9 @@
 import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig;
 import org.elasticsearch.xpack.core.ml.inference.TrainedModelInput;
 import org.elasticsearch.xpack.core.ml.inference.trainedmodel.ClassificationConfig;
+import org.elasticsearch.xpack.core.ml.inference.trainedmodel.InferenceConfig;
 import org.elasticsearch.xpack.core.ml.inference.trainedmodel.InferenceStats;
+import org.elasticsearch.xpack.core.ml.inference.trainedmodel.LearningToRankConfig;
 import org.elasticsearch.xpack.core.ml.inference.trainedmodel.inference.InferenceDefinition;
 import org.elasticsearch.xpack.core.ml.job.messages.Messages;
 import org.elasticsearch.xpack.ml.MachineLearning;
@@ -424,6 +426,34 @@ public void testGetModelForSearch() throws Exception {
         verify(trainedModelStatsService, never()).queueStats(any(InferenceStats.class), anyBoolean());
     }
 
+    public void testGetModelForLearningToRank() throws Exception {
+        String modelId = "test-get-model-for-ltr";
+        withTrainedModel(modelId, 1L, LearningToRankConfig.EMPTY_PARAMS);
+
+        ModelLoadingService modelLoadingService = new ModelLoadingService(
+            trainedModelProvider,
+            auditor,
+            threadPool,
+            clusterService,
+            trainedModelStatsService,
+            Settings.EMPTY,
+            "test-node",
+            circuitBreaker,
+            mock(XPackLicenseState.class)
+        );
+
+        for (int i = 0; i < 3; i++) {
+            PlainActionFuture<LocalModel> future = new PlainActionFuture<>();
+            modelLoadingService.getModelForLearningToRank(modelId, future);
+            assertThat(future.get(), is(not(nullValue())));
+        }
+
+        assertTrue(modelLoadingService.isModelCached(modelId));
+
+        verify(trainedModelProvider, times(1)).getTrainedModelForInference(eq(modelId), eq(false), any());
+        verify(trainedModelStatsService, never()).queueStats(any(InferenceStats.class), anyBoolean());
+    }
+
     public void testCircuitBreakerBreak() throws Exception {
         String model1 = "test-circuit-break-model-1";
         String model2 = "test-circuit-break-model-2";
@@ -656,13 +686,17 @@ public void testAliasesGetUpdatedEvenWhenNotIngestNode() throws IOException {
         assertThat(modelLoadingService.getModelId("loaded_model_again"), equalTo(model1));
     }
 
-    @SuppressWarnings("unchecked")
     private void withTrainedModel(String modelId, long size) {
+        withTrainedModel(modelId, size, ClassificationConfig.EMPTY_PARAMS);
+    }
+
+    @SuppressWarnings("unchecked")
+    private void withTrainedModel(String modelId, long size, InferenceConfig inferenceConfig) {
         InferenceDefinition definition = mock(InferenceDefinition.class);
         when(definition.ramBytesUsed()).thenReturn(size);
         TrainedModelConfig trainedModelConfig = mock(TrainedModelConfig.class);
         when(trainedModelConfig.getModelId()).thenReturn(modelId);
-        when(trainedModelConfig.getInferenceConfig()).thenReturn(ClassificationConfig.EMPTY_PARAMS);
+        when(trainedModelConfig.getInferenceConfig()).thenReturn(inferenceConfig);
         when(trainedModelConfig.getInput()).thenReturn(new TrainedModelInput(Arrays.asList("foo", "bar", "baz")));
         when(trainedModelConfig.getModelSize()).thenReturn(size);
         doAnswer(invocationOnMock -> {