From 0048bc8ed41f36e085bba6b7ec4ef1beb0d4f0ec Mon Sep 17 00:00:00 2001
From: James Gowdy
Date: Tue, 22 Jun 2021 21:24:31 +0100
Subject: [PATCH] [ML] Fixing categorization tokens for multi-line messages

---
 .../models/job_service/new_job/categorization/examples.ts | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/x-pack/plugins/ml/server/models/job_service/new_job/categorization/examples.ts b/x-pack/plugins/ml/server/models/job_service/new_job/categorization/examples.ts
index 5fecb3d9eb1ec..abda62097dfb7 100644
--- a/x-pack/plugins/ml/server/models/job_service/new_job/categorization/examples.ts
+++ b/x-pack/plugins/ml/server/models/job_service/new_job/categorization/examples.ts
@@ -145,10 +145,11 @@ export function categorizationExamplesProvider({
       for (let g = 0; g < sumLengths.length; g++) {
         if (t.start_offset <= sumLengths[g] + g) {
           const offset = g > 0 ? sumLengths[g - 1] + g : 0;
+          const start = t.start_offset - offset;
           tokensPerExample[g].push({
             ...t,
-            start_offset: t.start_offset - offset,
-            end_offset: t.end_offset - offset,
+            start_offset: start,
+            end_offset: start + t.token.length,
           });
           break;
         }