Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Security Solution] Adds diff algorithm and unit tests for multi-line string fields #188022

Merged
merged 5 commits into from
Jul 17, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -1097,6 +1097,7 @@
"monaco-editor": "^0.44.0",
"monaco-yaml": "^5.1.0",
"mustache": "^2.3.2",
"node-diff3": "^3.1.2",
banderror marked this conversation as resolved.
Show resolved Hide resolved
"node-fetch": "^2.6.7",
"node-forge": "^1.3.1",
"nodemailer": "^6.9.14",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ export { numberDiffAlgorithm } from './number_diff_algorithm';
export { singleLineStringDiffAlgorithm } from './single_line_string_diff_algorithm';
export { scalarArrayDiffAlgorithm } from './scalar_array_diff_algorithm';
export { simpleDiffAlgorithm } from './simple_diff_algorithm';
export { multiLineStringDiffAlgorithm } from './multi_line_string_diff_algorithm';
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import type { ThreeVersionsOf } from '../../../../../../../../common/api/detection_engine';
import {
ThreeWayDiffOutcome,
ThreeWayMergeOutcome,
MissingVersion,
} from '../../../../../../../../common/api/detection_engine';
import { multiLineStringDiffAlgorithm } from './multi_line_string_diff_algorithm';

describe('multiLineStringDiffAlgorithm', () => {
it('returns current_version as merged output if there is no update - scenario AAA', () => {
  // Base, current and target all carry the same two-line text.
  const unchangedText = 'My description.\nThis is a second line.';
  const mockVersions: ThreeVersionsOf<string> = {
    base_version: unchangedText,
    current_version: unchangedText,
    target_version: unchangedText,
  };

  const actual = multiLineStringDiffAlgorithm(mockVersions);

  // Only these fields are checked; any other fields on the result are ignored.
  const expectedFields = {
    merged_version: mockVersions.current_version,
    diff_outcome: ThreeWayDiffOutcome.StockValueNoUpdate,
    merge_outcome: ThreeWayMergeOutcome.Current,
    has_conflict: false,
  };
  expect(actual).toEqual(expect.objectContaining(expectedFields));
});

it('returns current_version as merged output if current_version is different and there is no update - scenario ABA', () => {
  // Base and target are identical; only the current version was edited.
  const stockText = 'My description.\nThis is a second line.';
  const mockVersions: ThreeVersionsOf<string> = {
    base_version: stockText,
    current_version: 'My GREAT description.\nThis is a second line.',
    target_version: stockText,
  };

  const actual = multiLineStringDiffAlgorithm(mockVersions);

  // Only these fields are checked; any other fields on the result are ignored.
  const expectedFields = {
    merged_version: mockVersions.current_version,
    diff_outcome: ThreeWayDiffOutcome.CustomizedValueNoUpdate,
    merge_outcome: ThreeWayMergeOutcome.Current,
    has_conflict: false,
  };
  expect(actual).toEqual(expect.objectContaining(expectedFields));
});

it('returns target_version as merged output if current_version is the same and there is an update - scenario AAB', () => {
  // Base and current are identical; only the target version differs.
  const stockText = 'My description.\nThis is a second line.';
  const mockVersions: ThreeVersionsOf<string> = {
    base_version: stockText,
    current_version: stockText,
    target_version: 'My GREAT description.\nThis is a second line.',
  };

  const actual = multiLineStringDiffAlgorithm(mockVersions);

  // Only these fields are checked; any other fields on the result are ignored.
  const expectedFields = {
    merged_version: mockVersions.target_version,
    diff_outcome: ThreeWayDiffOutcome.StockValueCanUpdate,
    merge_outcome: ThreeWayMergeOutcome.Target,
    has_conflict: false,
  };
  expect(actual).toEqual(expect.objectContaining(expectedFields));
});

it('returns current_version as merged output if current version is different but it matches the update - scenario ABB', () => {
  // Current and target hold the same edited text, which differs from base.
  const editedText = 'My GREAT description.\nThis is a second line.';
  const mockVersions: ThreeVersionsOf<string> = {
    base_version: 'My description.\nThis is a second line.',
    current_version: editedText,
    target_version: editedText,
  };

  const actual = multiLineStringDiffAlgorithm(mockVersions);

  // Only these fields are checked; any other fields on the result are ignored.
  const expectedFields = {
    merged_version: mockVersions.current_version,
    diff_outcome: ThreeWayDiffOutcome.CustomizedValueSameUpdate,
    merge_outcome: ThreeWayMergeOutcome.Current,
    has_conflict: false,
  };
  expect(actual).toEqual(expect.objectContaining(expectedFields));
});

describe('if all three versions are different - scenario ABC', () => {
it('returns a computated merged version without a conflict if 3 way merge is possible', () => {
  // Current edits only the first line and target edits only the second line.
  const mockVersions: ThreeVersionsOf<string> = {
    base_version: 'My description.\nThis is a second line.',
    current_version: 'My GREAT description.\nThis is a second line.',
    target_version: 'My description.\nThis is a second line, now longer.',
  };

  const actual = multiLineStringDiffAlgorithm(mockVersions);

  // The merged text is expected to carry both edits.
  const expectedFields = {
    merged_version: 'My GREAT description.\nThis is a second line, now longer.',
    diff_outcome: ThreeWayDiffOutcome.CustomizedValueCanUpdate,
    merge_outcome: ThreeWayMergeOutcome.Merged,
    has_conflict: false,
  };
  expect(actual).toEqual(expect.objectContaining(expectedFields));
});

it('returns the current_version with a conflict if 3 way merge is not possible', () => {
  // Current and target each rewrite both lines, and in different ways.
  const mockVersions: ThreeVersionsOf<string> = {
    base_version: 'My description.\nThis is a second line.',
    current_version: 'My GREAT description.\nThis is a third line.',
    target_version: 'My EXCELLENT description.\nThis is a fourth.',
  };

  const actual = multiLineStringDiffAlgorithm(mockVersions);

  // On conflict the merged value is expected to be the current version.
  const expectedFields = {
    merged_version: mockVersions.current_version,
    diff_outcome: ThreeWayDiffOutcome.CustomizedValueCanUpdate,
    merge_outcome: ThreeWayMergeOutcome.Conflict,
    has_conflict: true,
  };
  expect(actual).toEqual(expect.objectContaining(expectedFields));
});
});

describe('if base_version is missing', () => {
it('returns current_version as merged output if current_version and target_version are the same - scenario -AA', () => {
  // No base version is available; current and target hold identical text.
  const sharedText = 'My description.\nThis is a second line.';
  const mockVersions: ThreeVersionsOf<string> = {
    base_version: MissingVersion,
    current_version: sharedText,
    target_version: sharedText,
  };

  const actual = multiLineStringDiffAlgorithm(mockVersions);

  // Only these fields are checked; any other fields on the result are ignored.
  const expectedFields = {
    merged_version: mockVersions.current_version,
    diff_outcome: ThreeWayDiffOutcome.StockValueNoUpdate,
    merge_outcome: ThreeWayMergeOutcome.Current,
    has_conflict: false,
  };
  expect(actual).toEqual(expect.objectContaining(expectedFields));
});

it('returns target_version as merged output if current_version and target_version are different - scenario -AB', () => {
const mockVersions: ThreeVersionsOf<string> = {
base_version: MissingVersion,
current_version: `My GREAT description.\nThis is a second line.`,
target_version: `My description.\nThis is a second line, now longer.`,
};

const result = multiLineStringDiffAlgorithm(mockVersions);

expect(result).toEqual(
expect.objectContaining({
merged_version: mockVersions.target_version,
diff_outcome: ThreeWayDiffOutcome.StockValueCanUpdate,
merge_outcome: ThreeWayMergeOutcome.Target,
has_conflict: false,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What was our ultimate decision on such cases? Are we going to mark these potential ABC situations as SOLVABLE_CONFLICT later to let users explicitly accept our merge proposal?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is what I'm currently working on: #187770

})
);
});
});
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

import { merge } from 'node-diff3';
import { assertUnreachable } from '../../../../../../../../common/utility_types';
import type {
ThreeVersionsOf,
ThreeWayDiff,
} from '../../../../../../../../common/api/detection_engine/prebuilt_rules';
import {
determineDiffOutcome,
determineIfValueCanUpdate,
ThreeWayDiffOutcome,
ThreeWayMergeOutcome,
MissingVersion,
} from '../../../../../../../../common/api/detection_engine/prebuilt_rules';

/**
 * Three-way diff algorithm for string fields whose value can span multiple lines.
 *
 * Classifies the relationship between the base, current and target versions,
 * derives from that classification whether the field has an update, and
 * delegates to `mergeVersions` for the merged value and the merge outcome.
 *
 * @param versions - the base, current and target versions of the field
 * @returns the three input versions together with the merged version, the
 * diff and merge outcomes, and the `has_update` / `has_conflict` flags;
 * `has_conflict` is true only when the merge outcome is `Conflict`
 */
export const multiLineStringDiffAlgorithm = (
  versions: ThreeVersionsOf<string>
): ThreeWayDiff<string> => {
  const base = versions.base_version;
  const current = versions.current_version;
  const target = versions.target_version;

  const outcome = determineDiffOutcome(base, current, target);
  const hasUpdate = determineIfValueCanUpdate(outcome);
  const merged = mergeVersions({
    baseVersion: base,
    currentVersion: current,
    targetVersion: target,
    diffOutcome: outcome,
  });

  return {
    base_version: base,
    current_version: current,
    target_version: target,
    merged_version: merged.mergedVersion,

    diff_outcome: outcome,
    merge_outcome: merged.mergeOutcome,
    has_update: hasUpdate,
    has_conflict: merged.mergeOutcome === ThreeWayMergeOutcome.Conflict,
  };
};

/**
 * Value returned by `mergeVersions`: how the merge was resolved and the
 * resulting string.
 */
interface MergeResult {
// Which resolution was applied (e.g. Current, Target, Merged or Conflict).
mergeOutcome: ThreeWayMergeOutcome;
// The string chosen or computed as the merged value.
mergedVersion: string;
}

/**
 * Input to `mergeVersions`: the three versions of the field plus the diff
 * outcome computed for them.
 */
interface MergeArgs {
// May be the `MissingVersion` sentinel instead of a string.
baseVersion: string | MissingVersion;
currentVersion: string;
targetVersion: string;
// `mergeVersions` switches on this value to pick how to merge.
diffOutcome: ThreeWayDiffOutcome;
}

const mergeVersions = ({
baseVersion,
currentVersion,
targetVersion,
diffOutcome,
}: MergeArgs): MergeResult => {
switch (diffOutcome) {
case ThreeWayDiffOutcome.StockValueNoUpdate:
case ThreeWayDiffOutcome.CustomizedValueNoUpdate:
case ThreeWayDiffOutcome.CustomizedValueSameUpdate: {
return {
mergeOutcome: ThreeWayMergeOutcome.Current,
mergedVersion: currentVersion,
};
}
case ThreeWayDiffOutcome.StockValueCanUpdate: {
return {
mergeOutcome: ThreeWayMergeOutcome.Target,
mergedVersion: targetVersion,
};
}
case ThreeWayDiffOutcome.CustomizedValueCanUpdate: {
if (baseVersion === MissingVersion) {
return {
mergeOutcome: ThreeWayMergeOutcome.Conflict,
mergedVersion: currentVersion,
};
}
const mergedVersion = merge(currentVersion, baseVersion, targetVersion, {
stringSeparator: /[^\S\r\n]+/, // Separates on all whitespace except for new lines, which we keep to preserve formatting
});

return mergedVersion.conflict
? {
mergeOutcome: ThreeWayMergeOutcome.Conflict,
mergedVersion: currentVersion,
}
: {
mergeOutcome: ThreeWayMergeOutcome.Merged,
mergedVersion: mergedVersion.result.join(' '),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if it's a legitimate concern, but it looks like we are substituting all whitespace chars (except newlines) with spaces when we do .join(' '). I wonder if it can mess up formatting if someone had tab chars in property values.

I think you can't insert a tab or another weird whitespace char when editing via our web UI, but maybe you can if you edit it in an external editor.


I played with it a little and tried to come up with a regexp that won't consume whitespace chars so that we won't have to "restore" them later. This is what I came up with:

stringSeparator: /(?<=\r?\n)|[^\S\r\n]+/g

It does split the string correctly. For example this string

First line.\nSecond line.

gets split as

['First', 'line.\n', 'Second', 'line.']

BUT for some reason merge still returns a conflict when I try to run it with

baseVersion: "First line.\nSecond line."
currentVersion: "First row.\nSecond line."
targetVersion: "First line.\nThird line."

I expected it to merge without conflict and return

"First row.\nThird line."

@dplumlee Any idea why that might be? Am I misunderstanding how it works?

Copy link
Contributor Author

@dplumlee dplumlee Jul 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a fair question; I was thinking about it when initially toying with the regex. I think the only true way around it would be to make one that gets split with all the whitespace still intact (e.g. something like ['First ', 'line.\n', 'Second<space character>', 'line.']) so we could .join('') with an empty separator and just merge the strings back together with their trailing whitespace attached at the hip. None of these fields will be impactful to rule runs themselves, so I'm also unsure if it's a big concern, but I suppose it has the potential to mess with markdown stuff made via third party (although by default markdown only uses spaces as far as I can tell).

For that particular example, I've been trying to get it to work for a bit - might be the case that the package is just not sophisticated enough to handle it. Or we need better regex as described above

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the only true way around it would be to make one that gets split with all the whitespace still in tact (e.g. something like ['First ', 'line.\n', 'Second', 'line.']) so we could .join with no argument and just merge the strings back together with their appending whitespace attached at the hip.

Played with it a bit more and think I came up with a cleaner regexp that does exactly this: /(?<=\s)(?=\S)/

Scherm­afbeelding 2024-07-15 om 19 52 58

Still, the "returns a computated merged version without a conflict if 3 way merge is possible" test doesn't pass, because merge returns a conflict when I expect it to merge cleanly. Not sure why.

  {
    conflict: true,
    result: [
      'First ', 
      '<<<<<<<',
      'row.\n',  'Second ',
      '=======', 
      'line.\n', 'Third ',  
      '>>>>>>>',
      'line.'
    ]
  }

Do you have an idea why that might not work?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please disregard most of my above comment. The /(?<=\s)(?=\S)/ regexp actually does work and tests do pass with it! I modified the test implementation, that's why I saw the error 🤦‍♂️. If you use /(?<=\s)(?=\S)/ as stringSeparator and then do .join('') it should work fine. And this way it won't cut away non-space chars.

Also I think I found the limitation of the diffing lib. It can't merge without a conflict if changed sections are adjacent.

For example, this merges cleanly:

{
  base_version: `Line\nAND\nSquare`,
  current_version: `Triangle\nAND\nSquare`,
  target_version: `Line\nAND\nCircle`,
}

// result: `Triangle\nAND\nCircle`

While this results in a conflict:

{
  base_version: `Line\nSquare`,
  current_version: `Triangle\nSquare`,
  target_version: `Line\nCircle`,
};

// result: conflict

But there's nothing we can do about it. Please try the new regexp approach and tell me what you think.

Copy link
Contributor

@jpdjere jpdjere Jul 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @dplumlee and @nikitaindik

I've been doing some additional testing and came up with: stringSeparator: /(\S+|\s+)/g, plus using .join('').

This allowed the algorithm to both:

  • keep all whitespace characters
  • join adjacent sections

Both of the next two examples pass:

1.

      const mockVersions: ThreeVersionsOf<string> = {
        base_version: `My description.\f\nThis is a second\u2001 line.\f\nThis is a third line.`,
        current_version: `My GREAT description.\f\nThis is a second\u2001 line.\f\nThis is a third line.`,
        target_version: `My description.\f\nThis is a second\u2001 line.\f\nThis is a GREAT line.`,
      };

      const expectedMergedVersion = `My GREAT description.\f\nThis is a second\u2001 line.\f\nThis is a GREAT line.`;

2.

      const mockVersions: ThreeVersionsOf<string> = {
        base_version: `Line\nSquare`,
        current_version: `Triangle\nSquare`,
        target_version: `Line\nCircle`,
      };

      const expectedMergedVersion = `Triangle\nCircle`;

};
}
default:
return assertUnreachable(diffOutcome);
}
};
36 changes: 33 additions & 3 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -23840,6 +23840,11 @@ node-cache@^5.1.0:
dependencies:
clone "2.x"

node-diff3@^3.1.2:
version "3.1.2"
resolved "https://registry.yarnpkg.com/node-diff3/-/node-diff3-3.1.2.tgz#49df8d821dc9cbab87bfd6182171d90169613a97"
integrity sha512-wUd9TWy059I8mZdH6G3LPNlAEfxDvXtn/RcyFrbqL3v34WlDxn+Mh4HDhOwWuaMk/ROVepe5tTpnGHbve6Db2g==

node-dir@^0.1.10:
version "0.1.17"
resolved "https://registry.yarnpkg.com/node-dir/-/node-dir-0.1.17.tgz#5f5665d93351335caabef8f1c554516cf5f1e4e5"
Expand Down Expand Up @@ -29349,7 +29354,7 @@ string-replace-loader@^2.2.0:
loader-utils "^1.2.3"
schema-utils "^1.0.0"

"string-width-cjs@npm:string-width@^4.2.0", "string-width@^1.0.2 || 2 || 3 || 4", string-width@^4.0.0, string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.2, string-width@^4.2.3:
"string-width-cjs@npm:string-width@^4.2.0":
version "4.2.3"
resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
Expand All @@ -29367,6 +29372,15 @@ string-width@^1.0.1:
is-fullwidth-code-point "^1.0.0"
strip-ansi "^3.0.0"

"string-width@^1.0.2 || 2 || 3 || 4", string-width@^4.0.0, string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.2, string-width@^4.2.3:
version "4.2.3"
resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
dependencies:
emoji-regex "^8.0.0"
is-fullwidth-code-point "^3.0.0"
strip-ansi "^6.0.1"

string-width@^5.0.1, string-width@^5.1.2:
version "5.1.2"
resolved "https://registry.yarnpkg.com/string-width/-/string-width-5.1.2.tgz#14f8daec6d81e7221d2a357e668cab73bdbca794"
Expand Down Expand Up @@ -29477,7 +29491,7 @@ stringify-object@^3.2.1:
is-obj "^1.0.1"
is-regexp "^1.0.0"

"strip-ansi-cjs@npm:strip-ansi@^6.0.1", strip-ansi@^6.0.0, strip-ansi@^6.0.1:
"strip-ansi-cjs@npm:strip-ansi@^6.0.1":
version "6.0.1"
resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9"
integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==
Expand All @@ -29491,6 +29505,13 @@ strip-ansi@^3.0.0, strip-ansi@^3.0.1:
dependencies:
ansi-regex "^2.0.0"

strip-ansi@^6.0.0, strip-ansi@^6.0.1:
version "6.0.1"
resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9"
integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==
dependencies:
ansi-regex "^5.0.1"

strip-ansi@^7.0.1, strip-ansi@^7.1.0:
version "7.1.0"
resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-7.1.0.tgz#d5b6568ca689d8561370b0707685d22434faff45"
Expand Down Expand Up @@ -32378,7 +32399,7 @@ workerpool@6.2.1:
resolved "https://registry.yarnpkg.com/workerpool/-/workerpool-6.2.1.tgz#46fc150c17d826b86a008e5a4508656777e9c343"
integrity sha512-ILEIE97kDZvF9Wb9f6h5aXK4swSlKGUcOEGiIYb2OOu/IrDU9iwj0fD//SsA6E5ibwJxpEvhullJY4Sl4GcpAw==

"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0", wrap-ansi@^7.0.0:
"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0":
version "7.0.0"
resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43"
integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==
Expand All @@ -32404,6 +32425,15 @@ wrap-ansi@^6.2.0:
string-width "^4.1.0"
strip-ansi "^6.0.0"

wrap-ansi@^7.0.0:
version "7.0.0"
resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43"
integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==
dependencies:
ansi-styles "^4.0.0"
string-width "^4.1.0"
strip-ansi "^6.0.0"

wrap-ansi@^8.1.0:
version "8.1.0"
resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-8.1.0.tgz#56dc22368ee570face1b49819975d9b9a5ead214"
Expand Down