Skip to content

Commit

Permalink
[Security Solution] Fixes multi-line diff algorithm performance in th…
Browse files Browse the repository at this point in the history
…e `upgrade/_review` endpoint (elastic#199388)

**Fixes elastic#199290

## Summary

The current multi-line string algorithm uses a very inefficient regex to
split and analyze string fields, and exponentially increases in time
complexity when the strings are long. This PR substitutes a much simpler
comparison regex for far better efficiency as shown in the table below.

### Performance between different regex options using sample prebuilt
rule setup guide string

| | `/(\S+\|\s+)/g` (original) | `/(\s+)/g` | `/(\n)/g` |
`/(\r\n\|\n\|\r)/g` |

|-----------------------|---------------|----------|---------|-------------------|
| Unit test speed | `986ms` | `96ms` | `1ms` | `2ms` |
| FTR test with 1 rule | `3.0s` | `2.8s` | `2.0s` | `2.0s` |
| FTR test with 5 rules | `11.6s`        | `6.8s`    | `6.1s`   |  |


### Performance between different regex options using intentionally long
strings (25k characters)

|                      | `/(\S+\|\s+)/g`       | `/(\r\n\|\n\|\r)/g` |
|----------------------|-----------------------|---------------------|
| Unit test speed      | `1049414ms` (17 min)  | `58ms`              |
| FTR test with 1 rule | `>360000ms` (Timeout) | `2.1 s`             |

### Checklist

Delete any items that are not applicable to this PR.

- [x] [Unit or functional
tests](https://www.elastic.co/guide/en/kibana/master/development-tests.html)
were updated or added to match the most common scenarios
- [ ] [Flaky Test
Runner](https://ci-stats.kibana.dev/trigger_flaky_test_runner/1) was
used on any tests changed


### For maintainers

- [ ] This was checked for breaking API changes and was [labeled
appropriately](https://www.elastic.co/guide/en/kibana/master/contributing.html#_add_your_labels)
- [ ] This will appear in the **Release Notes** and follow the
[guidelines](https://www.elastic.co/guide/en/kibana/master/contributing.html#kibana-release-notes-process)

---------

Co-authored-by: Elastic Machine <elasticmachine@users.noreply.github.com>
Co-authored-by: Georgii Gorbachev <georgii.gorbachev@elastic.co>
  • Loading branch information
3 people authored Nov 13, 2024
1 parent c35b389 commit 4f6d357
Show file tree
Hide file tree
Showing 4 changed files with 202 additions and 26 deletions.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,23 @@ import {
ThreeWayDiffConflict,
} from '../../../../../../../../common/api/detection_engine';
import { multiLineStringDiffAlgorithm } from './multi_line_string_diff_algorithm';
import {
TEXT_M_A,
TEXT_M_B,
TEXT_M_C,
TEXT_M_MERGED,
TEXT_XL_A,
TEXT_XL_B,
TEXT_XL_C,
TEXT_XL_MERGED,
} from './multi_line_string_diff_algorithm.mock';

describe('multiLineStringDiffAlgorithm', () => {
it('returns current_version as merged output if there is no update - scenario AAA', () => {
const mockVersions: ThreeVersionsOf<string> = {
base_version: 'My description.\nThis is a second line.',
current_version: 'My description.\nThis is a second line.',
target_version: 'My description.\nThis is a second line.',
base_version: TEXT_M_A,
current_version: TEXT_M_A,
target_version: TEXT_M_A,
};

const result = multiLineStringDiffAlgorithm(mockVersions);
Expand All @@ -36,9 +46,9 @@ describe('multiLineStringDiffAlgorithm', () => {

it('returns current_version as merged output if current_version is different and there is no update - scenario ABA', () => {
const mockVersions: ThreeVersionsOf<string> = {
base_version: 'My description.\nThis is a second line.',
current_version: 'My GREAT description.\nThis is a second line.',
target_version: 'My description.\nThis is a second line.',
base_version: TEXT_M_A,
current_version: TEXT_M_B,
target_version: TEXT_M_A,
};

const result = multiLineStringDiffAlgorithm(mockVersions);
Expand All @@ -55,9 +65,9 @@ describe('multiLineStringDiffAlgorithm', () => {

it('returns target_version as merged output if current_version is the same and there is an update - scenario AAB', () => {
const mockVersions: ThreeVersionsOf<string> = {
base_version: 'My description.\nThis is a second line.',
current_version: 'My description.\nThis is a second line.',
target_version: 'My GREAT description.\nThis is a second line.',
base_version: TEXT_M_A,
current_version: TEXT_M_A,
target_version: TEXT_M_B,
};

const result = multiLineStringDiffAlgorithm(mockVersions);
Expand All @@ -74,9 +84,9 @@ describe('multiLineStringDiffAlgorithm', () => {

it('returns current_version as merged output if current version is different but it matches the update - scenario ABB', () => {
const mockVersions: ThreeVersionsOf<string> = {
base_version: 'My description.\nThis is a second line.',
current_version: 'My GREAT description.\nThis is a second line.',
target_version: 'My GREAT description.\nThis is a second line.',
base_version: TEXT_M_A,
current_version: TEXT_M_B,
target_version: TEXT_M_B,
};

const result = multiLineStringDiffAlgorithm(mockVersions);
Expand All @@ -92,32 +102,53 @@ describe('multiLineStringDiffAlgorithm', () => {
});

describe('if all three versions are different - scenario ABC', () => {
it('returns a computated merged version without a conflict if 3 way merge is possible', () => {
it('returns a computated merged version with a solvable conflict if 3 way merge is possible (real-world example)', () => {
const mockVersions: ThreeVersionsOf<string> = {
base_version: `My description.\f\nThis is a second\u2001 line.\f\nThis is a third line.`,
current_version: `My GREAT description.\f\nThis is a second\u2001 line.\f\nThis is a third line.`,
target_version: `My description.\f\nThis is a second\u2001 line.\f\nThis is a GREAT line.`,
base_version: TEXT_M_A,
current_version: TEXT_M_B,
target_version: TEXT_M_C,
};

const expectedMergedVersion = `My GREAT description.\f\nThis is a second\u2001 line.\f\nThis is a GREAT line.`;
const result = multiLineStringDiffAlgorithm(mockVersions);

expect(result).toEqual(
expect.objectContaining({
merged_version: TEXT_M_MERGED,
diff_outcome: ThreeWayDiffOutcome.CustomizedValueCanUpdate,
conflict: ThreeWayDiffConflict.SOLVABLE,
merge_outcome: ThreeWayMergeOutcome.Merged,
})
);
});

it('returns a computated merged version with a solvable conflict if 3 way merge is possible (simplified example)', () => {
// 3 way merge is possible when changes are made to different lines of text
// (in other words, there are no different changes made to the same line of text).
const mockVersions: ThreeVersionsOf<string> = {
base_version: 'My description.\nThis is a second line.',
current_version: 'My MODIFIED description.\nThis is a second line.',
target_version: 'My description.\nThis is a MODIFIED second line.',
};

const result = multiLineStringDiffAlgorithm(mockVersions);

expect(result).toEqual(
expect.objectContaining({
merged_version: expectedMergedVersion,
merged_version: 'My MODIFIED description.\nThis is a MODIFIED second line.',
diff_outcome: ThreeWayDiffOutcome.CustomizedValueCanUpdate,
conflict: ThreeWayDiffConflict.SOLVABLE,
merge_outcome: ThreeWayMergeOutcome.Merged,
})
);
});

it('returns the current_version with a conflict if 3 way merge is not possible', () => {
it('returns the current_version with a non-solvable conflict if 3 way merge is not possible (simplified example)', () => {
// It's enough to have different changes made to the same line of text
// to trigger a NON_SOLVABLE conflict. This behavior is similar to how Git works.
const mockVersions: ThreeVersionsOf<string> = {
base_version: 'My description.\nThis is a second line.',
current_version: 'My GREAT description.\nThis is a third line.',
target_version: 'My EXCELLENT description.\nThis is a fourth.',
current_version: 'My GREAT description.\nThis is a second line.',
target_version: 'My EXCELLENT description.\nThis is a second line.',
};

const result = multiLineStringDiffAlgorithm(mockVersions);
Expand All @@ -131,14 +162,39 @@ describe('multiLineStringDiffAlgorithm', () => {
})
);
});

it('does not exceed performance limits when diffing and merging extra large input texts', () => {
const mockVersions: ThreeVersionsOf<string> = {
base_version: TEXT_XL_A,
current_version: TEXT_XL_B,
target_version: TEXT_XL_C,
};

const startTime = performance.now();
const result = multiLineStringDiffAlgorithm(mockVersions);
const endTime = performance.now();

// If the regex merge in this function takes over 500ms, this test fails
// Performance measurements: https://github.com/elastic/kibana/pull/199388
expect(endTime - startTime).toBeLessThan(500);

expect(result).toEqual(
expect.objectContaining({
merged_version: TEXT_XL_MERGED,
diff_outcome: ThreeWayDiffOutcome.CustomizedValueCanUpdate,
conflict: ThreeWayDiffConflict.SOLVABLE,
merge_outcome: ThreeWayMergeOutcome.Merged,
})
);
});
});

describe('if base_version is missing', () => {
it('returns current_version as merged output if current_version and target_version are the same - scenario -AA', () => {
const mockVersions: ThreeVersionsOf<string> = {
base_version: MissingVersion,
current_version: 'My description.\nThis is a second line.',
target_version: 'My description.\nThis is a second line.',
current_version: TEXT_M_A,
target_version: TEXT_M_A,
};

const result = multiLineStringDiffAlgorithm(mockVersions);
Expand All @@ -158,8 +214,8 @@ describe('multiLineStringDiffAlgorithm', () => {
it('returns target_version as merged output if current_version and target_version are different - scenario -AB', () => {
const mockVersions: ThreeVersionsOf<string> = {
base_version: MissingVersion,
current_version: `My GREAT description.\nThis is a second line.`,
target_version: `My description.\nThis is a second line, now longer.`,
current_version: TEXT_M_A,
target_version: TEXT_M_B,
};

const result = multiLineStringDiffAlgorithm(mockVersions);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ const mergeVersions = ({
// TS does not realize that in ABC scenario, baseVersion cannot be missing
// Missing baseVersion scenarios were handled as -AA and -AB.
const mergedVersion = merge(currentVersion, baseVersion ?? '', targetVersion, {
stringSeparator: /(\S+|\s+)/g, // Retains all whitespace, which we keep to preserve formatting
stringSeparator: /(\r\n|\n|\r)/g, // Separates strings by new lines
});

return mergedVersion.conflict
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,12 @@ import {
ThreeWayDiffOutcome,
ThreeWayMergeOutcome,
} from '@kbn/security-solution-plugin/common/api/detection_engine';
import {
TEXT_XL_A,
TEXT_XL_B,
TEXT_XL_C,
TEXT_XL_MERGED,
} from '@kbn/security-solution-plugin/server/lib/detection_engine/prebuilt_rules/logic/diff/calculation/algorithms/multi_line_string_diff_algorithm.mock';
import { FtrProviderContext } from '../../../../../../ftr_provider_context';
import {
deleteAllTimelines,
Expand Down Expand Up @@ -249,6 +255,56 @@ export default ({ getService }: FtrProviderContext): void => {
expect(reviewResponse.stats.num_rules_with_conflicts).toBe(1);
expect(reviewResponse.stats.num_rules_with_non_solvable_conflicts).toBe(0);
});

it('should handle long multi-line strings without timing out', async () => {
// Install base prebuilt detection rule
await createHistoricalPrebuiltRuleAssetSavedObjects(es, [
createRuleAssetSavedObject({
rule_id: 'rule-1',
version: 1,
description: TEXT_XL_A,
}),
]);
await installPrebuiltRules(es, supertest);

// Customize a multi line string field on the installed rule
await patchRule(supertest, log, {
rule_id: 'rule-1',
description: TEXT_XL_B,
});

// Increment the version of the installed rule, update a multi line string field, and create the new rule assets
const updatedRuleAssetSavedObjects = [
createRuleAssetSavedObject({
rule_id: 'rule-1',
version: 2,
description: TEXT_XL_C,
}),
];
await createHistoricalPrebuiltRuleAssetSavedObjects(es, updatedRuleAssetSavedObjects);

// Call the upgrade review prebuilt rules endpoint and check that one rule is eligible for update
// and multi line string field update has no conflict
const reviewResponse = await reviewPrebuiltRulesToUpgrade(supertest);
expect(reviewResponse.rules[0].diff.fields.description).toEqual({
base_version: TEXT_XL_A,
current_version: TEXT_XL_B,
target_version: TEXT_XL_C,
merged_version: TEXT_XL_MERGED,
diff_outcome: ThreeWayDiffOutcome.CustomizedValueCanUpdate,
merge_outcome: ThreeWayMergeOutcome.Merged,
conflict: ThreeWayDiffConflict.SOLVABLE,
has_update: true,
has_base_version: true,
});
expect(reviewResponse.rules[0].diff.num_fields_with_updates).toBe(2);
expect(reviewResponse.rules[0].diff.num_fields_with_conflicts).toBe(1);
expect(reviewResponse.rules[0].diff.num_fields_with_non_solvable_conflicts).toBe(0);

expect(reviewResponse.stats.num_rules_to_upgrade_total).toBe(1);
expect(reviewResponse.stats.num_rules_with_conflicts).toBe(1);
expect(reviewResponse.stats.num_rules_with_non_solvable_conflicts).toBe(0);
});
});

describe('when all versions are not mergable', () => {
Expand Down

0 comments on commit 4f6d357

Please sign in to comment.