From d15da0ada1e5037572c18c99fca62a1266e71bda Mon Sep 17 00:00:00 2001 From: Ross Wolf <31489089+rw-access@users.noreply.github.com> Date: Thu, 23 Jul 2020 13:39:35 -0400 Subject: [PATCH] Add versioned schemas with a downgrade path (#84) * Add versioned schemas with a downgrade path * Remove and move unused variables * Add missing license * Skip NotField for output_index * Add strip_additional_properties for kibana import * Remove stray comment * Apply suggestions from code review Co-authored-by: Justin Ibarra --- detection_rules/__init__.py | 4 +- detection_rules/main.py | 4 +- detection_rules/mappings.py | 2 +- detection_rules/rule.py | 30 +-- detection_rules/rule_formatter.py | 6 +- detection_rules/rule_loader.py | 4 +- detection_rules/schema.py | 277 -------------------------- detection_rules/schemas/__init__.py | 54 +++++ detection_rules/schemas/base.py | 134 +++++++++++++ detection_rules/schemas/rta_schema.py | 23 +++ detection_rules/schemas/v78.py | 132 ++++++++++++ detection_rules/schemas/v79.py | 75 +++++++ tests/test_schemas.py | 105 ++++++++++ 13 files changed, 551 insertions(+), 299 deletions(-) delete mode 100644 detection_rules/schema.py create mode 100644 detection_rules/schemas/__init__.py create mode 100644 detection_rules/schemas/base.py create mode 100644 detection_rules/schemas/rta_schema.py create mode 100644 detection_rules/schemas/v78.py create mode 100644 detection_rules/schemas/v79.py create mode 100644 tests/test_schemas.py diff --git a/detection_rules/__init__.py b/detection_rules/__init__.py index 2ba12712287..d81810849cf 100644 --- a/detection_rules/__init__.py +++ b/detection_rules/__init__.py @@ -9,7 +9,7 @@ from . import misc from . import rule_formatter from . import rule_loader -from . import schema +from . import schemas from . 
import utils __all__ = ( @@ -19,6 +19,6 @@ 'misc', 'rule_formatter', 'rule_loader', - 'schema', + 'schemas', 'utils', ) diff --git a/detection_rules/main.py b/detection_rules/main.py index ce5c16c8cea..f87bacb1bb4 100644 --- a/detection_rules/main.py +++ b/detection_rules/main.py @@ -20,7 +20,7 @@ from .packaging import PACKAGE_FILE, Package, manage_versions, RELEASE_DIR from .rule import Rule from .rule_formatter import toml_write -from .schema import RULE_TYPES +from .schemas import CurrentSchema from .utils import get_path, clear_caches @@ -36,7 +36,7 @@ def root(): @click.argument('path', type=click.Path(dir_okay=False)) @click.option('--config', '-c', type=click.Path(exists=True, dir_okay=False), help='Rule or config file') @click.option('--required-only', is_flag=True, help='Only prompt for required fields') -@click.option('--rule-type', '-t', type=click.Choice(RULE_TYPES), help='Type of rule to create') +@click.option('--rule-type', '-t', type=click.Choice(CurrentSchema.RULE_TYPES), help='Type of rule to create') def create_rule(path, config, required_only, rule_type): """Create a detection rule.""" config = load_dump(config) if config else {} diff --git a/detection_rules/mappings.py b/detection_rules/mappings.py index 9c5c2e032dc..d9090191aa0 100644 --- a/detection_rules/mappings.py +++ b/detection_rules/mappings.py @@ -6,7 +6,7 @@ import os from collections import defaultdict -from .schema import validate_rta_mapping +from .schemas import validate_rta_mapping from .utils import load_etc_dump, save_etc_dump, get_path diff --git a/detection_rules/rule.py b/detection_rules/rule.py index d9f40e2ca20..d692f3bf2d7 100644 --- a/detection_rules/rule.py +++ b/detection_rules/rule.py @@ -14,7 +14,7 @@ from . 
import ecs, beats from .attack import TACTICS, build_threat_map_entry, technique_lookup from .rule_formatter import nested_normalize, toml_write -from .schema import RULE_TYPES, metadata_schema, schema_validate, get_schema +from .schemas import CurrentSchema, TomlMetadata # RULE_TYPES, metadata_schema, schema_validate, get_schema from .utils import get_path, clear_caches, cached @@ -98,13 +98,13 @@ def to_eql(self): @cached def get_meta_schema_required_defaults(): """Get the default values for required properties in the metadata schema.""" - required = [v for v in metadata_schema['required']] - properties = {k: v for k, v in metadata_schema['properties'].items() if k in required} + required = [v for v in TomlMetadata.get_schema()['required']] + properties = {k: v for k, v in TomlMetadata.get_schema()['properties'].items() if k in required} return {k: v.get('default') or [v['items']['default']] for k, v in properties.items()} def set_metadata(self, contents): """Parse metadata fields and set missing required fields to the default values.""" - metadata = {k: v for k, v in contents.items() if k in metadata_schema['properties']} + metadata = {k: v for k, v in contents.items() if k in TomlMetadata.get_schema()['properties']} defaults = self.get_meta_schema_required_defaults().copy() defaults.update(metadata) return defaults @@ -141,9 +141,16 @@ def validate(self, as_rule=False, versioned=False, query=True): self.normalize() if as_rule: - schema_validate(self.rule_format(), as_rule=True) + schema_cls = CurrentSchema.toml_schema() + contents = self.rule_format() + elif versioned: + schema_cls = CurrentSchema.versioned() + contents = self.contents else: - schema_validate(self.contents, versioned=versioned) + schema_cls = CurrentSchema + contents = self.contents + + schema_cls.validate(contents, role=self.type) if query and self.query and self.contents['language'] == 'kuery': ecs_versions = self.metadata.get('ecs_version') @@ -204,14 +211,13 @@ def get_hash(self): def 
build(cls, path=None, rule_type=None, required_only=True, save=True, **kwargs): """Build a rule from data and prompts.""" from .misc import schema_prompt - # from .rule_loader import rta_mappings kwargs = copy.deepcopy(kwargs) - while rule_type not in RULE_TYPES: - rule_type = click.prompt('Rule type ({})'.format(', '.join(RULE_TYPES))) + rule_type = click.prompt('Rule type ({})'.format(', '.join(CurrentSchema.RULE_TYPES)), + type=click.Choice(CurrentSchema.RULE_TYPES)) - schema = get_schema(rule_type) + schema = CurrentSchema.get_schema(role=rule_type) props = schema['properties'] opt_reqs = schema.get('required', []) contents = {} @@ -269,12 +275,12 @@ def build(cls, path=None, rule_type=None, required_only=True, save=True, **kwarg metadata = {} ecs_version = schema_prompt('ecs_version', required=False, value=None, - **metadata_schema['properties']['ecs_version']) + **TomlMetadata.get_schema()['properties']['ecs_version']) if ecs_version: metadata['ecs_version'] = ecs_version # validate before creating - schema_validate(contents) + CurrentSchema.toml_schema().validate(contents) suggested_path = os.path.join(RULES_DIR, contents['name']) # TODO: UPDATE BASED ON RULE STRUCTURE path = os.path.realpath(path or input('File path for rule [{}]: '.format(suggested_path)) or suggested_path) diff --git a/detection_rules/rule_formatter.py b/detection_rules/rule_formatter.py index e6d90c1b5e0..854b17f297b 100644 --- a/detection_rules/rule_formatter.py +++ b/detection_rules/rule_formatter.py @@ -10,7 +10,7 @@ import toml -from .schema import NONFORMATTED_FIELDS +from .schemas import CurrentSchema SQ = "'" DQ = '"' @@ -34,7 +34,7 @@ def nested_normalize(d, skip_cleanup=False): if k == 'query': # TODO: the linter still needs some work, but once up to par, uncomment to implement - kql.lint(v) d.update({k: nested_normalize(v)}) - elif k in NONFORMATTED_FIELDS: + elif k in CurrentSchema.markdown_fields(): # let these maintain newlines and whitespace for markdown support 
d.update({k: nested_normalize(v, skip_cleanup=True)}) else: @@ -160,7 +160,7 @@ def _do_write(_data, _contents): bottom[k] = v else: top[k] = v - elif k in NONFORMATTED_FIELDS: + elif k in CurrentSchema.markdown_fields(): top[k] = NonformattedField(v) else: top[k] = v diff --git a/detection_rules/rule_loader.py b/detection_rules/rule_loader.py index 650457bf9af..302206f3510 100644 --- a/detection_rules/rule_loader.py +++ b/detection_rules/rule_loader.py @@ -15,7 +15,7 @@ from .mappings import RtaMappings from .rule import RULES_DIR, Rule -from .schema import get_schema +from .schemas import CurrentSchema from .utils import get_path, cached @@ -171,7 +171,7 @@ def get_production_rules(): def find_unneeded_defaults(rule): """Remove values that are not required in the schema which are set with default values.""" - schema = get_schema(rule.contents['type']) + schema = CurrentSchema.get_schema(rule.type) props = schema['properties'] unrequired_defaults = [p for p in props if p not in schema['required'] and props[p].get('default')] default_matches = {p: rule.contents[p] for p in unrequired_defaults diff --git a/detection_rules/schema.py b/detection_rules/schema.py deleted file mode 100644 index ef1e5f70463..00000000000 --- a/detection_rules/schema.py +++ /dev/null @@ -1,277 +0,0 @@ -# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -# or more contributor license agreements. Licensed under the Elastic License; -# you may not use this file except in compliance with the Elastic License. - -"""Definitions for rule metadata and schemas.""" -import time - -import jsl -import jsonschema - -from . 
import ecs -from .attack import TACTICS, TACTICS_MAP, TECHNIQUES, technique_lookup -from .utils import cached - -UUID_PATTERN = r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}' -DATE_PATTERN = r'\d{4}/\d{2}/\d{2}' -VERSION_PATTERN = r'\d+\.\d+\.\d+' -RULE_LEVELS = ['recommended', 'aggressive'] -MATURITY_LEVELS = ['development', 'testing', 'staged', 'production', 'deprecated'] -OPERATORS = ['equals'] -OS_OPTIONS = ['windows', 'linux', 'macos', 'solaris'] # need to verify with ecs -INTERVAL_PATTERN = r'\d+[mshd]' -MITRE_URL_PATTERN = r'https://attack.mitre.org/{type}/T[A-Z0-9]+/' - -NONFORMATTED_FIELDS = 'note', - - -# kibana/.../siem/server/lib/detection_engine/routes/schemas/add_prepackaged_rules_schema.ts -# /detection_engine/routes/schemas/schemas.ts -# rule_id is required here -# output_index is not allowed (and instead the space index must be used) -# immutable defaults to true instead of to false and if it is there can only be true -# enabled defaults to false instead of true -# version is a required field that must exist - -# rule types -MACHINE_LEARNING = 'machine_learning' -SAVED_QUERY = 'saved_query' -QUERY = 'query' -THRESHOLD = 'threshold' - -RULE_TYPES = [MACHINE_LEARNING, SAVED_QUERY, QUERY, THRESHOLD] - - -class FilterMetadata(jsl.Document): - """Base class for siem rule meta filters.""" - - negate = jsl.BooleanField() - type = jsl.StringField() - key = jsl.StringField() - value = jsl.StringField() - disabled = jsl.BooleanField() - indexRefName = jsl.StringField() - alias = jsl.StringField() # null acceptable - params = jsl.DictField(properties={'query': jsl.StringField()}) - - -class FilterQuery(jsl.Document): - """Base class for siem rule query filters.""" - - match = jsl.DictField({ - 'event.action': jsl.DictField(properties={ - 'query': jsl.StringField(), - 'type': jsl.StringField() - }) - }) - - -class FilterState(jsl.Document): - """Base class for siem rule $state filters.""" - - store = jsl.StringField() - - -class 
FilterExists(jsl.Document): - """Base class for siem rule $state filters.""" - - field = jsl.StringField() - - -class Filters(jsl.Document): - """Schema for filters""" - - exists = jsl.DocumentField(FilterExists) - meta = jsl.DocumentField(FilterMetadata) - state = jsl.DocumentField(FilterState, name='$state') - query = jsl.DocumentField(FilterQuery) - - -class RiskScoreMapping(jsl.Document): - """Risk score mapping.""" - - field = jsl.StringField(required=True) - operator = jsl.StringField(required=False, enum=OPERATORS) - value = jsl.StringField(required=False) - - -class SeverityMapping(jsl.Document): - """Severity mapping.""" - - field = jsl.StringField(required=True) - operator = jsl.StringField(required=False, enum=OPERATORS) - value = jsl.StringField(required=False) - severity = jsl.StringField(required=False) - - -class ThresholdMapping(jsl.Document): - """Threshold mapping.""" - - field = jsl.StringField(required=False) - value = jsl.IntField(minimum=1, required=True) - - -class ThreatTactic(jsl.Document): - """Threat tactics.""" - - id = jsl.StringField(enum=TACTICS_MAP.values()) - name = jsl.StringField(enum=TACTICS) - reference = jsl.StringField(MITRE_URL_PATTERN.format(type='tactics')) - - -class ThreatTechnique(jsl.Document): - """Threat tactics.""" - - id = jsl.StringField(enum=list(technique_lookup)) - name = jsl.StringField(enum=TECHNIQUES) - reference = jsl.StringField(MITRE_URL_PATTERN.format(type='techniques')) - - -class Threat(jsl.Document): - """Threat framework mapping such as MITRE ATT&CK.""" - - framework = jsl.StringField(default='MITRE ATT&CK', required=True) - tactic = jsl.DocumentField(ThreatTactic, required=True) - technique = jsl.ArrayField(jsl.DocumentField(ThreatTechnique), required=True) - - -class SiemRuleApiSchema(jsl.Document): - """Schema for siem rule in API format.""" - - actions = jsl.ArrayField(required=False) - author = jsl.ArrayField(jsl.StringField(default="Elastic"), required=True, min_items=1) - building_block_type = 
jsl.StringField(required=False) - description = jsl.StringField(required=True) - # api defaults to false if blank - enabled = jsl.BooleanField(default=False, required=False) - exceptions_list = jsl.ArrayField(required=False) - # _ required since `from` is a reserved word in python - from_ = jsl.StringField(required=False, default='now-6m', name='from') - false_positives = jsl.ArrayField(jsl.StringField(), required=False) - filters = jsl.ArrayField(jsl.DocumentField(Filters)) - interval = jsl.StringField(pattern=INTERVAL_PATTERN, default='5m', required=False) - license = jsl.StringField(required=True, default="Elastic License") - max_signals = jsl.IntField(minimum=1, required=False, default=100) # cap a max? - meta = jsl.DictField(required=False) - name = jsl.StringField(required=True) - note = jsl.StringField(required=False) - # output_index = jsl.StringField(required=False) # this is NOT allowed! - references = jsl.ArrayField(jsl.StringField(), required=False) - risk_score = jsl.IntField(minimum=0, maximum=100, required=True, default=21) - risk_score_mapping = jsl.ArrayField(jsl.DocumentField(RiskScoreMapping), required=False, min_items=1) - rule_id = jsl.StringField(pattern=UUID_PATTERN, required=True) - rule_name_override = jsl.StringField(required=False) - severity = jsl.StringField(enum=['low', 'medium', 'high', 'critical'], default='low', required=True) - severity_mapping = jsl.ArrayField(jsl.DocumentField(SeverityMapping), required=False, min_items=1) - # saved_id - type must be 'saved_query' to allow this or else it is forbidden - tags = jsl.ArrayField(jsl.StringField(), required=False) - throttle = jsl.StringField(required=False) - timeline_id = jsl.StringField(required=False) - timeline_title = jsl.StringField(required=False) - timestamp_override = jsl.StringField(required=False) - to = jsl.StringField(required=False, default='now') - # require this to be always validated with a role - # type = jsl.StringField(enum=[MACHINE_LEARNING, QUERY, SAVED_QUERY], 
required=True) - threat = jsl.ArrayField(jsl.DocumentField(Threat), required=False, min_items=1) - - with jsl.Scope(MACHINE_LEARNING) as ml_scope: - ml_scope.anomaly_threshold = jsl.IntField(required=True, minimum=0) - ml_scope.machine_learning_job_id = jsl.StringField(required=True) - ml_scope.type = jsl.StringField(enum=[MACHINE_LEARNING], required=True, default=MACHINE_LEARNING) - - with jsl.Scope(SAVED_QUERY) as saved_id_scope: - saved_id_scope.index = jsl.ArrayField(jsl.StringField(), required=False) - saved_id_scope.saved_id = jsl.StringField(required=True) - saved_id_scope.type = jsl.StringField(enum=[SAVED_QUERY], required=True, default=SAVED_QUERY) - - with jsl.Scope(QUERY) as query_scope: - query_scope.index = jsl.ArrayField(jsl.StringField(), required=False) - # this is not required per the API but we will enforce it here - query_scope.language = jsl.StringField(enum=['kuery', 'lucene'], required=True, default='kuery') - query_scope.query = jsl.StringField(required=True) - query_scope.type = jsl.StringField(enum=[QUERY], required=True, default=QUERY) - - with jsl.Scope(THRESHOLD) as threshold_scope: - threshold_scope.index = jsl.ArrayField(jsl.StringField(), required=False) - # this is not required per the API but we will enforce it here - threshold_scope.language = jsl.StringField(enum=['kuery', 'lucene'], required=True, default='kuery') - threshold_scope.query = jsl.StringField(required=True) - threshold_scope.type = jsl.StringField(enum=[THRESHOLD], required=True, default=THRESHOLD) - threshold_scope.threshold = jsl.DocumentField(ThresholdMapping, required=True) - - -class VersionedApiSchema(SiemRuleApiSchema): - """Schema for siem rule in API format with version.""" - - version = jsl.IntField(minimum=1, default=1, required=True) - - -class SiemRuleTomlMetadata(jsl.Document): - """Schema for siem rule toml metadata.""" - - creation_date = jsl.StringField(required=True, pattern=DATE_PATTERN, default=time.strftime('%Y/%m/%d')) - - # added to query with 
rule.optimize() - # rule validated against each ecs schema contained - ecs_version = jsl.ArrayField( - jsl.StringField(pattern=VERSION_PATTERN, required=True, default=ecs.get_max_version()), required=True) - maturity = jsl.StringField(enum=MATURITY_LEVELS, default='development', required=True) - - # if present, add to query - os_type_list = jsl.ArrayField(jsl.StringField(enum=OS_OPTIONS), required=False) - related_endpoint_rules = jsl.ArrayField(jsl.ArrayField(jsl.StringField(), min_items=2, max_items=2), - required=False) - updated_date = jsl.StringField(required=True, pattern=DATE_PATTERN, default=time.strftime('%Y/%m/%d')) - - -class SiemRuleTomlSchema(jsl.Document): - """Schema for siem rule in management toml format.""" - - metadata = jsl.DocumentField(SiemRuleTomlMetadata) - rule = jsl.DocumentField(SiemRuleApiSchema) - - -class Package(jsl.Document): - """Schema for siem rule staging.""" - - -class MappingCount(jsl.Document): - """Mapping count schema.""" - - count = jsl.IntField(minimum=0, required=True) - rta_name = jsl.StringField(pattern=r'[a-zA-Z-_]+', required=True) - rule_name = jsl.StringField(required=True) - sources = jsl.ArrayField(jsl.StringField(), min_items=1) - - -@cached -def get_schema(role, as_rule=False, versioned=False): - """Get applicable schema by role type and rule format.""" - if versioned: - cls = VersionedApiSchema - else: - cls = SiemRuleTomlSchema if as_rule else SiemRuleApiSchema - - return cls.get_schema(ordered=True, role=role) - - -@cached -def schema_validate(contents, as_rule=False, versioned=False): - """Validate against all schemas until first hit.""" - assert isinstance(contents, dict) - role = contents.get('rule', {}).get('type') if as_rule else contents.get('type') - - if not role: - raise ValueError('Missing rule type!') - - return jsonschema.validate(contents, get_schema(role, as_rule, versioned)) - - -metadata_schema = SiemRuleTomlMetadata.get_schema(ordered=True) -package_schema = Package.get_schema(ordered=True) 
-mapping_schema = MappingCount.get_schema(ordered=True)
-
-
-def validate_rta_mapping(mapping):
-    """Validate the RTA mapping."""
-    jsonschema.validate(mapping, mapping_schema)
diff --git a/detection_rules/schemas/__init__.py b/detection_rules/schemas/__init__.py
new file mode 100644
index 00000000000..89074da0382
--- /dev/null
+++ b/detection_rules/schemas/__init__.py
@@ -0,0 +1,69 @@
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License;
+# you may not use this file except in compliance with the Elastic License.
+
+"""Versioned rule schemas, with a helper to downgrade rules to an older stack version."""
+
+from .base import TomlMetadata
+from .rta_schema import validate_rta_mapping
+from ..semver import Version
+
+# import all of the schema versions
+from .v78 import ApiSchema78
+from .v79 import ApiSchema79
+
+__all__ = (
+    "all_schemas",
+    "downgrade",
+    "CurrentSchema",
+    "validate_rta_mapping",
+    "TomlMetadata",
+)
+
+# keep ordered from oldest to newest: downgrade() walks this list in reverse
+all_schemas = [
+    ApiSchema78,
+    ApiSchema79,
+]
+
+# the schema with the highest stack version
+CurrentSchema = max(all_schemas, key=lambda cls: Version(cls.STACK_VERSION))
+
+
+def downgrade(api_contents, target_version):
+    """Downgrade a rule to a target stack version.
+
+    Walks the schemas in reverse list order, calling each schema's ``downgrade`` to convert
+    the contents to the next schema in the walk, and stops once the target version is reached.
+
+    :param api_contents: rule contents in API format
+    :param target_version: stack version to downgrade to; only (major, minor) is compared
+    :raises ValueError: if there is no schema for the target version
+    :return: the downgraded rule contents
+    """
+    # truncate to (major, minor)
+    target_version = Version(target_version)[:2]
+    versions = set(Version(schema_cls.STACK_VERSION) for schema_cls in all_schemas)
+    role = api_contents.get("type")
+
+    # contents that carry a version are converted with the version-aware schema variants
+    check_versioned = "version" in api_contents
+
+    if target_version not in versions:
+        raise ValueError(f"Unable to downgrade from {CurrentSchema.STACK_VERSION} to {target_version}")
+
+    current_schema = None
+
+    for target_schema in reversed(all_schemas):
+        if check_versioned:
+            target_schema = target_schema.versioned()
+
+        # nothing to convert on the first pass: there is no previous schema to downgrade from yet
+        if current_schema is not None:
+            api_contents = current_schema.downgrade(target_schema, api_contents, role)
+
+        current_schema = target_schema
+        if Version(current_schema.STACK_VERSION) == target_version:
+            break
+
+    return api_contents
diff --git a/detection_rules/schemas/base.py
b/detection_rules/schemas/base.py
new file mode 100644
index 00000000000..b236ae328fe
--- /dev/null
+++ b/detection_rules/schemas/base.py
@@ -0,0 +1,154 @@
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License;
+# you may not use this file except in compliance with the Elastic License.
+
+"""Definitions for rule metadata and schemas."""
+
+import time
+
+import jsl
+import jsonschema
+
+from .. import ecs
+from ..utils import cached
+
+
+DATE_PATTERN = r'\d{4}/\d{2}/\d{2}'
+MATURITY_LEVELS = ['development', 'testing', 'staged', 'production', 'deprecated']
+OS_OPTIONS = ['windows', 'linux', 'macos', 'solaris']  # need to verify with ecs
+UUID_PATTERN = r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}'
+VERSION_PATTERN = r'\d+\.\d+\.\d+'
+
+
+class MarkdownField(jsl.StringField):
+    """Helper class for noting which fields are markdown."""
+
+    def __init__(self, *args, **kwargs):
+        # always tag the field as markdown; BaseApiSchema.markdown_fields() looks for this format
+        kwargs["format"] = "markdown"
+        jsl.StringField.__init__(self, *args, **kwargs)
+
+
+class GenericSchema(jsl.Document):
+    """Generic schema with helper methods."""
+
+    @classmethod
+    @cached
+    def get_schema(cls, role=jsl.DEFAULT_ROLE, ordered=False):
+        """Wrap jsl.Document.get_schema to add caching."""
+        return super(GenericSchema, cls).get_schema(role=role, ordered=ordered)
+
+    @classmethod
+    @cached
+    def validate(cls, document, role=None):
+        """Validate a document against this schema, raising a jsonschema error if it does not conform."""
+        # NOTE(review): assumes `cached` can build a key from a dict argument - confirm in utils
+        schema = cls.get_schema(role=role)
+        return jsonschema.validate(document, schema)
+
+    @classmethod
+    def strip_additional_properties(cls, document, role=None):
+        """Strip properties that aren't defined in the schema.
+
+        Required properties that are missing from the document are filled in with the schema
+        default, when one is defined. The result is validated before it is returned.
+
+        :param document: rule contents in API format
+        :param role: rule type to resolve the schema with; defaults to the document's own ``type``
+        :raises ValueError: if the rule type is not supported by this schema
+        """
+        if role is None:
+            role = document.get("type", jsl.DEFAULT_ROLE)
+
+        # RULE_TYPES is declared by the versioned API schemas rather than by this class
+        if role not in cls.RULE_TYPES:
+            raise ValueError(f"Unsupported rule type {role}")
+
+        target_schema = cls.get_schema(role)
+        properties = target_schema["properties"]
+        # required properties are listed on the object schema, not flagged on each property
+        required = target_schema.get("required", [])
+        stripped = {}
+
+        # simple version, can customize or walk structures deeper when we have a need and use case
+        for field, field_schema in properties.items():
+            if field in document:
+                stripped[field] = document[field]
+            elif field in required and "default" in field_schema:
+                stripped[field] = field_schema["default"]
+
+        # finally, validate against the json schema
+        cls.validate(stripped, role)
+        return stripped
+
+
+class TomlMetadata(GenericSchema):
+    """Schema for siem rule toml metadata."""
+
+    creation_date = jsl.StringField(required=True, pattern=DATE_PATTERN, default=time.strftime('%Y/%m/%d'))
+
+    # rule validated against each ecs schema contained
+    ecs_version = jsl.ArrayField(
+        jsl.StringField(pattern=VERSION_PATTERN, required=True, default=ecs.get_max_version()), required=True)
+    maturity = jsl.StringField(enum=MATURITY_LEVELS, default='development', required=True)
+
+    os_type_list = jsl.ArrayField(jsl.StringField(enum=OS_OPTIONS), required=False)
+    related_endpoint_rules = jsl.ArrayField(jsl.ArrayField(jsl.StringField(), min_items=2, max_items=2),
+                                            required=False)
+    updated_date = jsl.StringField(required=True, pattern=DATE_PATTERN, default=time.strftime('%Y/%m/%d'))
+
+
+class BaseApiSchema(GenericSchema):
+    """Base API schema with generic methods."""
+
+    STACK_VERSION = str()
+
+    rule_id = jsl.StringField(pattern=UUID_PATTERN, required=True)
+    type = jsl.StringField(required=True)
+
+    @classmethod
+    @cached
+    def versioned(cls):
+        """Get a subclass that is version aware."""
+        attrs = {"version": jsl.IntField(minimum=1, default=1, required=True)}
+        return type("Versioned" + cls.__name__, (cls, ), attrs)
+
+    @classmethod
+    def validate(cls, document, role=None, toml=False):
+        """Validate a document against this API schema.
+
+        :param role: rule type; defaults to the ``type`` found in the document
+        :param toml: validate ``document`` as TOML contents (``metadata`` and ``rule`` tables)
+        """
+        if toml:
+            role = role or document.get("rule", {}).get("type")
+            return cls.toml_schema().validate(document, role=role)
+
+        role = role or document.get("type")
+        return super(BaseApiSchema, cls).validate(document, role=role)
+
+    @classmethod
+    @cached
+    def markdown_fields(cls, role=None):
+        """Get the names of the properties declared with the markdown format."""
+        properties = cls.get_schema(role)["properties"]
+        return {p for p in properties if properties[p].get("format") == "markdown"}
+
+    @classmethod
+    @cached
+    def toml_schema(cls):
+        """Create a custom TOML schema class that includes this API schema."""
+        attrs = {
+            "metadata": jsl.DocumentField(TomlMetadata, required=True),
+            "rule": jsl.DocumentField(cls, required=True)
+        }
+        # named "Toml..." so it is not confused with the class built by versioned()
+        return type("Toml" + cls.__name__, (GenericSchema, ), attrs)
+
+    @classmethod
+    def downgrade(cls, target_cls, document, role=None):
+        """Downgrade from one schema to its predecessor."""
+        # by default, we'll just strip extra properties
+        # different schemas can override this to provide a more advanced migration path
+        # and deeper evaluation of the schema.
+        return target_cls.strip_additional_properties(document, role=role)
diff --git a/detection_rules/schemas/rta_schema.py b/detection_rules/schemas/rta_schema.py
new file mode 100644
index 00000000000..8dc5ca0f5a2
--- /dev/null
+++ b/detection_rules/schemas/rta_schema.py
@@ -0,0 +1,23 @@
+# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+# or more contributor license agreements. Licensed under the Elastic License;
+# you may not use this file except in compliance with the Elastic License.
+ +import jsl +import jsonschema + + +class MappingCount(jsl.Document): + """Mapping count schema.""" + + count = jsl.IntField(minimum=0, required=True) + rta_name = jsl.StringField(pattern=r'[a-zA-Z-_]+', required=True) + rule_name = jsl.StringField(required=True) + sources = jsl.ArrayField(jsl.StringField(), min_items=1) + + +mapping_schema = MappingCount.get_schema() + + +def validate_rta_mapping(mapping): + """Validate the RTA mapping.""" + jsonschema.validate(mapping, mapping_schema) diff --git a/detection_rules/schemas/v78.py b/detection_rules/schemas/v78.py new file mode 100644 index 00000000000..1bf71ae761a --- /dev/null +++ b/detection_rules/schemas/v78.py @@ -0,0 +1,132 @@ +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License; +# you may not use this file except in compliance with the Elastic License. + +"""Definitions for rule metadata and schemas.""" + +import jsl + +from .base import BaseApiSchema, MarkdownField +from ..attack import TACTICS, TACTICS_MAP, TECHNIQUES, technique_lookup + + +INTERVAL_PATTERN = r'\d+[mshd]' +MITRE_URL_PATTERN = r'https://attack.mitre.org/{type}/T[A-Z0-9]+/' + + +# kibana/.../siem/server/lib/detection_engine/routes/schemas/add_prepackaged_rules_schema.ts +# /detection_engine/routes/schemas/schemas.ts +# rule_id is required here +# output_index is not allowed (and instead the space index must be used) +# immutable defaults to true instead of to false and if it is there can only be true +# enabled defaults to false instead of true +# version is a required field that must exist + +# rule types +MACHINE_LEARNING = 'machine_learning' +SAVED_QUERY = 'saved_query' +QUERY = 'query' + + +class Filters(jsl.Document): + """Intermediate schema for handling DSL-like filters.""" + + class FilterMetadata(jsl.Document): + negate = jsl.BooleanField() + type = jsl.StringField() + key = jsl.StringField() + value = jsl.StringField() + 
disabled = jsl.BooleanField() + indexRefName = jsl.StringField() + alias = jsl.StringField() # null acceptable + params = jsl.DictField(properties={'query': jsl.StringField()}) + + class FilterQuery(jsl.Document): + match = jsl.DictField({ + 'event.action': jsl.DictField(properties={ + 'query': jsl.StringField(), + 'type': jsl.StringField() + }) + }) + + class FilterState(jsl.Document): + store = jsl.StringField() + + class FilterExists(jsl.Document): + field = jsl.StringField() + + exists = jsl.DocumentField(FilterExists) + meta = jsl.DocumentField(FilterMetadata) + state = jsl.DocumentField(FilterState, name='$state') + query = jsl.DocumentField(FilterQuery) + + +class Threat(jsl.Document): + """Threat framework mapping such as MITRE ATT&CK.""" + + class ThreatTactic(jsl.Document): + id = jsl.StringField(enum=TACTICS_MAP.values()) + name = jsl.StringField(enum=TACTICS) + reference = jsl.StringField(MITRE_URL_PATTERN.format(type='tactics')) + + class ThreatTechnique(jsl.Document): + id = jsl.StringField(enum=list(technique_lookup)) + name = jsl.StringField(enum=TECHNIQUES) + reference = jsl.StringField(MITRE_URL_PATTERN.format(type='techniques')) + + framework = jsl.StringField(default='MITRE ATT&CK', required=True) + tactic = jsl.DocumentField(ThreatTactic, required=True) + technique = jsl.ArrayField(jsl.DocumentField(ThreatTechnique), required=True) + + +class ApiSchema78(BaseApiSchema): + """Schema for siem rule in API format.""" + + STACK_VERSION = "7.8" + RULE_TYPES = [MACHINE_LEARNING, SAVED_QUERY, QUERY] + + actions = jsl.ArrayField(required=False) + description = jsl.StringField(required=True) + # api defaults to false if blank + enabled = jsl.BooleanField(default=False, required=False) + # _ required since `from` is a reserved word in python + from_ = jsl.StringField(required=False, default='now-6m', name='from') + false_positives = jsl.ArrayField(jsl.StringField(), required=False) + filters = jsl.ArrayField(jsl.DocumentField(Filters)) + interval = 
jsl.StringField(pattern=INTERVAL_PATTERN, default='5m', required=False) + max_signals = jsl.IntField(minimum=1, required=False, default=100) # cap a max? + meta = jsl.DictField(required=False) + name = jsl.StringField(required=True) + note = MarkdownField(required=False) + # output_index =jsl.StringField(required=False) # this is NOT allowed! + references = jsl.ArrayField(jsl.StringField(), required=False) + risk_score = jsl.IntField(minimum=0, maximum=100, required=True, default=21) + severity = jsl.StringField(enum=['low', 'medium', 'high', 'critical'], default='low', required=True) + tags = jsl.ArrayField(jsl.StringField(), required=False) + throttle = jsl.StringField(required=False) + timeline_id = jsl.StringField(required=False) + timeline_title = jsl.StringField(required=False) + to = jsl.StringField(required=False, default='now') + + type = jsl.StringField(enum=[MACHINE_LEARNING, QUERY, SAVED_QUERY], required=True) + threat = jsl.ArrayField(jsl.DocumentField(Threat), required=False, min_items=1) + + with jsl.Scope(MACHINE_LEARNING) as ml_scope: + ml_scope.anomaly_threshold = jsl.IntField(required=True, minimum=0) + ml_scope.machine_learning_job_id = jsl.StringField(required=True) + ml_scope.type = jsl.StringField(enum=[MACHINE_LEARNING], required=True, default=MACHINE_LEARNING) + + with jsl.Scope(SAVED_QUERY) as saved_id_scope: + saved_id_scope.index = jsl.ArrayField(jsl.StringField(), required=False) + saved_id_scope.saved_id = jsl.StringField(required=True) + saved_id_scope.type = jsl.StringField(enum=[SAVED_QUERY], required=True, default=SAVED_QUERY) + + with jsl.Scope(QUERY) as query_scope: + query_scope.index = jsl.ArrayField(jsl.StringField(), required=False) + # this is not required per the API but we will enforce it here + query_scope.language = jsl.StringField(enum=['kuery', 'lucene'], required=True, default='kuery') + query_scope.query = jsl.StringField(required=True) + query_scope.type = jsl.StringField(enum=[QUERY], required=True, default=QUERY) 
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License;
# you may not use this file except in compliance with the Elastic License.

"""Definitions for rule metadata and schemas."""

import jsl
from .v78 import ApiSchema78


# Values accepted by the optional `operator` field of RiskScoreMapping and
# SeverityMapping entries (used as their `enum`).
OPERATORS = ['equals']


# kibana/.../siem/server/lib/detection_engine/routes/schemas/add_prepackaged_rules_schema.ts
# /detection_engine/routes/schemas/schemas.ts
# rule_id is required here
# output_index is not allowed (and instead the space index must be used)
# immutable defaults to true instead of to false and if it is there can only be true
# enabled defaults to false instead of true
# version is a required field that must exist

# rule types
# Rule type added on top of the ones inherited from ApiSchema78.RULE_TYPES.
THRESHOLD = "threshold"


class RiskScoreMapping(jsl.Document):
    """Shape of one entry in the `risk_score_mapping` array of ApiSchema79.

    Only `field` is required; `operator` (restricted to OPERATORS) and `value`
    are optional strings.
    """

    field = jsl.StringField(required=True)
    operator = jsl.StringField(required=False, enum=OPERATORS)
    value = jsl.StringField(required=False)


class SeverityMapping(jsl.Document):
    """Shape of one entry in the `severity_mapping` array of ApiSchema79.

    Same fields as RiskScoreMapping plus an optional `severity` string.
    """

    field = jsl.StringField(required=True)
    operator = jsl.StringField(required=False, enum=OPERATORS)
    value = jsl.StringField(required=False)
    # NOTE(review): no enum is applied here, so any string validates; confirm
    # whether this should be limited to the rule-level severity values.
    severity = jsl.StringField(required=False)


class ThresholdMapping(jsl.Document):
    """Shape of the `threshold` object required by threshold rules.

    `value` is a required integer of at least 1; `field` is an optional string.
    """

    field = jsl.StringField(required=False)
    value = jsl.IntField(minimum=1, required=True)


class ApiSchema79(ApiSchema78):
    """Schema for siem rule in API format.

    Stack version 7.9 variant: extends ApiSchema78 with the fields declared
    below and adds the THRESHOLD rule type to RULE_TYPES.
    """

    STACK_VERSION = "7.9"
    # A new list is built here, so ApiSchema78.RULE_TYPES itself is not mutated.
    RULE_TYPES = ApiSchema78.RULE_TYPES + [THRESHOLD]

    # `author` and `license` are the two fields declared required here; the
    # remaining additions below are optional.
    author = jsl.ArrayField(jsl.StringField(default="Elastic"), required=True, min_items=1)
    building_block_type = jsl.StringField(required=False)
    # No item schema is given, so the array contents are not constrained here.
    exceptions_list = jsl.ArrayField(required=False)
    license = jsl.StringField(required=True, default="Elastic License")
    risk_score_mapping = jsl.ArrayField(jsl.DocumentField(RiskScoreMapping), required=False, min_items=1)
    rule_name_override = jsl.StringField(required=False)
    severity_mapping = jsl.ArrayField(jsl.DocumentField(SeverityMapping), required=False, min_items=1)
    timestamp_override = jsl.StringField(required=False)

    # Redeclared so the enum is built from this class's RULE_TYPES (includes THRESHOLD).
    type = jsl.StringField(enum=RULE_TYPES, required=True)

    # there might be a bug in jsl that requires us to redefine these here
    query_scope = ApiSchema78.query_scope
    saved_id_scope = ApiSchema78.saved_id_scope
    ml_scope = ApiSchema78.ml_scope

    # Fields declared for the THRESHOLD scope: index, language, query, a `type`
    # pinned to THRESHOLD, and the required `threshold` document.
    with jsl.Scope(THRESHOLD) as threshold_scope:
        threshold_scope.index = jsl.ArrayField(jsl.StringField(), required=False)
        # this is not required per the API but we will enforce it here
        threshold_scope.language = jsl.StringField(enum=['kuery', 'lucene'], required=True, default='kuery')
        threshold_scope.query = jsl.StringField(required=True)
        threshold_scope.type = jsl.StringField(enum=[THRESHOLD], required=True, default=THRESHOLD)
        threshold_scope.threshold = jsl.DocumentField(ThresholdMapping, required=True)

    # Under jsl's default role, `type` is the general field accepting any of RULE_TYPES.
    with jsl.Scope(jsl.DEFAULT_ROLE) as default_scope:
        default_scope.type = type
"""Test stack versioned schemas."""
import unittest
import uuid

from detection_rules.rule import Rule
from detection_rules.schemas import downgrade, CurrentSchema


class TestSchemas(unittest.TestCase):
    """Check how rule contents behave when downgraded to older stack versions."""

    @classmethod
    def setUpClass(cls):
        """Create the rule fixtures shared by all tests: query, versioned query and threshold."""
        query_contents = dict(
            author=["Elastic"],
            description="test description",
            language="kuery",
            license="Elastic License",
            name="test rule",
            query="process.name:test.query",
            risk_score=21,
            rule_id=str(uuid.uuid4()),
            severity="low",
            type="query",
        )
        cls.compatible_rule = Rule("test.toml", query_contents)

        # identical rule, except that it carries explicit version information
        versioned = cls.compatible_rule.copy()
        versioned.contents["version"] = 10
        cls.versioned_rule = versioned

        threshold_contents = dict(
            author=["Elastic"],
            description="test description",
            language="kuery",
            license="Elastic License",
            name="test rule",
            query="process.name:test.query",
            risk_score=21,
            rule_id=str(uuid.uuid4()),
            severity="low",
            threshold=dict(field="destination.bytes", value=75),
            type="threshold",
        )
        cls.threshold_rule = Rule("test.toml", threshold_contents)

    def _assert_unchanged_for_7_9(self, api_contents):
        """Downgrading to the current schema or any 7.9.x target must return the contents as-is."""
        for stack_version in (CurrentSchema.STACK_VERSION, "7.9", "7.9.2"):
            self.assertDictEqual(downgrade(api_contents, stack_version), api_contents)

    @staticmethod
    def _expected_7_8_query(rule_id, **extra):
        """Build the contents expected after a 7.8 downgrade: no "author" and no "license"."""
        expected = {
            "description": "test description",
            "language": "kuery",
            "name": "test rule",
            "query": "process.name:test.query",
            "risk_score": 21,
            "rule_id": rule_id,
            "severity": "low",
            "type": "query",
        }
        expected.update(extra)
        return expected

    def test_query_downgrade(self):
        """A plain KQL rule downgrades cleanly to 7.8 and is rejected for 7.7."""
        api_contents = self.compatible_rule.contents
        self._assert_unchanged_for_7_9(api_contents)

        downgraded = downgrade(api_contents, "7.8")
        self.assertDictEqual(downgraded, self._expected_7_8_query(self.compatible_rule.id))

        with self.assertRaises(ValueError):
            downgrade(api_contents, "7.7")

    def test_versioned_downgrade(self):
        """A KQL rule with version information keeps its version through a 7.8 downgrade."""
        api_contents = self.versioned_rule.contents
        self._assert_unchanged_for_7_9(api_contents)

        downgraded = downgrade(api_contents, "7.8")
        self.assertDictEqual(downgraded, self._expected_7_8_query(self.versioned_rule.id, version=10))

        with self.assertRaises(ValueError):
            downgrade(api_contents, "7.7")

    def test_threshold_downgrade(self):
        """A threshold rule (introduced in 7.9) cannot be downgraded below 7.9."""
        api_contents = self.threshold_rule.contents
        self._assert_unchanged_for_7_9(api_contents)

        with self.assertRaises(ValueError):
            downgrade(api_contents, "7.7")

        # 7.8 is a known schema, but it has no threshold rule type
        with self.assertRaisesRegex(ValueError, "Unsupported rule type"):
            downgrade(api_contents, "7.8")