From aff4786f95f201678b04c3ab6086318270623e51 Mon Sep 17 00:00:00 2001 From: Daniel Mitterdorfer Date: Tue, 13 Mar 2018 13:35:09 +0100 Subject: [PATCH] Update eventdata track for Rally 0.9.0 (#6) With this commit we update the eventdata track to match Rally's updated track syntax: * Replace "index" operation with "bulk" * Remove runners that are now in Rally core * Move index-settings to new create-index operation * Update indices definition --- .../challenges/bulk-size-evaluation.json | 16 +++++-- .../combined-indexing-and-querying.json | 42 ++++++----------- eventdata/challenges/shard-size-on-disk.json | 20 ++++++--- eventdata/challenges/shard-sizing.json | 20 ++++++--- eventdata/elasticlogs-index.json | 45 +++++++++++++++++++ .../operations/generate-historic-data.json | 10 ++--- eventdata/operations/indexing.json | 28 ++++++------ .../elasticlogs_bulk_source.py | 14 +++--- .../elasticlogs_kibana_source.py | 3 +- .../sample_based_bulk_source.py | 14 +++--- eventdata/runners/loadtemplate_runner.py | 28 ------------ eventdata/track.json | 29 +++++++----- eventdata/track.py | 5 +-- 13 files changed, 151 insertions(+), 123 deletions(-) create mode 100644 eventdata/elasticlogs-index.json delete mode 100755 eventdata/runners/loadtemplate_runner.py diff --git a/eventdata/challenges/bulk-size-evaluation.json b/eventdata/challenges/bulk-size-evaluation.json index b02fd783cd08f..c48101cbf508a 100644 --- a/eventdata/challenges/bulk-size-evaluation.json +++ b/eventdata/challenges/bulk-size-evaluation.json @@ -1,15 +1,23 @@ { "name": "bulk-size-evaluation", "description": "Indexes with different bulk sizes. IDs are autogenerated by Elasticsearch, meaning there are no conflicts.", - "index-settings": { - "index.number_of_replicas": {{ replica_count }}, - "index.number_of_shards": {{ shard_count }} - }, "meta": { "client_count": {{ clients }}, "benchmark_type": "indexing_bulksize" }, "schedule": [ + { + "operation": "delete-index" + }, + { + "operation": { + "operation-type": "create-index", + "settings": { + "index.number_of_replicas": {{ replica_count }}, + "index.number_of_shards": {{ shard_count }} + } + } + }, { "operation": "index-append-125", "warmup-time-period": 0, diff --git a/eventdata/challenges/combined-indexing-and-querying.json b/eventdata/challenges/combined-indexing-and-querying.json index 262cdecd299a2..996d97e01cc45 100644 --- a/eventdata/challenges/combined-indexing-and-querying.json +++ b/eventdata/challenges/combined-indexing-and-querying.json @@ -7,41 +7,27 @@ }, "schedule": [ { - "operation": "deleteindex_elasticlogs_i-*", - "iterations": 1 + "operation": "deleteindex_elasticlogs_i-*" }, { - "operation": "fieldstats_elasticlogs_q-*_ELASTICLOGS", - "iterations": 1 + "operation": "fieldstats_elasticlogs_q-*_ELASTICLOGS" }, { - "parallel": { - "warmup-iterations": 0, - "iterations": 200, - "tasks": [ - { - "operation": "relative-kibana-content_issues-dashboard_75%", - "meta": { - "query_type": "warmup" - } - } - ] + "operation": "relative-kibana-content_issues-dashboard_75%", + "warmup-iterations": 0, + "iterations": 200, + "meta": { + "query_type": "warmup" } }, { - "parallel": { - "warmup-time-period": 0, - "time-period": {{ rate_limit_duration_secs }}, - "tasks": [ - { - "operation": "relative-kibana-content_issues-dashboard_50%", - "target-interval": 60, - "meta": { - "target_indexing_rate": 0, - "query_type": "historic" - } - } - ] + "operation": "relative-kibana-content_issues-dashboard_50%", + "target-interval": 60, + "warmup-time-period": 0, + "time-period": {{ rate_limit_duration_secs }}, + "meta": { + "target_indexing_rate": 0, + "query_type": "historic" } }, { diff --git a/eventdata/challenges/shard-size-on-disk.json b/eventdata/challenges/shard-size-on-disk.json index e2dbf01e388b8..517a370742952 100644 --- a/eventdata/challenges/shard-size-on-disk.json +++ b/eventdata/challenges/shard-size-on-disk.json @@ -1,16 +1,24 @@ { "name": "shard-size-on-disk", "description": "Indexes sets of 2M events into Elasticsearch, followed by index statistics in order to track how index size depends on event count. IDs are autogenerated by Elasticsearch, meaning there are no conflicts. This process is repeatedly run until 100M events have been indexed into the shard.", - "index-settings": { - "index.number_of_replicas": 0, - "index.number_of_shards": 1, - "index.refresh_interval": "5s", - "index.codec": "best_compression" - }, "meta": { "benchmark_type": "shard-size-on-disk" }, "schedule": [ + { + "operation": "delete-index" + }, + { + "operation": { + "operation-type": "create-index", + "settings": { + "index.number_of_replicas": 0, + "index.number_of_shards": 1, + "index.refresh_interval": "5s", + "index.codec": "best_compression" + } + } + }, {% for n in range(1,50) %} { "name": "index-append-1000-shard-sizing-{{n}}", diff --git a/eventdata/challenges/shard-sizing.json b/eventdata/challenges/shard-sizing.json index b4be5842a1c62..7e571e112cc15 100644 --- a/eventdata/challenges/shard-sizing.json +++ b/eventdata/challenges/shard-sizing.json @@ -1,16 +1,24 @@ { "name": "shard-sizing", "description": "Indexes sets of 2M events into Elasticsearch, followed by index statistics and simulated Kibana queries. IDs are autogenerated by Elasticsearch, meaning there are no conflicts. This process is repeatedly run until 50M events have been indexed into the shard. This allows query latency to be evaluated as a function of shard size.", - "index-settings": { - "index.number_of_replicas": 0, - "index.number_of_shards": 1, - "index.refresh_interval": "5s", - "index.codec": "best_compression" - }, "meta": { "benchmark_type": "shard-sizing" }, "schedule": [ + { + "operation": "delete-index" + }, + { + "operation": { + "operation-type": "create-index", + "settings": { + "index.number_of_replicas": 0, + "index.number_of_shards": 1, + "index.refresh_interval": "5s", + "index.codec": "best_compression" + } + } + }, {% for n in range(1,shard_sizing_iterations) %} { "name": "index-append-1000-shard-sizing-iteration-{{n}}", diff --git a/eventdata/elasticlogs-index.json b/eventdata/elasticlogs-index.json new file mode 100644 index 0000000000000..58e26ee0142f1 --- /dev/null +++ b/eventdata/elasticlogs-index.json @@ -0,0 +1,45 @@ +{ + "mappings": { + "logs": { + "properties": { + "@timestamp": { "type": "date" }, + "message": { "type": "text", "index": false }, + "agent": { "type": "keyword", "ignore_above": 256 }, + "bytes": { "type": "integer" }, + "clientip": { "type": "ip" }, + "httpversion": { "type": "keyword", "ignore_above": 256 }, + "response": { "type": "short" }, + "verb": { "type": "keyword", "ignore_above": 256 }, + "tags": { "type": "keyword", "ignore_above": 256 }, + "geoip" : { + "properties" : { + "country_name" : { "type": "keyword" }, + "location" : { "type": "geo_point" } + } + }, + "useragent": { + "properties": { + "name": { "type": "keyword", "ignore_above": 256 }, + "os": { "type": "keyword", "ignore_above": 256 }, + "os_name": { "type": "keyword", "ignore_above": 256 } + } + }, + "request": { + "norms": false, + "type": "text", + "fields": { + "keyword": { "ignore_above": 256, "type": "keyword" } + } + }, + "referrer": { + "norms": false, + "type": "text", + "fields": { + "keyword": { "ignore_above": 256, "type": "keyword" } + } + } + }, + "_all": { "enabled": false } + } + } +} \ No newline at end of file diff --git a/eventdata/operations/generate-historic-data.json b/eventdata/operations/generate-historic-data.json index c9667159073ca..3bd85aac7c5ce 100644 --- a/eventdata/operations/generate-historic-data.json +++ b/eventdata/operations/generate-historic-data.json @@ -1,7 +1,8 @@ { "name": "load_elasticlogs_snapshot_template", - "operation-type": "load_template", - "index_template_body": { + "operation-type": "create-index-template", + "template": "elasticlogs-historic", + "body": { "template": "elasticlogs-h-*", "settings": { "index.refresh_interval": "10s", @@ -13,12 +14,11 @@ {% include "mappings.json" %} , "aliases": {} - }, - "index_template_name": "elasticlogs-historic" + } }, { "name": "index_6x_speed", - "operation-type": "index", + "operation-type": "bulk", "param-source": "elasticlogs_bulk", "index": "elasticlogs-h---
", "daily_index": true, diff --git a/eventdata/operations/indexing.json b/eventdata/operations/indexing.json index a8b06a7dd7988..18a8525ff4789 100644 --- a/eventdata/operations/indexing.json +++ b/eventdata/operations/indexing.json @@ -1,60 +1,60 @@ { "name": "index-append-50000", - "operation-type": "index", + "operation-type": "bulk", "param-source": "elasticlogs_bulk", "bulk-size": 50000 }, { "name": "index-append-20000", - "operation-type": "index", + "operation-type": "bulk", "param-source": "elasticlogs_bulk", "bulk-size": 20000 }, { "name": "index-append-10000", - "operation-type": "index", + "operation-type": "bulk", "param-source": "elasticlogs_bulk", "bulk-size": 10000 }, { "name": "index-append-5000", - "operation-type": "index", + "operation-type": "bulk", "param-source": "elasticlogs_bulk", "bulk-size": 5000 }, { "name": "index-append-2000", - "operation-type": "index", + "operation-type": "bulk", "param-source": "elasticlogs_bulk", "bulk-size": 2000 }, { "name": "index-append-1000", - "operation-type": "index", + "operation-type": "bulk", "param-source": "elasticlogs_bulk", "bulk-size": 1000 }, { "name": "index-append-500", - "operation-type": "index", + "operation-type": "bulk", "param-source": "elasticlogs_bulk", "bulk-size": 500 }, { "name": "index-append-250", - "operation-type": "index", + "operation-type": "bulk", "param-source": "elasticlogs_bulk", "bulk-size": 250 }, { "name": "index-append-125", - "operation-type": "index", + "operation-type": "bulk", "param-source": "elasticlogs_bulk", "bulk-size": 125 }, { "name": "index-append-1000-shard-sizing", - "operation-type": "index", + "operation-type": "bulk", "param-source": "elasticlogs_bulk", "index": "elasticlogs", "starting_point": "2017-01-01:02:00:00", @@ -63,14 +63,14 @@ }, { "name": "index-append-1000-elasticlogs_q_write", - "operation-type": "index", + "operation-type": "bulk", "param-source": "elasticlogs_bulk", "index": "elasticlogs_q_write", "bulk-size": 1000 }, { "name": "index-append-1000-elasticlogs_i_write", - "operation-type": "index", + "operation-type": "bulk", "param-source": "elasticlogs_bulk", "index": "elasticlogs_i_write", "bulk-size": 1000 @@ -183,6 +183,6 @@ }, { "name": "deleteindex_elasticlogs_i-*", - "operation-type": "deleteindex", - "index_pattern": "elasticlogs_i-*" + "operation-type": "delete-index", + "index": "elasticlogs_i-*" } \ No newline at end of file diff --git a/eventdata/parameter_sources/elasticlogs_bulk_source.py b/eventdata/parameter_sources/elasticlogs_bulk_source.py index ee38a3f788548..d9f328c2554de 100755 --- a/eventdata/parameter_sources/elasticlogs_bulk_source.py +++ b/eventdata/parameter_sources/elasticlogs_bulk_source.py @@ -45,8 +45,8 @@ class ElasticlogsBulkSource: '2016-12-20 20:12:32' and an acceleration factor of 2.0, events will be generated in timestamp sequence covering a 2-hour window, '2017-02-20 20:12:32' to '2017-02-20 22:12:32' (approximately). """ - def __init__(self, indices, params): - self._indices = indices + def __init__(self, track, params, **kwargs): + self._indices = track.indices self._params = params self._randomevent = RandomEvent(params) @@ -56,19 +56,19 @@ def __init__(self, indices, params): self._default_index = False if 'index' not in params.keys(): - if len(indices) > 1: - logger.debug("[bulk] More than one index specified in track configuration. Will use the first one ({})".format(indices[0].name)) + if len(self._indices) > 1: + logger.debug("[bulk] More than one index specified in track configuration. Will use the first one ({})".format(self._indices[0].name)) else: - logger.debug("[bulk] Using index specified in track configuration ({})".format(indices[0].name)) + logger.debug("[bulk] Using index specified in track configuration ({})".format(self._indices[0].name)) - self._params['index'] = indices[0].name + self._params['index'] = self._indices[0].name self._default_index = True else: logger.debug("[bulk] Index pattern specified in parameters ({}) will be used".format(params['index'])) if 'type' not in params.keys(): - self._params['type'] = indices[0].types[0].name + self._params['type'] = self._indices[0].types[0].name def partition(self, partition_index, total_partitions): return self diff --git a/eventdata/parameter_sources/elasticlogs_kibana_source.py b/eventdata/parameter_sources/elasticlogs_kibana_source.py index 305199029901c..ab62df548cdff 100755 --- a/eventdata/parameter_sources/elasticlogs_kibana_source.py +++ b/eventdata/parameter_sources/elasticlogs_kibana_source.py @@ -55,8 +55,7 @@ class ElasticlogsKibanaSource: '4d' - Consists of a number and either m (minutes), h (hours) or d (days). Can not be lower than 1 minute. '10%' - Length given as percentage of window size. Only available when fieldstats_id have been specified. """ - def __init__(self, indices, params): - self._indices = indices + def __init__(self, track, params, **kwargs): self._params = params self._index_pattern = 'elasticlogs-*' self._query_string_list = ['*'] diff --git a/eventdata/parameter_sources/sample_based_bulk_source.py b/eventdata/parameter_sources/sample_based_bulk_source.py index 384efffda9148..5cea6a75571b8 100755 --- a/eventdata/parameter_sources/sample_based_bulk_source.py +++ b/eventdata/parameter_sources/sample_based_bulk_source.py @@ -76,8 +76,8 @@ class SampleBasedBulkSource: not supported) are to be replaced by the event timestamp. If no timestamp is to be added or modified, this parameter can be left out. """ - def __init__(self, indices, params): - self._indices = indices + def __init__(self, track, params, **kwargs): + self._indices = track.indices self._params = params self._samples = [] self._next_index = 0 @@ -88,19 +88,19 @@ def __init__(self, indices, params): self._default_index = False if 'index' not in params.keys(): - if len(indices) > 1: - logger.debug("[bulk] More than one index specified in track configuration. Will use the first one ({})".format(indices[0].name)) + if len(self._indices) > 1: + logger.debug("[bulk] More than one index specified in track configuration. Will use the first one ({})".format(self._indices[0].name)) else: - logger.debug("[bulk] Using index specified in track configuration ({})".format(indices[0].name)) + logger.debug("[bulk] Using index specified in track configuration ({})".format(self._indices[0].name)) - self._params['index'] = indices[0].name + self._params['index'] = self._indices[0].name self._default_index = True else: logger.debug("[bulk] Index pattern specified in parameters ({}) will be used".format(params['index'])) if 'type' not in params.keys(): - self._params['type'] = indices[0].types[0].name + self._params['type'] = self._indices[0].types[0].name if 'timestamp_field' not in params.keys(): self._params['timestamp_field'] = [] diff --git a/eventdata/runners/loadtemplate_runner.py b/eventdata/runners/loadtemplate_runner.py deleted file mode 100755 index 0356870547bed..0000000000000 --- a/eventdata/runners/loadtemplate_runner.py +++ /dev/null @@ -1,28 +0,0 @@ -import logging -import json -import elasticsearch - -logger = logging.getLogger("track.elasticlogs") - -def loadtemplate(es, params): - """ - Creates an index in Elasticsearch with given aliases. It also uploads configured template if configured. - This runner can be used to set up indices that will use the rollover mechanism at the start of a benchmark. - - It expects the parameter hash to contain the following keys: - "index_template_body" - Index template body. - "index_template_name" - Specifies the name of the index template being uploaded if one has been specified through the - "index_template_body" parameter. Defaults to 'elasticlogs' - """ - if 'index_template_body' in params: - if 'index_template_name' in params: - template_name = params['index_template_name'] - else: - template_name = 'elasticlogs' - - if logger.isEnabledFor(logging.DEBUG): - logger.debug("[createindex] Upload index template {} => {}".format(template_name, json.dumps(params['index_template_body']))) - - es.indices.put_template(name=template_name, body=params['index_template_body']) - - return 1, "ops" diff --git a/eventdata/track.json b/eventdata/track.json index 5dd6b61b53f05..701f0bdaff216 100644 --- a/eventdata/track.json +++ b/eventdata/track.json @@ -19,17 +19,14 @@ {% import "rally.helpers" as rally with context %} { - "short-description": "Track for simulating different aspects of event-based use cases.", - "description": "This track contains data generators and associated queries and aggregations to allow simulation of event-based use cases.", + "version": 2, + "description": "Track for simulating different aspects of event-based use cases.", "indices": [ { "name": "elasticlogs", - "types": [ - { - "name": "logs", - "mapping": "mappings.json" - } - ] + "body": "elasticlogs-index.json", + "types": [ "logs" ], + "auto-managed": false } ], "#COMMENT": "'operations' just define all possible operations but this is not the actual execution schedule. The execution is defined in the 'challenges' block and it just refers to the defined operations. The intention between this separation is to allow reuse of operations", @@ -41,14 +38,22 @@ "name": "append-no-conflicts", "default": true, "description": "Runs indexing with a batch size of 1000 for 20 minutes. IDs are autogenerated by Elasticsearch, meaning there are no conflicts.", - "index-settings": { - "index.number_of_replicas": {{ replica_count }}, - "index.number_of_shards": {{ shard_count }} - }, "meta": { "client_count": {{ clients }} }, "schedule": [ + { + "operation": "delete-index" + }, + { + "operation": { + "operation-type": "create-index", + "settings": { + "index.number_of_replicas": {{ replica_count }}, + "index.number_of_shards": {{ shard_count }} + } + } + }, { "operation": "index-append-1000", "warmup-time-period": 0, diff --git a/eventdata/track.py b/eventdata/track.py index 2ad564cc981c9..662534ebb7d50 100644 --- a/eventdata/track.py +++ b/eventdata/track.py @@ -3,21 +3,18 @@ from eventdata.parameter_sources.sample_based_bulk_source import SampleBasedBulkSource from eventdata.runners import rollover_runner from eventdata.runners import createindex_runner -from eventdata.runners import loadtemplate_runner -from eventdata.runners import deleteindex_runner from eventdata.runners import kibana_runner from eventdata.runners import indicesstats_runner from eventdata.runners import nodestorage_runner from eventdata.runners import fieldstats_runner + def register(registry): registry.register_param_source("elasticlogs_bulk", ElasticlogsBulkSource) registry.register_param_source("elasticlogs_kibana", ElasticlogsKibanaSource) registry.register_param_source("sample_based_bulk", SampleBasedBulkSource) registry.register_runner("rollover", rollover_runner.rollover) registry.register_runner("createindex", createindex_runner.createindex) - registry.register_runner("load_template", loadtemplate_runner.loadtemplate) - registry.register_runner("deleteindex", deleteindex_runner.deleteindex) registry.register_runner("kibana", kibana_runner.kibana) registry.register_runner("indicesstats", indicesstats_runner.indicesstats) registry.register_runner("node_storage", nodestorage_runner.nodestorage)