noamraph · domule · Mar 13, 2019 · Mar 13, 2019 · Mar 13, 2019 · Mar 13, 2019
diff --git a/README.md b/README.md
@@ -79,6 +79,51 @@ fields that are defined must be equal to the fields in the output document.
 The output document may include other fields. To test that an output field
 doesn't exist, use `"field": null`.
 
+## Testing `[@metadata]`
+
+Logstash allows using a transient field called `[@metadata]`, which is not produced by any output plugins. This is useful if you want to influence the flow of the filter (and/or the output) section, but not by using a field that would be present in the output. The metadata field acts like a bucket for internal variables (it is a nested field on which you can set sub-fields). For example, in the filter section you could set `[@metadata][target_index]` to a desired value for an Elasticsearch index and then in the output section in the `elasticsearch` plugin you could use `%{[@metadata][target_index]}-%{+yyyy.MM.dd}` as your index name pattern.
+
+However, since `[@metadata]` is by definition not produced by the output section, it is consequently not possible to write test cases that verify that the filter section set the correct data in the `[@metadata]` field (or that it correctly didn't set metadata). Once your filter and output sections start relying on metadata it becomes critical that you are also able to write test cases that cover metadata. But, rejoice, as with logstash-filter-test you can still test metadata! The whole `[@metadata]` field will be copied into another field called `[__@metadata]`, allowing you to do something like this:
+
+```
+    [
+        {
+            "message" : "my sample log line",
+            "path" : "/path/to/source/file.log",
+            "host" : "my-host-name"
+        },
+        {
+            "__@metadata" : 
+            {
+                "target_index" : "kittycat"
+            },
+            "@timestamp" : "2019-02-10T00:19:02.106Z",
+            "hostname" : "my-host-name",
+            "level" : "INFO",
+            "source.file.path.raw" : "/path/to/source/file.log",
+            "tags" : null
+        }
+    ]
+```
+
+In addition, if you choose to not remove the temp directory that logstash-filter-test creates during execution (see corresponding command line argument), then you can have a look at the file `output-ap` in that directory. Running your test cases through your filters will generate this output in the Ruby Awesome Print format, which is what Logstash's rubydebug codec uses. The AP format is just another format in addition to the JSON file `output-json` that logstash-filter-test also produces. The AP file is not actually used by logstash-filter-test, but it can be useful for debugging your test cases and filters, because the file contains the `[@metadata]` field exactly as set and modified by your filters (no renaming to `[__@metadata]`). It is also formatted in a more human-readable way than the JSON output file.
+
+
+## Command line arguments
+
+| Argument | Description | Default |
+| -------- | ----------- | ------- |
+| `--filters` | File with Logstash filter definition to test. | `filter.conf` |
+| `--testcases` | File with test cases. | `testcases.js` |
+| `--remove_tempdir` | Whether to remove the temp dir that is created during execution (yes/no). | `yes` (any other value will be equivalent to `no`) |
+| `--logstash` | Path to the Logstash executable. | \[ `/opt/logstash/bin/logstash`, `/usr/share/logstash/bin/logstash` \] |
+
+Example on Windows:
+
+```
+logstash_filter_test.py --remove_tempdir=yes --logstash C:\path\to\logstash-6.2.3\bin\logstash.bat --filters C:\path\to\logstash\indexer\config\filter.conf --testcases C:\path\to\logstash\indexer\test\testcases.js
+```
+
 ## Testing from Python
 
 If you don't like the testcase file format, it's easy to test by yourself:

diff --git a/logstash_filter_run.py b/logstash_filter_run.py
@@ -17,6 +17,14 @@
   pipeline.workers: 1
 """
 
+# Filter config which copies the otherwise hidden @metadata field and its sub-fields to a separate field
+# that will be accessible in the output. This allows writing test cases that can assert metadata values.
+POST_PROCESSOR_FILTER_CONF = """\
+filter {
+  mutate { copy => { "[@metadata]" => "__@metadata" } }
+}
+"""
+
 INPUT_OUTPUT_CONF = """\
 input {
   stdin {
@@ -27,10 +35,13 @@
   file {
     path => "%s"
   }
+  file {
+    path => "%s"
+    codec => rubydebug { metadata => true }
+  }
 }
 """
 
-
 def logstash_filter_run(inputs, filter_def, logstash_bin=None, remove_tempdir=True):
     """
     Run a bunch of json through logstash given the filter definition
@@ -59,13 +70,25 @@ def logstash_filter_run(inputs, filter_def, logstash_bin=None, remove_tempdir=Tr
     os.mkdir(config_dir)
     os.mkdir(pipeline_dir)
     open(join(config_dir, 'logstash.yml'), 'w').close()
+
     with open(join(config_dir, 'pipelines.yml'), 'w') as f:
-        f.write(PIPELINES_YML.format(pipeline_dir))
-    output_fn = join(workdir, 'output')
+        if os.name == 'nt':
+            # Somehow, on Windows the path has to be prefixed with a slash (in front of the drive letter)
+            # and the path separator has to be the forward slash.
+            formatted_pipeline_dir = '/' + pipeline_dir.replace('\\', '/')
+        else:
+            formatted_pipeline_dir = pipeline_dir
+        f.write(PIPELINES_YML.format(formatted_pipeline_dir))
+
+    output_json_fn = join(workdir, 'output-json')
+    output_ap_fn = join(workdir, 'output-ap')
     with open(join(pipeline_dir, 'io.conf'), 'w') as f:
-        f.write(INPUT_OUTPUT_CONF % output_fn)
-    with open(join(pipeline_dir, 'filter.conf'), 'w') as f:
+        f.write(INPUT_OUTPUT_CONF % (output_json_fn, output_ap_fn))
+    with open(join(pipeline_dir, 'filter_1_candidate.conf'), 'w') as f:
         f.write(filter_def)
+    with open(join(pipeline_dir, 'filter_2_post_processor.conf'), 'w') as f:
+        f.write(POST_PROCESSOR_FILTER_CONF)
+
     inputs_s = ''.join(s+'\n' for s in input_jsons)
     args = [logstash_bin, '--log.level=warn',
             '--path.settings', config_dir, '--path.data', data_dir]
@@ -76,7 +99,7 @@ def logstash_filter_run(inputs, filter_def, logstash_bin=None, remove_tempdir=Tr
     if rc != 0:
         raise RuntimeError("logstash returned non-zero return code {}"
                            .format(rc))
-    output_lines = list(open(output_fn))
+    output_lines = list(open(output_json_fn))
     if len(output_lines) != len(inputs):
         raise RuntimeError("Received {} outputs, expecting {}"
                            .format(len(output_lines), len(inputs)))

diff --git a/logstash_filter_test.py b/logstash_filter_test.py
@@ -5,6 +5,7 @@
 import json
 
 from logstash_filter_run import logstash_filter_run
+from logstash_filter_run import LOGSTASH_BIN_ALTERNATIVES
 
 
 # This is copied from https://github.com/linjackson78/jstyleson
@@ -110,7 +111,6 @@ def _remove_last_comma(str_list, before_index):
     if str_list[i] == ',':
         str_list[i] = ''
 
-
 def print_results(testcases, outputs):
     expecteds = [expected for _inp, expected in testcases]
     n_errs = 0
@@ -138,11 +138,11 @@ def json_dumps(x):
     return n_errs
 
 
-def logstash_filter_test(filter_fn='filter.conf', testcases_fn='testcases.js'):
+def logstash_filter_test(filter_fn='filter.conf', testcases_fn='testcases.js', logstash_bin_fn=None, remove_tempdir="yes"):
     testcases = json.loads(dispose(open(testcases_fn).read()))
     inputs = [inp for inp, _expected in testcases]
     filter_def = open(filter_fn).read()
-    outputs = logstash_filter_run(inputs, filter_def)
+    outputs = logstash_filter_run(inputs, filter_def, logstash_bin_fn, remove_tempdir == "yes")
     n_errs = print_results(testcases, outputs)
     return outputs, n_errs
 
@@ -154,9 +154,14 @@ def main():
                         help="File with logstash filter definition to test. default: filter.conf")
     parser.add_argument("--testcases", default="testcases.js",
                         help="File with testcases. default: testcases.js")
+    parser.add_argument("--logstash", default=None,
+                        help="Path to Logstash executable. default: " + ",".join(LOGSTASH_BIN_ALTERNATIVES))
+    parser.add_argument("--remove_tempdir", default="yes",
+                        help="Whether to remove the temp dir (yes/no). default: yes")
+
     args = parser.parse_args()
 
-    _outputs, n_errs = logstash_filter_test(args.filters, args.testcases)
+    _outputs, n_errs = logstash_filter_test(args.filters, args.testcases, args.logstash, args.remove_tempdir)
 
     return 0 if n_errs == 0 else 1