From 1bc193daa321d26af8bc02869305a48ec2c69f87 Mon Sep 17 00:00:00 2001 From: Kit Peters Date: Thu, 27 Aug 2020 12:33:10 -0500 Subject: [PATCH 1/3] "classification" & ignore unknown ALB log fields At some point AWS added two more fields, "classification" and "classification_reason" to the ALB log format. This broke the parser, as it assumed 27 fields, not 29. This adds support for those new fields as well as ignoring any additional fields (per AWS spec). --- .gitignore | 1 + README.md | 113 ++++++++++++++++++++++++++++++++++++++++------------- index.js | 52 ++++++++++++------------ test.js | 14 +++++++ 4 files changed, 129 insertions(+), 51 deletions(-) diff --git a/.gitignore b/.gitignore index 3c3629e..76efb07 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ node_modules +.vscode diff --git a/README.md b/README.md index fa0f240..38a8906 100644 --- a/README.md +++ b/README.md @@ -7,10 +7,10 @@ A basic parser for ALB access logs, forked from elb log parser git@github.com:toshihirock/node-elb-log-parser.git i cannot find alb log parser instead of elb-log-parser. so i modify a bit of the code. Thank you toshihirock! -## When I use this npm? +## When I use this npm -+ ALB Access Log(S3)->Lambda->ElasticSearch. Example [awslabs/amazon-elasticsearch-lambda-samples](https://github.com/awslabs/amazon-elasticsearch-lambda-samples/blob/master/src/s3_lambda_es.js) -+ Analyze ELB Access Log +- ALB Access Log(S3)->Lambda->ElasticSearch. Example [awslabs/amazon-elasticsearch-lambda-samples](https://github.com/awslabs/amazon-elasticsearch-lambda-samples/blob/master/src/s3_lambda_es.js) +- Analyze ELB Access Log ## Install @@ -18,6 +18,49 @@ i cannot find alb log parser instead of elb-log-parser. 
so i modify a bit of the npm install -g alb-log-parser ``` +## Supported fields + +See <https://docs.aws.amazon.com/elasticloadbalancing/latest/application/load-balancer-access-logs.html> for definitions + +- `type` +- `timestamp` +- `elb` +- `client` +- `client_port` +- `target` +- `target_port` +- `request_processing_time` +- `target_processing_time` +- `response_processing_time` +- `elb_status_code` +- `target_status_code` +- `received_bytes` +- `sent_bytes` +- `request_method` +- `request_uri` +- `request_http_version` +- `request_uri_scheme` +- `request_uri_host` +- `request_uri_port` +- `request_uri_path` +- `request_uri_query` +- `request` +- `user_agent` +- `ssl_cipher` +- `ssl_protocol` +- `target_group_arn` +- `trace_id` +- `domain_name` +- `chosen_cert_arn` +- `matched_rule_priority` +- `request_creation_time` +- `actions_executed` +- `redirect_url` +- `error_reason` +- `target:port_list` +- `target_status_code_list` +- `classification` +- `classification_reason` ## Example API usage @@ -25,35 +68,48 @@ npm install -g alb-log-parser node-alb-log-parser$node > var parse = require('./index'); undefined -> parse('http 2015-05-13T23:39:43.945958Z my-loadbalancer 192.168.131.39:2817 10.0.0.1:80 0.000086 0.001048 0.001337 200 200 0 57 "GET https://mytest-111.ap-northeast-1.elb.amazonaws.com:443/p/a/t/h?foo=bar&hoge=fuga HTTP/1.1" "curl/7.38.0" DHE-RSA-AES128-SHA TLSv1.2 arn:aws:elasticloadbalancing:us-west-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067 "Root=1-58337262-36d228ad5d99923122bbe354"') -{ type: 'http', - timestamp: '2015-05-13T23:39:43.945958Z', - elb: 'my-loadbalancer', +> parse('http 2020-08-27T16:35:00.166351Z app/my-loadbalancer/50dc6c495c0c9188 192.168.131.39:2817 192.168.201.251:80 0.000 0.440 0.000 200 200 1107 11912 "GET http://example.com:80/path?foo=bar&baz=bak HTTP/1.1" "Fake/1.0.0 (Linux)" - - arn:aws:elasticloadbalancing:us-east-1:123456789012:targetgroup/my-tg/ffffffffffffffff "Self=1-00000000-111111111111111111111111;Root=1-00000000-222222222222222222222222" "-" "-" 0 2020-08-27T16:34:59.725000Z "forward" "-" "-" 
"192.168.201.251:80" "200" "-" "-"') +{ + type: 'http', + timestamp: '2020-08-27T16:35:00.166351Z', + elb: 'app/my-loadbalancer/50dc6c495c0c9188', client: '192.168.131.39', client_port: 2817, - target: '10.0.0.1', - request_processing_time: 0.000086, - target_processing_time: 0.001048, - response_processing_time: 0.001337, + target: '192.168.201.251', + target_port: 80, + request_processing_time: 0, + target_processing_time: 0.44, + response_processing_time: 0, elb_status_code: 200, target_status_code: 200, - received_bytes: 0, - sent_bytes: 57, - request: 'GET https://mytest-111.ap-northeast-1.elb.amazonaws.com:443/p/a/t/h?foo=bar&hoge=fuga HTTP/1.1', - user_agent: 'curl/7.38.0', - ssl_cipher: 'DHE-RSA-AES128-SHA', - ssl_protocol: 'TLSv1.2', - target_group_arn: 'arn:aws:elasticloadbalancing:us-west-2:123456789012:targetgroup/my-targets/73e2d6bc24d8a067', - trace_id: 'Root=1-58337262-36d228ad5d99923122bbe354', - target_port: 80, + received_bytes: 1107, + sent_bytes: 11912, request_method: 'GET', - request_uri: 'https://mytest-111.ap-northeast-1.elb.amazonaws.com:443/p/a/t/h?foo=bar&hoge=fuga', + request_uri: 'http://example.com:80/path?foo=bar&baz=bak', request_http_version: 'HTTP/1.1', - request_uri_scheme: 'https:', - request_uri_host: 'mytest-111.ap-northeast-1.elb.amazonaws.com', - request_uri_port: 443, - request_uri_path: '/p/a/t/h', - request_uri_query: 'foo=bar&hoge=fuga' } + request_uri_scheme: 'http:', + request_uri_host: 'example.com', + request_uri_port: 80, + request_uri_path: '/path', + request_uri_query: 'foo=bar&baz=bak', + request: 'GET http://example.com:80/path?foo=bar&baz=bak HTTP/1.1', + user_agent: 'Fake/1.0.0 (Linux)', + ssl_cipher: '-', + ssl_protocol: '-', + target_group_arn: 'arn:aws:elasticloadbalancing:us-east-1:123456789012:targetgroup/my-tg/ffffffffffffffff', + trace_id: 'Self=1-00000000-111111111111111111111111;Root=1-00000000-222222222222222222222222', + domain_name: '-', + chosen_cert_arn: '-', + matched_rule_priority: 0, + 
request_creation_time: '2020-08-27T16:34:59.725000Z', + actions_executed: 'forward', + redirect_url: '-', + error_reason: '-', + 'target:port_list': '192.168.201.251:80', + target_status_code_list: 200, + classification: '-', + classification_reason: '-' +} > ``` @@ -62,9 +118,12 @@ You get the idea. ## Tests ``` -$npm test + +\$npm test + ``` ## License WTFPL +``` diff --git a/index.js b/index.js index 729c617..8457b93 100755 --- a/index.js +++ b/index.js @@ -4,13 +4,14 @@ * Field names, in order of appearance in the ALB log lines */ const fields = [ - 'type', 'timestamp', 'elb', 'client:port', - 'target:port', 'request_processing_time', 'target_processing_time', 'response_processing_time', - 'elb_status_code', 'target_status_code', 'received_bytes', 'sent_bytes', - 'request', 'user_agent', 'ssl_cipher', 'ssl_protocol', - 'target_group_arn', 'trace_id', 'domain_name', 'chosen_cert_arn', - 'matched_rule_priority', 'request_creation_time', 'actions_executed', 'redirect_url', - 'error_reason', 'target:port_list', 'target_status_code_list' + 'type', 'timestamp', 'elb', 'client:port', + 'target:port', 'request_processing_time', 'target_processing_time', 'response_processing_time', + 'elb_status_code', 'target_status_code', 'received_bytes', 'sent_bytes', + 'request', 'user_agent', 'ssl_cipher', 'ssl_protocol', + 'target_group_arn', 'trace_id', 'domain_name', 'chosen_cert_arn', + 'matched_rule_priority', 'request_creation_time', 'actions_executed', 'redirect_url', + 'error_reason', 'target:port_list', 'target_status_code_list', 'classification', + 'classification_reason' ] module.exports = function (line) { // @@ -43,8 +44,8 @@ if (require.main === module) { /** * Parse one line of an AWS Application Load Balancer log - * - * @param {string} line + * + * @param {string} line */ function parseAlbLogLine(line) { const parsed = {} @@ -52,10 +53,11 @@ function parseAlbLogLine(line) { let finished = false let quoteSeen = false let element = '' - for (const c of line + ' ') { 
+ for (const c of line + ' ') { if (finished) { if (element) { - const fieldName = fields[counter] + + let fieldName = fields[counter] // Convert all numeric strings to numbers if (element.match(/^\d+.?\d*$/)) { element = Number(element) @@ -63,9 +65,11 @@ function parseAlbLogLine(line) { if (fieldName === 'request') { _decorateFromRequest(element, parsed) } - + + if (!fieldName) continue + if (fieldName.match(/^\S+?:port$/)) { - _decorateFromPortField(fieldName, element, parsed) + _decorateFromPortField(fieldName, element, parsed) } else { parsed[fieldName] = element } @@ -76,24 +80,24 @@ function parseAlbLogLine(line) { finished = false } - // treat whitespace as a delimiter *except* when inside of quotes - if (c.match(/^\s$/) && !quoteSeen) finished = true - + // treat whitespace as a delimiter *except* when inside of quotes + if (c.match(/^\s$/) && !quoteSeen) finished = true + if (c === '"') { // beginning or end of a quote delimited string if (quoteSeen) finished = true // if we've seen one quote, this closes the quote delimited string quoteSeen = !quoteSeen // Toggle the quote flag } else { // Append the character to the element unless this character terminates the element - if (!finished) element += c + if (!finished) element += c } } return parsed } function _decorateFromPortField(fieldName, element, parsed) { - // We don't actually send back 'client:port' and 'target:port'; we send back + // We don't actually send back 'client:port' and 'target:port'; we send back // 'client', 'client_port', 'target', and 'target_port' - const field = fieldName.match(/^(\S+?):port/)[1] + const field = fieldName.match(/^(\S+?):port/)[1] const [ip, port] = element.split(':') if (ip === '-1') { parsed[field] = parseInt(ip) @@ -105,13 +109,13 @@ function _decorateFromPortField(fieldName, element, parsed) { } else { parsed[`${field}_port`] = -1 } - return parsed + return parsed } /** * Helper for parseAlbLogLine - * - * @param {string} element - * @param {object} parsed + * + 
* @param {string} element + * @param {object} parsed */ function _decorateFromRequest(element, parsed) { const url = require('url'); @@ -128,4 +132,4 @@ function _decorateFromRequest(element, parsed) { parsed.request_uri_path = parsedUrl.pathname parsed.request_uri_query = parsedUrl.query return parsed -} \ No newline at end of file +} diff --git a/test.js b/test.js index 67409b4..856f126 100644 --- a/test.js +++ b/test.js @@ -387,3 +387,17 @@ tap.test('successful Lambda', function (t) { t.equal(parsed.error_reason, '-', 'we have error_reason') t.end() }) + +tap.test('classification and classification reason', function(t) { + var parsed = parse('http 2020-08-27T16:35:00.166351Z app/my-loadbalancer/50dc6c495c0c9188 192.168.131.39:2817 192.168.201.251:80 0.000 0.440 0.000 200 200 1107 11912 "GET http://example.com:80/path?foo=bar&baz=bak HTTP/1.1" "Fake/1.0.0 (Linux)" - - arn:aws:elasticloadbalancing:us-east-1:123456789012:targetgroup/my-tg/ffffffffffffffff "Self=1-00000000-111111111111111111111111;Root=1-00000000-222222222222222222222222" "-" "-" 0 2020-08-27T16:34:59.725000Z "forward" "-" "-" "192.168.201.251:80" "200" "-" "-"') + t.equal(parsed.classification, '-', 'we have classification') + t.equal(parsed.classification_reason, '-', 'we have classification_reason') + t.end() +}) + +tap.test('extra fields are ignored', function(t) { + var parsed = parse('http 2020-08-27T16:35:00.166351Z app/my-loadbalancer/50dc6c495c0c9188 192.168.131.39:2817 192.168.201.251:80 0.000 0.440 0.000 200 200 1107 11912 "GET http://example.com:80/path?foo=bar&baz=bak HTTP/1.1" "Fake/1.0.0 (Linux)" - - arn:aws:elasticloadbalancing:us-east-1:123456789012:targetgroup/my-tg/ffffffffffffffff "Self=1-00000000-111111111111111111111111;Root=1-00000000-222222222222222222222222" "-" "-" 0 2020-08-27T16:34:59.725000Z "forward" "-" "-" "192.168.201.251:80" "200" "-" "-" "FOO BAR" BAZ') + const seen = Object.values(parsed).filter(val => (val === 'FOO BAR' || val === 'BAZ')) + t.equal(seen.length, 
0, 'Extra fields are ignored') + t.end() +}) From fe55d367f55055d4919620569d68236205be68a3 Mon Sep 17 00:00:00 2001 From: Kit Peters Date: Thu, 27 Aug 2020 13:11:15 -0500 Subject: [PATCH 2/3] version bump --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 0d4d432..0a6d5fe 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "alb-log-parser", - "version": "0.0.5", + "version": "0.0.6", "description": "A basic parser for ALB access logs, forked from elb log parser git@github.com:toshihirock/node-elb-log-parser.git", "main": "index.js", "bin": { From 8547888fdf1187bc73d1d8bdc86e27126a0717d5 Mon Sep 17 00:00:00 2001 From: Kit Peters Date: Thu, 27 Aug 2020 13:15:50 -0500 Subject: [PATCH 3/3] Add credit for idea of ignoring unknown fields --- index.js | 1 + 1 file changed, 1 insertion(+) diff --git a/index.js b/index.js index 8457b93..5954efa 100755 --- a/index.js +++ b/index.js @@ -66,6 +66,7 @@ function parseAlbLogLine(line) { _decorateFromRequest(element, parsed) } + // H/T @jason-linthwaite (https://github.com/jason-linthwaite) if (!fieldName) continue if (fieldName.match(/^\S+?:port$/)) {