From 6e38af8ed25d0f4c6e523beafba9e34f94124f51 Mon Sep 17 00:00:00 2001 From: Andrea Spacca Date: Tue, 23 Aug 2022 10:58:18 +0900 Subject: [PATCH 1/5] allow for json/ndjson content type with charset --- .../input/awss3/input_integration_test.go | 19 ++++++++++++------- x-pack/filebeat/input/awss3/s3_objects.go | 2 +- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/x-pack/filebeat/input/awss3/input_integration_test.go b/x-pack/filebeat/input/awss3/input_integration_test.go index a5086261a98..ae6151d62b1 100644 --- a/x-pack/filebeat/input/awss3/input_integration_test.go +++ b/x-pack/filebeat/input/awss3/input_integration_test.go @@ -18,6 +18,7 @@ import ( "path" "path/filepath" "runtime" + "strings" "testing" "time" @@ -88,7 +89,6 @@ file_selectors: - regex: 'events-array.json$' expand_event_list_from_field: Events - content_type: application/json include_s3_metadata: - last-modified - x-amz-version-id @@ -97,7 +97,6 @@ file_selectors: - Content-Type - regex: '\.(?:nd)?json(\.gz)?$' - content_type: application/json - regex: 'multiline.txt$' parsers: @@ -117,7 +116,6 @@ file_selectors: - regex: 'events-array.json$' expand_event_list_from_field: Events - content_type: application/json include_s3_metadata: - last-modified - x-amz-version-id @@ -126,7 +124,6 @@ file_selectors: - Content-Type - regex: '\.(?:nd)?json(\.gz)?$' - content_type: application/json - regex: 'multiline.txt$' parsers: @@ -328,11 +325,19 @@ func uploadS3TestFiles(t *testing.T, region, bucket string, filenames ...string) t.Fatalf("Failed to open file %q, %v", filename, err) } + contentType := "" + if strings.HasSuffix(filename, "ndjson") || strings.HasSuffix(filename, "ndjson.gz") { + contentType = contentTypeNDJSON + } else if strings.HasSuffix(filename, "json") || strings.HasSuffix(filename, "json.gz") { + contentType = contentTypeJSON + } + // Upload the file to S3. 
result, err := uploader.Upload(context.Background(), &s3.PutObjectInput{ - Bucket: aws.String(bucket), - Key: aws.String(filepath.Base(filename)), - Body: bytes.NewReader(data), + Bucket: aws.String(bucket), + Key: aws.String(filepath.Base(filename)), + Body: bytes.NewReader(data), + ContentType: aws.String(contentType), }) if err != nil { t.Fatalf("Failed to upload file %q: %v", filename, err) diff --git a/x-pack/filebeat/input/awss3/s3_objects.go b/x-pack/filebeat/input/awss3/s3_objects.go index 826b65f1aca..a1d70c604c2 100644 --- a/x-pack/filebeat/input/awss3/s3_objects.go +++ b/x-pack/filebeat/input/awss3/s3_objects.go @@ -153,7 +153,7 @@ func (p *s3ObjectProcessor) ProcessS3Object() error { // Process object content stream. switch { - case contentType == contentTypeJSON || contentType == contentTypeNDJSON: + case strings.HasPrefix(contentType, contentTypeJSON) || strings.HasPrefix(contentType, contentTypeNDJSON): err = p.readJSON(reader) default: err = p.readFile(reader) From 75825d3ab94376f7b417b6ac17273e7d183b8b81 Mon Sep 17 00:00:00 2001 From: Andrea Spacca Date: Tue, 23 Aug 2022 11:22:51 +0900 Subject: [PATCH 2/5] add '; charset=UTF-8' in integration tests --- x-pack/filebeat/input/awss3/input_integration_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/filebeat/input/awss3/input_integration_test.go b/x-pack/filebeat/input/awss3/input_integration_test.go index ae6151d62b1..6fc3da78d46 100644 --- a/x-pack/filebeat/input/awss3/input_integration_test.go +++ b/x-pack/filebeat/input/awss3/input_integration_test.go @@ -327,9 +327,9 @@ func uploadS3TestFiles(t *testing.T, region, bucket string, filenames ...string) contentType := "" if strings.HasSuffix(filename, "ndjson") || strings.HasSuffix(filename, "ndjson.gz") { - contentType = contentTypeNDJSON + contentType = contentTypeNDJSON + "; charset=UTF-8" } else if strings.HasSuffix(filename, "json") || strings.HasSuffix(filename, "json.gz") { - contentType = contentTypeJSON + 
contentType = contentTypeJSON + "; charset=UTF-8" } // Upload the file to S3. From 4db06b133c8c2d6a0f566a6b6b56303c6a32f24d Mon Sep 17 00:00:00 2001 From: Andrea Spacca Date: Tue, 23 Aug 2022 11:27:03 +0900 Subject: [PATCH 3/5] changelog --- CHANGELOG.next.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 65db6b93809..8b0132cddd5 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -59,6 +59,7 @@ https://github.com/elastic/beats/compare/v8.2.0\...main[Check the HEAD diff] - Fix handling of Checkpoint event for R81. {issue}32380[32380] {pull}32458[32458] - Fix a hang on `apt-get update` stage in packaging. {pull}32580[32580] - gcp-pubsub input: Restart Pub/Sub client on all errors. {issue}32550[32550] {pull}32712[32712] +- Fix `aws-s3` input not parsing objects as JSON when the `json` or `ndjson` content type includes charset information. {pull}32767[32767] *Heartbeat* From e644016d11ca993c8192e3343406ee403b8652c0 Mon Sep 17 00:00:00 2001 From: Andrea Spacca Date: Tue, 23 Aug 2022 13:57:58 +0900 Subject: [PATCH 4/5] assessing CI --- x-pack/filebeat/input/awss3/input_integration_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/filebeat/input/awss3/input_integration_test.go b/x-pack/filebeat/input/awss3/input_integration_test.go index 6fc3da78d46..df457518a04 100644 --- a/x-pack/filebeat/input/awss3/input_integration_test.go +++ b/x-pack/filebeat/input/awss3/input_integration_test.go @@ -327,9 +327,9 @@ func uploadS3TestFiles(t *testing.T, region, bucket string, filenames ...string) contentType := "" if strings.HasSuffix(filename, "ndjson") || strings.HasSuffix(filename, "ndjson.gz") { - contentType = contentTypeNDJSON + "; charset=UTF-8" + contentType = "let-CI-fail-" + contentTypeNDJSON + "; charset=UTF-8" } else if strings.HasSuffix(filename, "json") || strings.HasSuffix(filename, "json.gz") { - contentType = contentTypeJSON + "; charset=UTF-8" + contentType = 
"let-CI-fail-" + contentTypeJSON + "; charset=UTF-8" } // Upload the file to S3. From f947357662e6d2ff609cef64a4386ee55045690a Mon Sep 17 00:00:00 2001 From: Andrea Spacca Date: Tue, 23 Aug 2022 18:07:59 +0900 Subject: [PATCH 5/5] revert correct behaviour --- x-pack/filebeat/input/awss3/input_integration_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/x-pack/filebeat/input/awss3/input_integration_test.go b/x-pack/filebeat/input/awss3/input_integration_test.go index df457518a04..6fc3da78d46 100644 --- a/x-pack/filebeat/input/awss3/input_integration_test.go +++ b/x-pack/filebeat/input/awss3/input_integration_test.go @@ -327,9 +327,9 @@ func uploadS3TestFiles(t *testing.T, region, bucket string, filenames ...string) contentType := "" if strings.HasSuffix(filename, "ndjson") || strings.HasSuffix(filename, "ndjson.gz") { - contentType = "let-CI-fail-" + contentTypeNDJSON + "; charset=UTF-8" + contentType = contentTypeNDJSON + "; charset=UTF-8" } else if strings.HasSuffix(filename, "json") || strings.HasSuffix(filename, "json.gz") { - contentType = "let-CI-fail-" + contentTypeJSON + "; charset=UTF-8" + contentType = contentTypeJSON + "; charset=UTF-8" } // Upload the file to S3.