Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[filebeat][azure-blob-storage] - Fixed concurrency & flakey tests issue #36124

Merged
merged 14 commits into from
Aug 4, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
178 changes: 152 additions & 26 deletions x-pack/filebeat/input/azureblobstorage/input_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ const (

func Test_StorageClient(t *testing.T) {
tests := []struct {
name string
baseConfig map[string]interface{}
mockHandler func() http.Handler
expected map[string]bool
isError error
name string
baseConfig map[string]interface{}
mockHandler func() http.Handler
expected map[string]bool
expectedError error
}{
{
name: "SingleContainerWithPoll_NoErr",
Expand Down Expand Up @@ -148,9 +148,9 @@ func Test_StorageClient(t *testing.T) {
},
},
},
mockHandler: mock.AzureStorageServer,
expected: map[string]bool{},
isError: mock.NotFoundErr,
mockHandler: mock.AzureStorageServer,
expected: map[string]bool{},
expectedError: mock.NotFoundErr,
},
{
name: "SingleContainerWithoutPoll_InvalidBucketErr",
Expand All @@ -166,9 +166,9 @@ func Test_StorageClient(t *testing.T) {
},
},
},
mockHandler: mock.AzureStorageServer,
expected: map[string]bool{},
isError: mock.NotFoundErr,
mockHandler: mock.AzureStorageServer,
expected: map[string]bool{},
expectedError: mock.NotFoundErr,
},
{
name: "TwoContainersWithPoll_InvalidBucketErr",
Expand All @@ -187,9 +187,9 @@ func Test_StorageClient(t *testing.T) {
},
},
},
mockHandler: mock.AzureStorageServer,
expected: map[string]bool{},
isError: mock.NotFoundErr,
mockHandler: mock.AzureStorageServer,
expected: map[string]bool{},
expectedError: mock.NotFoundErr,
},
{
name: "SingleBucketWithPoll_InvalidConfigValue",
Expand All @@ -205,9 +205,9 @@ func Test_StorageClient(t *testing.T) {
},
},
},
mockHandler: mock.AzureStorageServer,
expected: map[string]bool{},
isError: errors.New("requires value <= 5000 accessing 'max_workers'"),
mockHandler: mock.AzureStorageServer,
expected: map[string]bool{},
expectedError: errors.New("requires value <= 5000 accessing 'max_workers'"),
},
{
name: "TwoBucketWithPoll_InvalidConfigValue",
Expand All @@ -226,9 +226,9 @@ func Test_StorageClient(t *testing.T) {
},
},
},
mockHandler: mock.AzureStorageServer,
expected: map[string]bool{},
isError: errors.New("requires value <= 5000 accessing 'max_workers'"),
mockHandler: mock.AzureStorageServer,
expected: map[string]bool{},
expectedError: errors.New("requires value <= 5000 accessing 'max_workers'"),
},
{
name: "ReadJSON",
Expand Down Expand Up @@ -321,7 +321,7 @@ func Test_StorageClient(t *testing.T) {
conf := config{}
err := cfg.Unpack(&conf)
if err != nil {
assert.EqualError(t, err, tt.isError.Error())
assert.EqualError(t, err, tt.expectedError.Error())
return
}
input := newStatelessInput(conf, serv.URL+"/")
Expand Down Expand Up @@ -349,14 +349,14 @@ func Test_StorageClient(t *testing.T) {
t.Cleanup(func() { timeout.Stop() })

if len(tt.expected) == 0 {
if tt.isError != nil && g.Wait() != nil {
if tt.expectedError != nil && g.Wait() != nil {
//nolint:errorlint // This will never be a wrapped error
if tt.isError == mock.NotFoundErr {
if tt.expectedError == mock.NotFoundErr {
arr := strings.Split(g.Wait().Error(), "\n")
errStr := strings.Join(arr[1:], "\n")
assert.Equal(t, tt.isError.Error(), errStr)
assert.Equal(t, tt.expectedError.Error(), errStr)
} else {
assert.EqualError(t, g.Wait(), tt.isError.Error())
assert.EqualError(t, g.Wait(), tt.expectedError.Error())
}
cancel()
} else {
Expand All @@ -380,7 +380,7 @@ func Test_StorageClient(t *testing.T) {
val, err = got.Fields.GetValue("message")
assert.NoError(t, err)
assert.True(t, tt.expected[val.(string)])
assert.Equal(t, tt.isError, err)
assert.Equal(t, tt.expectedError, err)
receivedCount += 1
if receivedCount == len(tt.expected) {
cancel()
Expand All @@ -392,6 +392,132 @@ func Test_StorageClient(t *testing.T) {
}
}

func Test_Concurrency(t *testing.T) {
tests := []struct {
name string
baseConfig map[string]interface{}
mockHandler func() http.Handler
expectedLen int
}{
{
name: "TestConcurrency_100_Workers",
baseConfig: map[string]interface{}{
"account_name": "beatsblobnew",
"auth.shared_credentials.account_key": "7pfLm1betGiRyyABEM/RFrLYlafLZHbLtGhB52LkWVeBxE7la9mIvk6YYAbQKYE/f0GdhiaOZeV8+AStsAdr/Q==",
"max_workers": 100,
"poll": true,
"poll_interval": "10s",
"containers": []map[string]interface{}{
{
"name": mock.ConcurrencyContainer,
},
},
},
mockHandler: mock.AzureConcurrencyServer,
expectedLen: mock.TotalRandomDataSets,
},
ShourieG marked this conversation as resolved.
Show resolved Hide resolved
{
name: "TestConcurrency_1000_Workers",
baseConfig: map[string]interface{}{
"account_name": "beatsblobnew",
"auth.shared_credentials.account_key": "7pfLm1betGiRyyABEM/RFrLYlafLZHbLtGhB52LkWVeBxE7la9mIvk6YYAbQKYE/f0GdhiaOZeV8+AStsAdr/Q==",
"max_workers": 1000,
"poll": true,
"poll_interval": "10s",
"containers": []map[string]interface{}{
{
"name": mock.ConcurrencyContainer,
},
},
},
mockHandler: mock.AzureConcurrencyServer,
expectedLen: mock.TotalRandomDataSets,
},
{
name: "TestConcurrency_2000_Workers",
baseConfig: map[string]interface{}{
"account_name": "beatsblobnew",
"auth.shared_credentials.account_key": "7pfLm1betGiRyyABEM/RFrLYlafLZHbLtGhB52LkWVeBxE7la9mIvk6YYAbQKYE/f0GdhiaOZeV8+AStsAdr/Q==",
"max_workers": 2000,
"poll": true,
"poll_interval": "500s",
ShourieG marked this conversation as resolved.
Show resolved Hide resolved
"containers": []map[string]interface{}{
{
"name": mock.ConcurrencyContainer,
},
},
},
mockHandler: mock.AzureConcurrencyServer,
expectedLen: mock.TotalRandomDataSets,
},
{
name: "TestConcurrency_5000_Workers",
baseConfig: map[string]interface{}{
"account_name": "beatsblobnew",
"auth.shared_credentials.account_key": "7pfLm1betGiRyyABEM/RFrLYlafLZHbLtGhB52LkWVeBxE7la9mIvk6YYAbQKYE/f0GdhiaOZeV8+AStsAdr/Q==",
"max_workers": 5000,
"poll": true,
"poll_interval": "10s",
"containers": []map[string]interface{}{
{
"name": mock.ConcurrencyContainer,
},
},
},
mockHandler: mock.AzureConcurrencyServer,
expectedLen: mock.TotalRandomDataSets,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
serv := httptest.NewServer(tt.mockHandler())
t.Cleanup(serv.Close)

cfg := conf.MustNewConfigFrom(tt.baseConfig)
conf := config{}
err := cfg.Unpack(&conf)
assert.NoError(t, err)
input := newStatelessInput(conf, serv.URL+"/")

assert.Equal(t, "azure-blob-storage-stateless", input.Name())
assert.NoError(t, input.Test(v2.TestContext{}))

chanClient := beattest.NewChanClient(tt.expectedLen)
t.Cleanup(func() { _ = chanClient.Close() })

ctx, cancel := newV2Context()
t.Cleanup(cancel)

var g errgroup.Group
g.Go(func() error {
return input.Run(ctx, chanClient)
})
timeout := time.NewTimer(100 * time.Second)
t.Cleanup(func() { timeout.Stop() })

var receivedCount int
wait:
for {
select {
case <-timeout.C:
t.Errorf("timed out waiting for %d events", tt.expectedLen)
cancel()
return
case got := <-chanClient.Channel:
var err error
_, err = got.Fields.GetValue("message")
assert.NoError(t, err)
receivedCount += 1
if receivedCount == tt.expectedLen {
cancel()
break wait
}
}
}
})
}
}

func newV2Context() (v2.Context, func()) {
ctx, cancel := context.WithCancel(context.Background())
return v2.Context{
Expand Down
7 changes: 2 additions & 5 deletions x-pack/filebeat/input/azureblobstorage/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -230,11 +230,8 @@ func (j *job) createEvent(message string, offset int64) beat.Event {
},
},
},
// Structs are used here in order to save map allocations
"cloud": struct {
Provider string `json:"provider"`
}{
Provider: "azure",
"cloud": mapstr.M{
"provider": "azure",
},
},
}
Expand Down
124 changes: 124 additions & 0 deletions x-pack/filebeat/input/azureblobstorage/mock/data_random.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
// Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
// or more contributor license agreements. Licensed under the Elastic License;
// you may not use this file except in compliance with the Elastic License.

package mock

import (
"encoding/json"
"encoding/xml"
"fmt"
"math/rand"
"time"
)

const (
TotalRandomDataSets = 10000
ConcurrencyContainer = "concurrency_container"
)

// Generates random Azure blob storage container metadata in XML format
type EnumerationResults struct {
XMLName xml.Name `xml:"EnumerationResults"`
ServiceEndpoint string `xml:"ServiceEndpoint,attr"`
ContainerName string `xml:"ContainerName,attr"`
Blobs []Blob `xml:"Blobs>Blob"`
NextMarker string `xml:"NextMarker"`
}

type Blob struct {
Name string `xml:"Name"`
Properties Properties `xml:"Properties"`
Metadata string `xml:"Metadata"`
}

type Properties struct {
LastModified string `xml:"Last-Modified"`
Etag string `xml:"Etag"`
ContentLength int `xml:"Content-Length"`
ContentType string `xml:"Content-Type"`
}

func generateMetadata() []byte {
// Generate random data for x data sets defined by TotalRandomDataSets
const numDataSets = TotalRandomDataSets
dataSets := make([]Blob, numDataSets)

for i := 0; i < numDataSets; i++ {
dataSets[i] = createRandomBlob(i)
}

// Fill in the root XML structure
xmlData := EnumerationResults{
ServiceEndpoint: "https://127.0.0.1/",
ContainerName: "concurrency_container",
Blobs: dataSets,
NextMarker: "",
}

// Marshal the data into XML format
xmlBytes, err := xml.MarshalIndent(xmlData, "", "\t")
if err != nil {
panic(fmt.Sprintf("Error marshaling data: %v", err))
}
return []byte(xml.Header + string(xmlBytes))
}

// Helper function to create a random Blob
func createRandomBlob(i int) Blob {
rand.Seed(time.Now().UnixNano())

return Blob{
Name: fmt.Sprintf("data_%d.json", i),
Properties: Properties{
LastModified: time.Now().Format(time.RFC1123),
Etag: fmt.Sprintf("0x%X", rand.Int63()),
ContentType: "application/json",
},
Metadata: "",
}
}

// Generate Random Blob data in JSON format
type MyData struct {
ID int `json:"id"`
Name string `json:"name"`
Age int `json:"age"`
Email string `json:"email"`
Description string `json:"description"`
}

func generateRandomBlob() []byte {
const numObjects = 10
dataObjects := make([]MyData, numObjects)

for i := 0; i < numObjects; i++ {
dataObjects[i] = createRandomData()
}

jsonBytes, err := json.MarshalIndent(dataObjects, "", "\t")
if err != nil {
panic(fmt.Sprintf("Error marshaling data: %v", err))
}
return jsonBytes
}

func createRandomData() MyData {
rand.Seed(time.Now().UnixNano())

return MyData{
ID: rand.Intn(1000) + 1,
Name: getRandomString([]string{"John", "Alice", "Bob", "Eve"}),
Age: rand.Intn(80) + 18,
Email: getRandomString([]string{"john@example.com", "alice@example.com", "bob@example.com"}),
Description: getRandomString([]string{"Student", "Engineer", "Artist", "Doctor"}),
}
}

func getRandomString(options []string) string {
if len(options) == 0 {
return ""
}
rand.Seed(time.Now().UnixNano())
return options[rand.Intn(len(options))]
}
Loading