Skip to content

Commit

Permalink
Merge pull request #4126 from Megum1n/aws-provider-validate-value-size
Browse files Browse the repository at this point in the history
Validate AWS record values size during batch set generation
  • Loading branch information
k8s-ci-robot authored Feb 9, 2024
2 parents 7311390 + 3cfbb42 commit 3231951
Show file tree
Hide file tree
Showing 6 changed files with 386 additions and 62 deletions.
18 changes: 17 additions & 1 deletion docs/tutorials/aws.md
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@ kubectl patch serviceaccount "external-dns" --namespace ${EXTERNALDNS_NS:-"defau
"{\"metadata\": { \"annotations\": { \"eks.amazonaws.com/role-arn\": \"$ROLE_ARN\" }}}"
```

If any part of this step is misconfigured, such as the role with incorrect namespace configured in the trust relationship, annotation pointing the the wrong role, etc., you will see errors like `WebIdentityErr: failed to retrieve credentials`. Check the configuration and make corrections.
If any part of this step is misconfigured, such as the role with incorrect namespace configured in the trust relationship, annotation pointing the the wrong role, etc., you will see errors like `WebIdentityErr: failed to retrieve credentials`. Check the configuration and make corrections.

When the service account annotations are updated, then the current running pods will have to be terminated, so that new pod(s) with proper configuration (environment variables) will be created automatically.

Expand Down Expand Up @@ -991,3 +991,19 @@ An effective starting point for EKS with an ingress controller might look like:
--domain-filter=example.com
--aws-zones-cache-duration=1h
```

### Batch size options

After external-dns generates all changes, it will perform a task to group those changes into batches. Each change will be validated against batch-change-size limits. If at least one of those parameters out of range - the change will be moved to a separate batch. If the change can't fit into any batch - *it will be skipped.*<br>
There are 3 options to control batch size for AWS provider:
* Maximum amount of changes added to one batch
* `--aws-batch-change-size` (default `1000`)
* Maximum size of changes in bytes added to one batch
* `--aws-batch-change-size-bytes` (default `32000`)
* Maximum value count of changes added to one batch
* `aws-batch-change-size-values` (default `1000`)

`aws-batch-change-size` can be very useful for throttling purposes and can be set to any value.

Default values for flags `aws-batch-change-size-bytes` and `aws-batch-change-size-values` are taken from [AWS documentation](https://docs.aws.amazon.com/Route53/latest/DeveloperGuide/DNSLimitations.html#limits-api-requests) for Route53 API. **You should not change those values until you really have to.** <br>
Because those limits are in place, `aws-batch-change-size` can be set to any value: Even if your batch size is `4000` records, your change will be split to separate batches due to bytes/values size limits and apply request will be finished without issues.
24 changes: 13 additions & 11 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,17 +229,19 @@ func main() {
case "aws":
p, err = aws.NewAWSProvider(
aws.AWSConfig{
DomainFilter: domainFilter,
ZoneIDFilter: zoneIDFilter,
ZoneTypeFilter: zoneTypeFilter,
ZoneTagFilter: zoneTagFilter,
ZoneMatchParent: cfg.AWSZoneMatchParent,
BatchChangeSize: cfg.AWSBatchChangeSize,
BatchChangeInterval: cfg.AWSBatchChangeInterval,
EvaluateTargetHealth: cfg.AWSEvaluateTargetHealth,
PreferCNAME: cfg.AWSPreferCNAME,
DryRun: cfg.DryRun,
ZoneCacheDuration: cfg.AWSZoneCacheDuration,
DomainFilter: domainFilter,
ZoneIDFilter: zoneIDFilter,
ZoneTypeFilter: zoneTypeFilter,
ZoneTagFilter: zoneTagFilter,
ZoneMatchParent: cfg.AWSZoneMatchParent,
BatchChangeSize: cfg.AWSBatchChangeSize,
BatchChangeSizeBytes: cfg.AWSBatchChangeSizeBytes,
BatchChangeSizeValues: cfg.AWSBatchChangeSizeValues,
BatchChangeInterval: cfg.AWSBatchChangeInterval,
EvaluateTargetHealth: cfg.AWSEvaluateTargetHealth,
PreferCNAME: cfg.AWSPreferCNAME,
DryRun: cfg.DryRun,
ZoneCacheDuration: cfg.AWSZoneCacheDuration,
},
route53.New(awsSession),
)
Expand Down
6 changes: 6 additions & 0 deletions pkg/apis/externaldns/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ type Config struct {
AWSAssumeRole string
AWSAssumeRoleExternalID string
AWSBatchChangeSize int
AWSBatchChangeSizeBytes int
AWSBatchChangeSizeValues int
AWSBatchChangeInterval time.Duration
AWSEvaluateTargetHealth bool
AWSAPIRetries int
Expand Down Expand Up @@ -262,6 +264,8 @@ var defaultConfig = &Config{
AWSAssumeRole: "",
AWSAssumeRoleExternalID: "",
AWSBatchChangeSize: 1000,
AWSBatchChangeSizeBytes: 32000,
AWSBatchChangeSizeValues: 1000,
AWSBatchChangeInterval: time.Second,
AWSEvaluateTargetHealth: true,
AWSAPIRetries: 3,
Expand Down Expand Up @@ -485,6 +489,8 @@ func (cfg *Config) ParseFlags(args []string) error {
app.Flag("aws-assume-role", "When using the AWS API, assume this IAM role. Useful for hosted zones in another AWS account. Specify the full ARN, e.g. `arn:aws:iam::123455567:role/external-dns` (optional)").Default(defaultConfig.AWSAssumeRole).StringVar(&cfg.AWSAssumeRole)
app.Flag("aws-assume-role-external-id", "When using the AWS API and assuming a role then specify this external ID` (optional)").Default(defaultConfig.AWSAssumeRoleExternalID).StringVar(&cfg.AWSAssumeRoleExternalID)
app.Flag("aws-batch-change-size", "When using the AWS provider, set the maximum number of changes that will be applied in each batch.").Default(strconv.Itoa(defaultConfig.AWSBatchChangeSize)).IntVar(&cfg.AWSBatchChangeSize)
app.Flag("aws-batch-change-size-bytes", "When using the AWS provider, set the maximum byte size that will be applied in each batch.").Default(strconv.Itoa(defaultConfig.AWSBatchChangeSizeBytes)).IntVar(&cfg.AWSBatchChangeSizeBytes)
app.Flag("aws-batch-change-size-values", "When using the AWS provider, set the maximum total record values that will be applied in each batch.").Default(strconv.Itoa(defaultConfig.AWSBatchChangeSizeValues)).IntVar(&cfg.AWSBatchChangeSizeValues)
app.Flag("aws-batch-change-interval", "When using the AWS provider, set the interval between batch changes.").Default(defaultConfig.AWSBatchChangeInterval.String()).DurationVar(&cfg.AWSBatchChangeInterval)
app.Flag("aws-evaluate-target-health", "When using the AWS provider, set whether to evaluate the health of a DNS target (default: enabled, disable with --no-aws-evaluate-target-health)").Default(strconv.FormatBool(defaultConfig.AWSEvaluateTargetHealth)).BoolVar(&cfg.AWSEvaluateTargetHealth)
app.Flag("aws-api-retries", "When using the AWS API, set the maximum number of retries before giving up.").Default(strconv.Itoa(defaultConfig.AWSAPIRetries)).IntVar(&cfg.AWSAPIRetries)
Expand Down
8 changes: 8 additions & 0 deletions pkg/apis/externaldns/types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ var (
AWSAssumeRole: "",
AWSAssumeRoleExternalID: "",
AWSBatchChangeSize: 1000,
AWSBatchChangeSizeBytes: 32000,
AWSBatchChangeSizeValues: 1000,
AWSBatchChangeInterval: time.Second,
AWSEvaluateTargetHealth: true,
AWSAPIRetries: 3,
Expand Down Expand Up @@ -171,6 +173,8 @@ var (
AWSAssumeRole: "some-other-role",
AWSAssumeRoleExternalID: "pg2000",
AWSBatchChangeSize: 100,
AWSBatchChangeSizeBytes: 16000,
AWSBatchChangeSizeValues: 100,
AWSBatchChangeInterval: time.Second * 2,
AWSEvaluateTargetHealth: false,
AWSAPIRetries: 13,
Expand Down Expand Up @@ -357,6 +361,8 @@ func TestParseFlags(t *testing.T) {
"--aws-assume-role=some-other-role",
"--aws-assume-role-external-id=pg2000",
"--aws-batch-change-size=100",
"--aws-batch-change-size-bytes=16000",
"--aws-batch-change-size-values=100",
"--aws-batch-change-interval=2s",
"--aws-api-retries=13",
"--aws-prefer-cname",
Expand Down Expand Up @@ -480,6 +486,8 @@ func TestParseFlags(t *testing.T) {
"EXTERNAL_DNS_AWS_ASSUME_ROLE": "some-other-role",
"EXTERNAL_DNS_AWS_ASSUME_ROLE_EXTERNAL_ID": "pg2000",
"EXTERNAL_DNS_AWS_BATCH_CHANGE_SIZE": "100",
"EXTERNAL_DNS_AWS_BATCH_CHANGE_SIZE_BYTES": "16000",
"EXTERNAL_DNS_AWS_BATCH_CHANGE_SIZE_VALUES": "100",
"EXTERNAL_DNS_AWS_BATCH_CHANGE_INTERVAL": "2s",
"EXTERNAL_DNS_AWS_EVALUATE_TARGET_HEALTH": "0",
"EXTERNAL_DNS_AWS_API_RETRIES": "13",
Expand Down
117 changes: 84 additions & 33 deletions provider/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,8 @@ type Route53API interface {
type Route53Change struct {
route53.Change
OwnedRecord string
sizeBytes int
sizeValues int
}

type Route53Changes []*Route53Change
Expand All @@ -226,11 +228,13 @@ type zonesListCache struct {
// AWSProvider is an implementation of Provider for AWS Route53.
type AWSProvider struct {
provider.BaseProvider
client Route53API
dryRun bool
batchChangeSize int
batchChangeInterval time.Duration
evaluateTargetHealth bool
client Route53API
dryRun bool
batchChangeSize int
batchChangeSizeBytes int
batchChangeSizeValues int
batchChangeInterval time.Duration
evaluateTargetHealth bool
// only consider hosted zones managing domains ending in this suffix
domainFilter endpoint.DomainFilter
// filter hosted zones by id
Expand All @@ -249,35 +253,39 @@ type AWSProvider struct {

// AWSConfig contains configuration to create a new AWS provider.
type AWSConfig struct {
DomainFilter endpoint.DomainFilter
ZoneIDFilter provider.ZoneIDFilter
ZoneTypeFilter provider.ZoneTypeFilter
ZoneTagFilter provider.ZoneTagFilter
ZoneMatchParent bool
BatchChangeSize int
BatchChangeInterval time.Duration
EvaluateTargetHealth bool
PreferCNAME bool
DryRun bool
ZoneCacheDuration time.Duration
DomainFilter endpoint.DomainFilter
ZoneIDFilter provider.ZoneIDFilter
ZoneTypeFilter provider.ZoneTypeFilter
ZoneTagFilter provider.ZoneTagFilter
ZoneMatchParent bool
BatchChangeSize int
BatchChangeSizeBytes int
BatchChangeSizeValues int
BatchChangeInterval time.Duration
EvaluateTargetHealth bool
PreferCNAME bool
DryRun bool
ZoneCacheDuration time.Duration
}

// NewAWSProvider initializes a new AWS Route53 based Provider.
func NewAWSProvider(awsConfig AWSConfig, client Route53API) (*AWSProvider, error) {
provider := &AWSProvider{
client: client,
domainFilter: awsConfig.DomainFilter,
zoneIDFilter: awsConfig.ZoneIDFilter,
zoneTypeFilter: awsConfig.ZoneTypeFilter,
zoneTagFilter: awsConfig.ZoneTagFilter,
zoneMatchParent: awsConfig.ZoneMatchParent,
batchChangeSize: awsConfig.BatchChangeSize,
batchChangeInterval: awsConfig.BatchChangeInterval,
evaluateTargetHealth: awsConfig.EvaluateTargetHealth,
preferCNAME: awsConfig.PreferCNAME,
dryRun: awsConfig.DryRun,
zonesCache: &zonesListCache{duration: awsConfig.ZoneCacheDuration},
failedChangesQueue: make(map[string]Route53Changes),
client: client,
domainFilter: awsConfig.DomainFilter,
zoneIDFilter: awsConfig.ZoneIDFilter,
zoneTypeFilter: awsConfig.ZoneTypeFilter,
zoneTagFilter: awsConfig.ZoneTagFilter,
zoneMatchParent: awsConfig.ZoneMatchParent,
batchChangeSize: awsConfig.BatchChangeSize,
batchChangeSizeBytes: awsConfig.BatchChangeSizeBytes,
batchChangeSizeValues: awsConfig.BatchChangeSizeValues,
batchChangeInterval: awsConfig.BatchChangeInterval,
evaluateTargetHealth: awsConfig.EvaluateTargetHealth,
preferCNAME: awsConfig.PreferCNAME,
dryRun: awsConfig.DryRun,
zonesCache: &zonesListCache{duration: awsConfig.ZoneCacheDuration},
failedChangesQueue: make(map[string]Route53Changes),
}

return provider, nil
Expand Down Expand Up @@ -573,7 +581,8 @@ func (p *AWSProvider) submitChanges(ctx context.Context, changes Route53Changes,
retriedChanges, newChanges := findChangesInQueue(cs, p.failedChangesQueue[z])
p.failedChangesQueue[z] = nil

batchCs := append(batchChangeSet(newChanges, p.batchChangeSize), batchChangeSet(retriedChanges, p.batchChangeSize)...)
batchCs := append(batchChangeSet(newChanges, p.batchChangeSize, p.batchChangeSizeBytes, p.batchChangeSizeValues),
batchChangeSet(retriedChanges, p.batchChangeSize, p.batchChangeSizeBytes, p.batchChangeSizeValues)...)
for i, b := range batchCs {
if len(b) == 0 {
continue
Expand Down Expand Up @@ -744,6 +753,8 @@ func (p *AWSProvider) newChange(action string, ep *endpoint.Endpoint) (*Route53C
HostedZoneId: aws.String(cleanZoneID(targetHostedZone)),
EvaluateTargetHealth: aws.Bool(evalTargetHealth),
}
change.sizeBytes += len([]byte(ep.Targets[0]))
change.sizeValues += 1
} else {
change.ResourceRecordSet.Type = aws.String(ep.RecordType)
if !ep.RecordTTL.IsConfigured() {
Expand All @@ -756,9 +767,18 @@ func (p *AWSProvider) newChange(action string, ep *endpoint.Endpoint) (*Route53C
change.ResourceRecordSet.ResourceRecords[idx] = &route53.ResourceRecord{
Value: aws.String(val),
}
change.sizeBytes += len([]byte(val))
change.sizeValues += 1
}
}

if action == route53.ChangeActionUpsert {
// If the value of the Action element is UPSERT, each ResourceRecord element and each character in a Value
// element is counted twice
change.sizeBytes *= 2
change.sizeValues *= 2
}

setIdentifier := ep.SetIdentifier
if setIdentifier != "" {
change.ResourceRecordSet.SetIdentifier = aws.String(setIdentifier)
Expand Down Expand Up @@ -864,8 +884,26 @@ func (p *AWSProvider) tagsForZone(ctx context.Context, zoneID string) (map[strin
return tagMap, nil
}

func batchChangeSet(cs Route53Changes, batchSize int) []Route53Changes {
if len(cs) <= batchSize {
// count bytes for all changes values
func countChangeBytes(cs Route53Changes) int {
count := 0
for _, c := range cs {
count += c.sizeBytes
}
return count
}

// count total value count for all changes
func countChangeValues(cs Route53Changes) int {
count := 0
for _, c := range cs {
count += c.sizeValues
}
return count
}

func batchChangeSet(cs Route53Changes, batchSize int, batchSizeBytes int, batchSizeValues int) []Route53Changes {
if len(cs) <= batchSize && countChangeBytes(cs) <= batchSizeBytes && countChangeValues(cs) <= batchSizeValues {
res := sortChangesByActionNameType(cs)
return []Route53Changes{res}
}
Expand All @@ -883,12 +921,25 @@ func batchChangeSet(cs Route53Changes, batchSize int) []Route53Changes {
currentBatch := Route53Changes{}
for k, name := range names {
v := changesByOwnership[name]
vBytes := countChangeBytes(v)
vValues := countChangeValues(v)
if len(v) > batchSize {
log.Warnf("Total changes for %v exceeds max batch size of %d, total changes: %d; changes will not be performed", k, batchSize, len(v))
continue
}
if vBytes > batchSizeBytes {
log.Warnf("Total changes for %v exceeds max batch size bytes of %d, total changes bytes: %d; changes will not be performed", k, batchSizeBytes, vBytes)
continue
}
if vValues > batchSizeValues {
log.Warnf("Total changes for %v exceeds max batch size values of %d, total changes values: %d; changes will not be performed", k, batchSizeValues, vValues)
continue
}

bytes := countChangeBytes(currentBatch) + vBytes
values := countChangeValues(currentBatch) + vValues

if len(currentBatch)+len(v) > batchSize {
if len(currentBatch)+len(v) > batchSize || bytes > batchSizeBytes || values > batchSizeValues {
// currentBatch would be too large if we add this changeset;
// add currentBatch to batchChanges and start a new currentBatch
batchChanges = append(batchChanges, sortChangesByActionNameType(currentBatch))
Expand Down
Loading

0 comments on commit 3231951

Please sign in to comment.