Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Validate AWS record values size during batch set generation #4126

Merged
16 changes: 16 additions & 0 deletions docs/tutorials/aws.md
Original file line number Diff line number Diff line change
Expand Up @@ -982,3 +982,19 @@ An effective starting point for EKS with an ingress controller might look like:
--domain-filter=example.com
--aws-zones-cache-duration=1h
```

### Batch size options

After external-dns generates all changes, it will perform a task to group those changes into batches. Each change will be validated against batch-change-size limits. If at least one of those parameters out of range - the change will be moved to a separate branch. If the change can't fit into any batch - *it will be skipped.*<br>
megum1n marked this conversation as resolved.
Show resolved Hide resolved
There are 3 options to control batch size for AWS provider:
* Maximum amount of changes added to one batch
* `--aws-batch-change-size` (default `1000`)
* Maximum size of changes in bytes added to one batch
* `--aws-batch-change-size-bytes` (default `32000`)
* Maximum value count of changes added to one batch
* `aws-batch-change-size-values` (default `1000`)

`aws-batch-change-size` can be very useful for throttling purposes and can be set to any value.

Default values for flags `aws-batch-change-size-bytes` and `aws-batch-change-size-values` are taken from [AWS documentation](https://docs.aws.amazon.com/Route53/latest/DeveloperGuide/DNSLimitations.html#limits-api-requests) for Route53 API. **You should not change those values until you really have to.** <br>
Because those limits are in place, `aws-batch-change-size` can be set to any value: Even if your batch size is `4000` records, your change will be split to separate batches due to bytes/values size limits and apply request will be finished without issues.
22 changes: 12 additions & 10 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,16 +227,18 @@ func main() {
case "aws":
p, err = aws.NewAWSProvider(
aws.AWSConfig{
DomainFilter: domainFilter,
ZoneIDFilter: zoneIDFilter,
ZoneTypeFilter: zoneTypeFilter,
ZoneTagFilter: zoneTagFilter,
BatchChangeSize: cfg.AWSBatchChangeSize,
BatchChangeInterval: cfg.AWSBatchChangeInterval,
EvaluateTargetHealth: cfg.AWSEvaluateTargetHealth,
PreferCNAME: cfg.AWSPreferCNAME,
DryRun: cfg.DryRun,
ZoneCacheDuration: cfg.AWSZoneCacheDuration,
DomainFilter: domainFilter,
ZoneIDFilter: zoneIDFilter,
ZoneTypeFilter: zoneTypeFilter,
ZoneTagFilter: zoneTagFilter,
BatchChangeSize: cfg.AWSBatchChangeSize,
BatchChangeSizeBytes: cfg.AWSBatchChangeSizeBytes,
BatchChangeSizeValues: cfg.AWSBatchChangeSizeValues,
BatchChangeInterval: cfg.AWSBatchChangeInterval,
EvaluateTargetHealth: cfg.AWSEvaluateTargetHealth,
PreferCNAME: cfg.AWSPreferCNAME,
DryRun: cfg.DryRun,
ZoneCacheDuration: cfg.AWSZoneCacheDuration,
},
route53.New(awsSession),
)
Expand Down
6 changes: 6 additions & 0 deletions pkg/apis/externaldns/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ type Config struct {
AWSAssumeRole string
AWSAssumeRoleExternalID string
AWSBatchChangeSize int
AWSBatchChangeSizeBytes int
AWSBatchChangeSizeValues int
AWSBatchChangeInterval time.Duration
AWSEvaluateTargetHealth bool
AWSAPIRetries int
Expand Down Expand Up @@ -258,6 +260,8 @@ var defaultConfig = &Config{
AWSAssumeRole: "",
AWSAssumeRoleExternalID: "",
AWSBatchChangeSize: 1000,
AWSBatchChangeSizeBytes: 32000,
AWSBatchChangeSizeValues: 1000,
AWSBatchChangeInterval: time.Second,
AWSEvaluateTargetHealth: true,
AWSAPIRetries: 3,
Expand Down Expand Up @@ -477,6 +481,8 @@ func (cfg *Config) ParseFlags(args []string) error {
app.Flag("aws-assume-role", "When using the AWS API, assume this IAM role. Useful for hosted zones in another AWS account. Specify the full ARN, e.g. `arn:aws:iam::123455567:role/external-dns` (optional)").Default(defaultConfig.AWSAssumeRole).StringVar(&cfg.AWSAssumeRole)
app.Flag("aws-assume-role-external-id", "When using the AWS API and assuming a role then specify this external ID` (optional)").Default(defaultConfig.AWSAssumeRoleExternalID).StringVar(&cfg.AWSAssumeRoleExternalID)
app.Flag("aws-batch-change-size", "When using the AWS provider, set the maximum number of changes that will be applied in each batch.").Default(strconv.Itoa(defaultConfig.AWSBatchChangeSize)).IntVar(&cfg.AWSBatchChangeSize)
app.Flag("aws-batch-change-size-bytes", "When using the AWS provider, set the maximum byte size that will be applied in each batch.").Default(strconv.Itoa(defaultConfig.AWSBatchChangeSizeBytes)).IntVar(&cfg.AWSBatchChangeSizeBytes)
app.Flag("aws-batch-change-size-values", "When using the AWS provider, set the maximum total record values that will be applied in each batch.").Default(strconv.Itoa(defaultConfig.AWSBatchChangeSizeValues)).IntVar(&cfg.AWSBatchChangeSizeValues)
app.Flag("aws-batch-change-interval", "When using the AWS provider, set the interval between batch changes.").Default(defaultConfig.AWSBatchChangeInterval.String()).DurationVar(&cfg.AWSBatchChangeInterval)
app.Flag("aws-evaluate-target-health", "When using the AWS provider, set whether to evaluate the health of a DNS target (default: enabled, disable with --no-aws-evaluate-target-health)").Default(strconv.FormatBool(defaultConfig.AWSEvaluateTargetHealth)).BoolVar(&cfg.AWSEvaluateTargetHealth)
app.Flag("aws-api-retries", "When using the AWS API, set the maximum number of retries before giving up.").Default(strconv.Itoa(defaultConfig.AWSAPIRetries)).IntVar(&cfg.AWSAPIRetries)
Expand Down
8 changes: 8 additions & 0 deletions pkg/apis/externaldns/types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ var (
AWSAssumeRole: "",
AWSAssumeRoleExternalID: "",
AWSBatchChangeSize: 1000,
AWSBatchChangeSizeBytes: 32000,
AWSBatchChangeSizeValues: 1000,
AWSBatchChangeInterval: time.Second,
AWSEvaluateTargetHealth: true,
AWSAPIRetries: 3,
Expand Down Expand Up @@ -169,6 +171,8 @@ var (
AWSAssumeRole: "some-other-role",
AWSAssumeRoleExternalID: "pg2000",
AWSBatchChangeSize: 100,
AWSBatchChangeSizeBytes: 16000,
AWSBatchChangeSizeValues: 100,
AWSBatchChangeInterval: time.Second * 2,
AWSEvaluateTargetHealth: false,
AWSAPIRetries: 13,
Expand Down Expand Up @@ -354,6 +358,8 @@ func TestParseFlags(t *testing.T) {
"--aws-assume-role=some-other-role",
"--aws-assume-role-external-id=pg2000",
"--aws-batch-change-size=100",
"--aws-batch-change-size-bytes=16000",
"--aws-batch-change-size-values=100",
"--aws-batch-change-interval=2s",
"--aws-api-retries=13",
"--aws-prefer-cname",
Expand Down Expand Up @@ -476,6 +482,8 @@ func TestParseFlags(t *testing.T) {
"EXTERNAL_DNS_AWS_ASSUME_ROLE": "some-other-role",
"EXTERNAL_DNS_AWS_ASSUME_ROLE_EXTERNAL_ID": "pg2000",
"EXTERNAL_DNS_AWS_BATCH_CHANGE_SIZE": "100",
"EXTERNAL_DNS_AWS_BATCH_CHANGE_SIZE_BYTES": "16000",
"EXTERNAL_DNS_AWS_BATCH_CHANGE_SIZE_VALUES": "100",
"EXTERNAL_DNS_AWS_BATCH_CHANGE_INTERVAL": "2s",
"EXTERNAL_DNS_AWS_EVALUATE_TARGET_HEALTH": "0",
"EXTERNAL_DNS_AWS_API_RETRIES": "13",
Expand Down
115 changes: 84 additions & 31 deletions provider/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,8 @@ type Route53API interface {
type Route53Change struct {
route53.Change
OwnedRecord string
sizeBytes int
sizeValues int
}

type Route53Changes []*Route53Change
Expand All @@ -227,11 +229,13 @@ type zonesListCache struct {
// AWSProvider is an implementation of Provider for AWS Route53.
type AWSProvider struct {
provider.BaseProvider
client Route53API
dryRun bool
batchChangeSize int
batchChangeInterval time.Duration
evaluateTargetHealth bool
client Route53API
dryRun bool
batchChangeSize int
batchChangeSizeBytes int
batchChangeSizeValues int
batchChangeInterval time.Duration
evaluateTargetHealth bool
// only consider hosted zones managing domains ending in this suffix
domainFilter endpoint.DomainFilter
// filter hosted zones by id
Expand All @@ -248,33 +252,37 @@ type AWSProvider struct {

// AWSConfig contains configuration to create a new AWS provider.
type AWSConfig struct {
DomainFilter endpoint.DomainFilter
ZoneIDFilter provider.ZoneIDFilter
ZoneTypeFilter provider.ZoneTypeFilter
ZoneTagFilter provider.ZoneTagFilter
BatchChangeSize int
BatchChangeInterval time.Duration
EvaluateTargetHealth bool
PreferCNAME bool
DryRun bool
ZoneCacheDuration time.Duration
DomainFilter endpoint.DomainFilter
ZoneIDFilter provider.ZoneIDFilter
ZoneTypeFilter provider.ZoneTypeFilter
ZoneTagFilter provider.ZoneTagFilter
BatchChangeSize int
BatchChangeSizeBytes int
BatchChangeSizeValues int
BatchChangeInterval time.Duration
EvaluateTargetHealth bool
PreferCNAME bool
DryRun bool
ZoneCacheDuration time.Duration
}

// NewAWSProvider initializes a new AWS Route53 based Provider.
func NewAWSProvider(awsConfig AWSConfig, client Route53API) (*AWSProvider, error) {
provider := &AWSProvider{
client: client,
domainFilter: awsConfig.DomainFilter,
zoneIDFilter: awsConfig.ZoneIDFilter,
zoneTypeFilter: awsConfig.ZoneTypeFilter,
zoneTagFilter: awsConfig.ZoneTagFilter,
batchChangeSize: awsConfig.BatchChangeSize,
batchChangeInterval: awsConfig.BatchChangeInterval,
evaluateTargetHealth: awsConfig.EvaluateTargetHealth,
preferCNAME: awsConfig.PreferCNAME,
dryRun: awsConfig.DryRun,
zonesCache: &zonesListCache{duration: awsConfig.ZoneCacheDuration},
failedChangesQueue: make(map[string]Route53Changes),
client: client,
domainFilter: awsConfig.DomainFilter,
zoneIDFilter: awsConfig.ZoneIDFilter,
zoneTypeFilter: awsConfig.ZoneTypeFilter,
zoneTagFilter: awsConfig.ZoneTagFilter,
batchChangeSize: awsConfig.BatchChangeSize,
batchChangeSizeBytes: awsConfig.BatchChangeSizeBytes,
batchChangeSizeValues: awsConfig.BatchChangeSizeValues,
batchChangeInterval: awsConfig.BatchChangeInterval,
evaluateTargetHealth: awsConfig.EvaluateTargetHealth,
preferCNAME: awsConfig.PreferCNAME,
dryRun: awsConfig.DryRun,
zonesCache: &zonesListCache{duration: awsConfig.ZoneCacheDuration},
failedChangesQueue: make(map[string]Route53Changes),
}

return provider, nil
Expand Down Expand Up @@ -565,7 +573,8 @@ func (p *AWSProvider) submitChanges(ctx context.Context, changes Route53Changes,
retriedChanges, newChanges := findChangesInQueue(cs, p.failedChangesQueue[z])
p.failedChangesQueue[z] = nil

batchCs := append(batchChangeSet(newChanges, p.batchChangeSize), batchChangeSet(retriedChanges, p.batchChangeSize)...)
batchCs := append(batchChangeSet(newChanges, p.batchChangeSize, p.batchChangeSizeBytes, p.batchChangeSizeValues),
batchChangeSet(retriedChanges, p.batchChangeSize, p.batchChangeSizeBytes, p.batchChangeSizeValues)...)
for i, b := range batchCs {
if len(b) == 0 {
continue
Expand Down Expand Up @@ -719,6 +728,8 @@ func (p *AWSProvider) newChange(action string, ep *endpoint.Endpoint) (*Route53C
Name: aws.String(ep.DNSName),
},
},
sizeBytes: 0,
sizeValues: 0,
megum1n marked this conversation as resolved.
Show resolved Hide resolved
}
dualstack := false
if targetHostedZone := isAWSAlias(ep); targetHostedZone != "" {
Expand All @@ -736,6 +747,8 @@ func (p *AWSProvider) newChange(action string, ep *endpoint.Endpoint) (*Route53C
HostedZoneId: aws.String(cleanZoneID(targetHostedZone)),
EvaluateTargetHealth: aws.Bool(evalTargetHealth),
}
change.sizeBytes += len([]byte(ep.Targets[0]))
change.sizeValues += 1
} else {
change.ResourceRecordSet.Type = aws.String(ep.RecordType)
if !ep.RecordTTL.IsConfigured() {
Expand All @@ -748,9 +761,18 @@ func (p *AWSProvider) newChange(action string, ep *endpoint.Endpoint) (*Route53C
change.ResourceRecordSet.ResourceRecords[idx] = &route53.ResourceRecord{
Value: aws.String(val),
}
change.sizeBytes += len([]byte(val))
change.sizeValues += 1
}
}

if action == route53.ChangeActionUpsert {
// If the value of the Action element is UPSERT, each ResourceRecord element and each character in a Value
// element is counted twice
change.sizeBytes *= 2
change.sizeValues *= 2
}

setIdentifier := ep.SetIdentifier
if setIdentifier != "" {
change.ResourceRecordSet.SetIdentifier = aws.String(setIdentifier)
Expand Down Expand Up @@ -856,8 +878,26 @@ func (p *AWSProvider) tagsForZone(ctx context.Context, zoneID string) (map[strin
return tagMap, nil
}

func batchChangeSet(cs Route53Changes, batchSize int) []Route53Changes {
if len(cs) <= batchSize {
// count bytes for all changes values
func countChangeBytes(cs Route53Changes) int {
count := 0
for _, c := range cs {
count += c.sizeBytes
}
return count
}

// count total value count for all changes
func countChangeValues(cs Route53Changes) int {
count := 0
for _, c := range cs {
count += c.sizeValues
}
return count
}

func batchChangeSet(cs Route53Changes, batchSize int, batchSizeBytes int, batchSizeValues int) []Route53Changes {
if len(cs) <= batchSize && countChangeBytes(cs) <= batchSizeBytes && countChangeValues(cs) <= batchSizeValues {
res := sortChangesByActionNameType(cs)
return []Route53Changes{res}
}
Expand All @@ -875,12 +915,25 @@ func batchChangeSet(cs Route53Changes, batchSize int) []Route53Changes {
currentBatch := Route53Changes{}
for k, name := range names {
v := changesByOwnership[name]
vBytes := countChangeBytes(v)
vValues := countChangeValues(v)
if len(v) > batchSize {
log.Warnf("Total changes for %v exceeds max batch size of %d, total changes: %d; changes will not be performed", k, batchSize, len(v))
continue
}
if vBytes > batchSizeBytes {
log.Warnf("Total changes for %v exceeds max batch size bytes of %d, total changes bytes: %d; changes will not be performed", k, batchSizeBytes, vBytes)
continue
}
if vValues > batchSizeValues {
log.Warnf("Total changes for %v exceeds max batch size values of %d, total changes values: %d; changes will not be performed", k, batchSizeValues, vValues)
continue
}

bytes := countChangeBytes(currentBatch) + vBytes
values := countChangeValues(currentBatch) + vValues

if len(currentBatch)+len(v) > batchSize {
if len(currentBatch)+len(v) > batchSize || bytes > batchSizeBytes || values > batchSizeValues {
// currentBatch would be too large if we add this changeset;
// add currentBatch to batchChanges and start a new currentBatch
batchChanges = append(batchChanges, sortChangesByActionNameType(currentBatch))
Expand Down
Loading
Loading