diff --git a/.github/workflows/publish-docs.yml b/.github/workflows/publish-docs.yml new file mode 100644 index 000000000..b0b36fae1 --- /dev/null +++ b/.github/workflows/publish-docs.yml @@ -0,0 +1,29 @@ +name: Publish docs via Github pages +on: + push: + branches: + - master + - mkdocs +jobs: + deploy: + runs-on: ubuntu-latest + steps: + - name: Checkout master + uses: actions/checkout@v4 + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: 3.x + - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV + - uses: actions/cache@v4 + with: + key: mkdocs-material-${{ env.cache_id }} + path: .cache + restore-keys: | + mkdocs-material- + - run: pip install \ + mkdocs-material \ + pymdown-extensions \ + mkdocs-glightbox \ + mkdocs-pymdownx-material-extras + - run: mkdocs gh-deploy --force \ No newline at end of file diff --git a/Makefile b/Makefile index 3eb06ea50..0bc1697d6 100644 --- a/Makefile +++ b/Makefile @@ -37,9 +37,13 @@ manifests: $(VGOPATH) $(CONTROLLER_GEN) @find "$(REPO_ROOT)/config/crd/bases" -name "*.yaml" -exec cp '{}' "$(REPO_ROOT)/charts/druid/charts/crds/templates/" \; @controller-gen rbac:roleName=manager-role paths="./internal/controller/..." +.PHONY: generate-api-docs +generate-api-docs: $(CRD_REF_DOCS) + @crd-ref-docs --source-path "$(REPO_ROOT)/api" --config "$(HACK_DIR)/api-reference/config.yaml" --output-path "$(REPO_ROOT)/docs/api-reference/etcd-druid-api.md" --renderer markdown + # Generate code .PHONY: generate -generate: manifests $(CONTROLLER_GEN) $(GOIMPORTS) $(MOCKGEN) +generate: manifests generate-api-docs $(CONTROLLER_GEN) $(GOIMPORTS) $(MOCKGEN) @go generate "$(REPO_ROOT)/internal/..." @"$(HACK_DIR)/update-codegen.sh" diff --git a/README.md b/README.md index e9a8eeae2..059d2bfae 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ -[![REUSE status](https://api.reuse.software/badge/github.com/gardener/etcd-druid)](https://api.reuse.software/info/github.com/gardener/etcd-druid) [![CI Build status](https://concourse.ci.gardener.cloud/api/v1/teams/gardener/pipelines/etcd-druid-master/jobs/master-head-update-job/badge)](https://concourse.ci.gardener.cloud/teams/gardener/pipelines/etcd-druid-master/jobs/master-head-update-job) [![Go Report Card](https://goreportcard.com/badge/github.com/gardener/etcd-druid)](https://goreportcard.com/report/github.com/gardener/etcd-druid) [![License: Apache-2.0](https://img.shields.io/badge/License-Apache--2.0-blue.svg)](LICENSE) [![Release](https://img.shields.io/github/v/release/gardener/etcd-druid.svg?style=flat)](https://github.com/gardener/etcd-druid) [![Go Reference](https://pkg.go.dev/badge/github.com/gardener/etcd-druid.svg)](https://pkg.go.dev/github.com/gardener/etcd-druid) +[![REUSE status](https://api.reuse.software/badge/github.com/gardener/etcd-druid)](https://api.reuse.software/info/github.com/gardener/etcd-druid) [![CI Build status](https://concourse.ci.gardener.cloud/api/v1/teams/gardener/pipelines/etcd-druid-master/jobs/master-head-update-job/badge)](https://concourse.ci.gardener.cloud/teams/gardener/pipelines/etcd-druid-master/jobs/master-head-update-job) [![Go Report Card](https://goreportcard.com/badge/github.com/gardener/etcd-druid)](https://goreportcard.com/report/github.com/gardener/etcd-druid) [![License: Apache-2.0](https://img.shields.io/badge/License-Apache--2.0-blue.svg)](LICENSE) [![Release](https://img.shields.io/github/v/release/gardener/etcd-druid.svg?style=flat)](https://github.com/gardener/etcd-druid) [![Go 
Reference](https://pkg.go.dev/badge/github.com/gardener/etcd-druid.svg)](https://pkg.go.dev/github.com/gardener/etcd-druid) [![Docs](https://img.shields.io/badge/Docs-reference-orange)](https://gardener.github.io/etcd-druid/index.html) `etcd-druid` is an [etcd](https://github.com/etcd-io/etcd) [operator](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/) which makes it easy to configure, provision, reconcile, monitor and delete etcd clusters. It enables management of etcd clusters through [declarative Kubernetes API model](config/crd/bases/crd-druid.gardener.cloud_etcds.yaml). @@ -14,7 +14,7 @@ In every etcd cluster managed by `etcd-druid`, each etcd member is a two contain - Restoration - In case of a DB corruption for a single-member cluster it helps in restoring from latest set of snapshots (full & delta). - Member control operations. -`etcd-druid` additional provides the following capabilities: +`etcd-druid` additionally provides the following capabilities: - Facilitates declarative scale-out of [etcd](https://github.com/etcd-io/etcd) clusters. - Provides protection against accidental deletion/mutation of resources provisioned as part of an etcd cluster. @@ -29,7 +29,7 @@ If you are looking to try out druid then you can use a [Kind](https://kind.sigs. https://github.com/user-attachments/assets/cfe0d891-f709-4d7f-b975-4300c6de67e4 -For detailed documentation, see our `/docs` folder. Please find the [index](docs/README.md) here. +For detailed documentation, see our [docs](https://gardener.github.io/etcd-druid/index.html). ## Contributions diff --git a/docs/api-reference/etcd-druid-api.md b/docs/api-reference/etcd-druid-api.md new file mode 100644 index 000000000..14824674a --- /dev/null +++ b/docs/api-reference/etcd-druid-api.md @@ -0,0 +1,679 @@ +# API Reference + +## Packages +- [druid.gardener.cloud/v1alpha1](#druidgardenercloudv1alpha1) + + +## druid.gardener.cloud/v1alpha1 + +Package v1alpha1 contains API Schema definitions for the druid v1alpha1 API group + +### Resource Types +- [Etcd](#etcd) +- [EtcdCopyBackupsTask](#etcdcopybackupstask) + + + +#### BackupSpec + + + +BackupSpec defines parameters associated with the full and delta snapshots of etcd. + + + +_Appears in:_ +- [EtcdSpec](#etcdspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `port` _integer_ | Port define the port on which etcd-backup-restore server will be exposed. | | | +| `tls` _[TLSConfig](#tlsconfig)_ | | | | +| `image` _string_ | Image defines the etcd container image and tag | | | +| `store` _[StoreSpec](#storespec)_ | Store defines the specification of object store provider for storing backups. | | | +| `resources` _[ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#resourcerequirements-v1-core)_ | Resources defines compute Resources required by backup-restore container.
More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/ | | | +| `compactionResources` _[ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#resourcerequirements-v1-core)_ | CompactionResources defines compute Resources required by compaction job.
More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/ | | | +| `fullSnapshotSchedule` _string_ | FullSnapshotSchedule defines the cron standard schedule for full snapshots. | | | +| `garbageCollectionPolicy` _[GarbageCollectionPolicy](#garbagecollectionpolicy)_ | GarbageCollectionPolicy defines the policy for garbage collecting old backups | | Enum: [Exponential LimitBased]
| +| `maxBackupsLimitBasedGC` _integer_ | MaxBackupsLimitBasedGC defines the maximum number of Full snapshots to retain in Limit Based GarbageCollectionPolicy
All full snapshots beyond this limit will be garbage collected. | | | +| `garbageCollectionPeriod` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#duration-v1-meta)_ | GarbageCollectionPeriod defines the period for garbage collecting old backups | | | +| `deltaSnapshotPeriod` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#duration-v1-meta)_ | DeltaSnapshotPeriod defines the period after which delta snapshots will be taken | | | +| `deltaSnapshotMemoryLimit` _[Quantity](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#quantity-resource-api)_ | DeltaSnapshotMemoryLimit defines the memory limit after which delta snapshots will be taken | | | +| `deltaSnapshotRetentionPeriod` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#duration-v1-meta)_ | DeltaSnapshotRetentionPeriod defines the duration for which delta snapshots will be retained, excluding the latest snapshot set.
The value should be a string formatted as a duration (e.g., '1s', '2m', '3h', '4d') | | Pattern: `^([0-9][0-9]*([.][0-9]+)?(s\|m\|h\|d))+$`
Type: string
| +| `compression` _[CompressionSpec](#compressionspec)_ | SnapshotCompression defines the specification for compression of Snapshots. | | | +| `enableProfiling` _boolean_ | EnableProfiling defines if profiling should be enabled for the etcd-backup-restore-sidecar | | | +| `etcdSnapshotTimeout` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#duration-v1-meta)_ | EtcdSnapshotTimeout defines the timeout duration for etcd FullSnapshot operation | | | +| `leaderElection` _[LeaderElectionSpec](#leaderelectionspec)_ | LeaderElection defines parameters related to the LeaderElection configuration. | | | + + +#### ClientService + + + +ClientService defines the parameters of the client service that a user can specify + + + +_Appears in:_ +- [EtcdConfig](#etcdconfig) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `annotations` _object (keys:string, values:string)_ | Annotations specify the annotations that should be added to the client service | | | +| `labels` _object (keys:string, values:string)_ | Labels specify the labels that should be added to the client service | | | + + +#### CompactionMode + +_Underlying type:_ _string_ + +CompactionMode defines the auto-compaction-mode: 'periodic' or 'revision'. +'periodic' for duration based retention and 'revision' for revision number based retention. + +_Validation:_ +- Enum: [periodic revision] + +_Appears in:_ +- [SharedConfig](#sharedconfig) + +| Field | Description | +| --- | --- | +| `periodic` | Periodic is a constant to set auto-compaction-mode 'periodic' for duration based retention.
| +| `revision` | Revision is a constant to set auto-compaction-mode 'revision' for revision number based retention.
| + + +#### CompressionPolicy + +_Underlying type:_ _string_ + +CompressionPolicy defines the type of policy for compression of snapshots. + +_Validation:_ +- Enum: [gzip lzw zlib] + +_Appears in:_ +- [CompressionSpec](#compressionspec) + +| Field | Description | +| --- | --- | +| `gzip` | GzipCompression is constant for gzip compression policy.
| +| `lzw` | LzwCompression is constant for lzw compression policy.
| +| `zlib` | ZlibCompression is constant for zlib compression policy.
| + + +#### CompressionSpec + + + +CompressionSpec defines parameters related to compression of Snapshots(full as well as delta). + + + +_Appears in:_ +- [BackupSpec](#backupspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `enabled` _boolean_ | | | | +| `policy` _[CompressionPolicy](#compressionpolicy)_ | | | Enum: [gzip lzw zlib]
| + + +#### Condition + + + +Condition holds the information about the state of a resource. + + + +_Appears in:_ +- [EtcdCopyBackupsTaskStatus](#etcdcopybackupstaskstatus) +- [EtcdStatus](#etcdstatus) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `type` _[ConditionType](#conditiontype)_ | Type of the Etcd condition. | | | +| `status` _[ConditionStatus](#conditionstatus)_ | Status of the condition, one of True, False, Unknown. | | | +| `lastTransitionTime` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#time-v1-meta)_ | Last time the condition transitioned from one status to another. | | | +| `lastUpdateTime` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#time-v1-meta)_ | Last time the condition was updated. | | | +| `reason` _string_ | The reason for the condition's last transition. | | | +| `message` _string_ | A human-readable message indicating details about the transition. | | | + + +#### ConditionStatus + +_Underlying type:_ _string_ + +ConditionStatus is the status of a condition. + + + +_Appears in:_ +- [Condition](#condition) + +| Field | Description | +| --- | --- | +| `True` | ConditionTrue means a resource is in the condition.
| +| `False` | ConditionFalse means a resource is not in the condition.
| +| `Unknown` | ConditionUnknown means Gardener can't decide if a resource is in the condition or not.
| +| `Progressing` | ConditionProgressing means the condition was seen true, failed but stayed within a predefined failure threshold.
In the future, we could add other intermediate conditions, e.g. ConditionDegraded.
| +| `ConditionCheckError` | ConditionCheckError is a constant for a reason in condition.
| + + +#### ConditionType + +_Underlying type:_ _string_ + +ConditionType is the type of condition. + + + +_Appears in:_ +- [Condition](#condition) + +| Field | Description | +| --- | --- | +| `Ready` | ConditionTypeReady is a constant for a condition type indicating that the etcd cluster is ready.
| +| `AllMembersReady` | ConditionTypeAllMembersReady is a constant for a condition type indicating that all members of the etcd cluster are ready.
| +| `BackupReady` | ConditionTypeBackupReady is a constant for a condition type indicating that the etcd backup is ready.
| +| `DataVolumesReady` | ConditionTypeDataVolumesReady is a constant for a condition type indicating that the etcd data volumes are ready.
| +| `Succeeded` | EtcdCopyBackupsTaskSucceeded is a condition type indicating that a EtcdCopyBackupsTask has succeeded.
| +| `Failed` | EtcdCopyBackupsTaskFailed is a condition type indicating that a EtcdCopyBackupsTask has failed.
| + + +#### CrossVersionObjectReference + + + +CrossVersionObjectReference contains enough information to let you identify the referred resource. + + + +_Appears in:_ +- [EtcdStatus](#etcdstatus) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `kind` _string_ | Kind of the referent | | | +| `name` _string_ | Name of the referent | | | +| `apiVersion` _string_ | API version of the referent | | | + + +#### ErrorCode + +_Underlying type:_ _string_ + +ErrorCode is a string alias representing an error code that identifies an error. + + + +_Appears in:_ +- [LastError](#lasterror) + + + +#### Etcd + + + +Etcd is the Schema for the etcds API + + + + + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `apiVersion` _string_ | `druid.gardener.cloud/v1alpha1` | | | +| `kind` _string_ | `Etcd` | | | +| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | | +| `spec` _[EtcdSpec](#etcdspec)_ | | | | +| `status` _[EtcdStatus](#etcdstatus)_ | | | | + + +#### EtcdConfig + + + +EtcdConfig defines the configuration for the etcd cluster to be deployed. + + + +_Appears in:_ +- [EtcdSpec](#etcdspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `quota` _[Quantity](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#quantity-resource-api)_ | Quota defines the etcd DB quota. | | | +| `defragmentationSchedule` _string_ | DefragmentationSchedule defines the cron standard schedule for defragmentation of etcd. | | | +| `serverPort` _integer_ | | | | +| `clientPort` _integer_ | | | | +| `image` _string_ | Image defines the etcd container image and tag | | | +| `authSecretRef` _[SecretReference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#secretreference-v1-core)_ | | | | +| `metrics` _[MetricsLevel](#metricslevel)_ | Metrics defines the level of detail for exported metrics of etcd, specify 'extensive' to include histogram metrics. | | Enum: [basic extensive]
| +| `resources` _[ResourceRequirements](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#resourcerequirements-v1-core)_ | Resources defines the compute Resources required by etcd container.
More info: https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/ | | | +| `clientUrlTls` _[TLSConfig](#tlsconfig)_ | ClientUrlTLS contains the ca, server TLS and client TLS secrets for client communication to ETCD cluster | | | +| `peerUrlTls` _[TLSConfig](#tlsconfig)_ | PeerUrlTLS contains the ca and server TLS secrets for peer communication within ETCD cluster
Currently, PeerUrlTLS does not require client TLS secrets for gardener implementation of ETCD cluster. | | | +| `etcdDefragTimeout` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#duration-v1-meta)_ | EtcdDefragTimeout defines the timeout duration for etcd defrag call | | | +| `heartbeatDuration` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#duration-v1-meta)_ | HeartbeatDuration defines the duration for members to send heartbeats. The default value is 10s. | | | +| `clientService` _[ClientService](#clientservice)_ | ClientService defines the parameters of the client service that a user can specify | | | + + +#### EtcdCopyBackupsTask + + + +EtcdCopyBackupsTask is a task for copying etcd backups from a source to a target store. + + + + + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `apiVersion` _string_ | `druid.gardener.cloud/v1alpha1` | | | +| `kind` _string_ | `EtcdCopyBackupsTask` | | | +| `metadata` _[ObjectMeta](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#objectmeta-v1-meta)_ | Refer to Kubernetes API documentation for fields of `metadata`. | | | +| `spec` _[EtcdCopyBackupsTaskSpec](#etcdcopybackupstaskspec)_ | | | | +| `status` _[EtcdCopyBackupsTaskStatus](#etcdcopybackupstaskstatus)_ | | | | + + +#### EtcdCopyBackupsTaskSpec + + + +EtcdCopyBackupsTaskSpec defines the parameters for the copy backups task. + + + +_Appears in:_ +- [EtcdCopyBackupsTask](#etcdcopybackupstask) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `sourceStore` _[StoreSpec](#storespec)_ | SourceStore defines the specification of the source object store provider for storing backups. | | | +| `targetStore` _[StoreSpec](#storespec)_ | TargetStore defines the specification of the target object store provider for storing backups. | | | +| `maxBackupAge` _integer_ | MaxBackupAge is the maximum age in days that a backup must have in order to be copied.
By default all backups will be copied. | | | +| `maxBackups` _integer_ | MaxBackups is the maximum number of backups that will be copied starting with the most recent ones. | | | +| `waitForFinalSnapshot` _[WaitForFinalSnapshotSpec](#waitforfinalsnapshotspec)_ | WaitForFinalSnapshot defines the parameters for waiting for a final full snapshot before copying backups. | | | + + +#### EtcdCopyBackupsTaskStatus + + + +EtcdCopyBackupsTaskStatus defines the observed state of the copy backups task. + + + +_Appears in:_ +- [EtcdCopyBackupsTask](#etcdcopybackupstask) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `conditions` _[Condition](#condition) array_ | Conditions represents the latest available observations of an object's current state. | | | +| `observedGeneration` _integer_ | ObservedGeneration is the most recent generation observed for this resource. | | | +| `lastError` _string_ | LastError represents the last occurred error. | | | + + +#### EtcdMemberConditionStatus + +_Underlying type:_ _string_ + +EtcdMemberConditionStatus is the status of an etcd cluster member. + + + +_Appears in:_ +- [EtcdMemberStatus](#etcdmemberstatus) + +| Field | Description | +| --- | --- | +| `Ready` | EtcdMemberStatusReady indicates that the etcd member is ready.
| +| `NotReady` | EtcdMemberStatusNotReady indicates that the etcd member is not ready.
| +| `Unknown` | EtcdMemberStatusUnknown indicates that the status of the etcd member is unknown.
| + + +#### EtcdMemberStatus + + + +EtcdMemberStatus holds information about etcd cluster membership. + + + +_Appears in:_ +- [EtcdStatus](#etcdstatus) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `name` _string_ | Name is the name of the etcd member. It is the name of the backing `Pod`. | | | +| `id` _string_ | ID is the ID of the etcd member. | | | +| `role` _[EtcdRole](#etcdrole)_ | Role is the role in the etcd cluster, either `Leader` or `Member`. | | | +| `status` _[EtcdMemberConditionStatus](#etcdmemberconditionstatus)_ | Status of the condition, one of True, False, Unknown. | | | +| `reason` _string_ | The reason for the condition's last transition. | | | +| `lastTransitionTime` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#time-v1-meta)_ | LastTransitionTime is the last time the condition's status changed. | | | + + +#### EtcdRole + +_Underlying type:_ _string_ + +EtcdRole is the role of an etcd cluster member. + + + +_Appears in:_ +- [EtcdMemberStatus](#etcdmemberstatus) + +| Field | Description | +| --- | --- | +| `Leader` | EtcdRoleLeader describes the etcd role `Leader`.
| +| `Member` | EtcdRoleMember describes the etcd role `Member`.
| + + +#### EtcdSpec + + + +EtcdSpec defines the desired state of Etcd + + + +_Appears in:_ +- [Etcd](#etcd) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `selector` _[LabelSelector](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#labelselector-v1-meta)_ | selector is a label query over pods that should match the replica count.
It must match the pod template's labels.
More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#label-selectors | | | +| `labels` _object (keys:string, values:string)_ | | | | +| `annotations` _object (keys:string, values:string)_ | | | | +| `etcd` _[EtcdConfig](#etcdconfig)_ | | | | +| `backup` _[BackupSpec](#backupspec)_ | | | | +| `sharedConfig` _[SharedConfig](#sharedconfig)_ | | | | +| `schedulingConstraints` _[SchedulingConstraints](#schedulingconstraints)_ | | | | +| `replicas` _integer_ | | | | +| `priorityClassName` _string_ | PriorityClassName is the name of a priority class that shall be used for the etcd pods. | | | +| `storageClass` _string_ | StorageClass defines the name of the StorageClass required by the claim.
More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 | | | +| `storageCapacity` _[Quantity](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#quantity-resource-api)_ | StorageCapacity defines the size of persistent volume. | | | +| `volumeClaimTemplate` _string_ | VolumeClaimTemplate defines the volume claim template to be created | | | + + +#### EtcdStatus + + + +EtcdStatus defines the observed state of Etcd. + + + +_Appears in:_ +- [Etcd](#etcd) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `observedGeneration` _integer_ | ObservedGeneration is the most recent generation observed for this resource. | | | +| `etcd` _[CrossVersionObjectReference](#crossversionobjectreference)_ | | | | +| `conditions` _[Condition](#condition) array_ | Conditions represents the latest available observations of an etcd's current state. | | | +| `serviceName` _string_ | ServiceName is the name of the etcd service.
Deprecated: this field will be removed in the future. | | | +| `lastError` _string_ | LastError represents the last occurred error.
Deprecated: Use LastErrors instead. | | | +| `lastErrors` _[LastError](#lasterror) array_ | LastErrors captures errors that occurred during the last operation. | | | +| `lastOperation` _[LastOperation](#lastoperation)_ | LastOperation indicates the last operation performed on this resource. | | | +| `clusterSize` _integer_ | Cluster size is the current size of the etcd cluster.
Deprecated: this field will not be populated with any value and will be removed in the future. | | | +| `currentReplicas` _integer_ | CurrentReplicas is the current replica count for the etcd cluster. | | | +| `replicas` _integer_ | Replicas is the replica count of the etcd cluster. | | | +| `readyReplicas` _integer_ | ReadyReplicas is the count of replicas being ready in the etcd cluster. | | | +| `ready` _boolean_ | Ready is `true` if all etcd replicas are ready. | | | +| `updatedReplicas` _integer_ | UpdatedReplicas is the count of updated replicas in the etcd cluster.
Deprecated: this field will be removed in the future. | | | +| `labelSelector` _[LabelSelector](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#labelselector-v1-meta)_ | LabelSelector is a label query over pods that should match the replica count.
It must match the pod template's labels.
Deprecated: this field will be removed in the future. | | | +| `members` _[EtcdMemberStatus](#etcdmemberstatus) array_ | Members represents the members of the etcd cluster | | | +| `peerUrlTLSEnabled` _boolean_ | PeerUrlTLSEnabled captures the state of peer url TLS being enabled for the etcd member(s) | | | + + +#### GarbageCollectionPolicy + +_Underlying type:_ _string_ + +GarbageCollectionPolicy defines the type of policy for snapshot garbage collection. + +_Validation:_ +- Enum: [Exponential LimitBased] + +_Appears in:_ +- [BackupSpec](#backupspec) + + + +#### LastError + + + +LastError stores details of the most recent error encountered for a resource. + + + +_Appears in:_ +- [EtcdStatus](#etcdstatus) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `code` _[ErrorCode](#errorcode)_ | Code is an error code that uniquely identifies an error. | | | +| `description` _string_ | Description is a human-readable message indicating details of the error. | | | +| `observedAt` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#time-v1-meta)_ | ObservedAt is the time the error was observed. | | | + + +#### LastOperation + + + +LastOperation holds the information on the last operation done on the Etcd resource. + + + +_Appears in:_ +- [EtcdStatus](#etcdstatus) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `type` _[LastOperationType](#lastoperationtype)_ | Type is the type of last operation. | | | +| `state` _[LastOperationState](#lastoperationstate)_ | State is the state of the last operation. | | | +| `description` _string_ | Description describes the last operation. | | | +| `runID` _string_ | RunID correlates an operation with a reconciliation run.
Every time an Etcd resource is reconciled (barring status reconciliation which is periodic), a unique ID is
generated which can be used to correlate all actions done as part of a single reconcile run. Capturing this
as part of LastOperation aids in establishing this correlation. This further helps in also easily filtering
reconcile logs as all structured logs in a reconciliation run should have the `runID` referenced. | | | +| `lastUpdateTime` _[Time](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#time-v1-meta)_ | LastUpdateTime is the time at which the operation was last updated. | | | + + +#### LastOperationState + +_Underlying type:_ _string_ + +LastOperationState is a string alias representing the state of the last operation. + + + +_Appears in:_ +- [LastOperation](#lastoperation) + +| Field | Description | +| --- | --- | +| `Processing` | LastOperationStateProcessing indicates that an operation is in progress.
| +| `Succeeded` | LastOperationStateSucceeded indicates that an operation has completed successfully.
| +| `Error` | LastOperationStateError indicates that an operation is completed with errors and will be retried.
| +| `Requeue` | LastOperationStateRequeue indicates that an operation is not completed and either due to an error or unfulfilled conditions will be retried.
| + + +#### LastOperationType + +_Underlying type:_ _string_ + +LastOperationType is a string alias representing type of the last operation. + + + +_Appears in:_ +- [LastOperation](#lastoperation) + +| Field | Description | +| --- | --- | +| `Create` | LastOperationTypeCreate indicates that the last operation was a creation of a new Etcd resource.
| +| `Reconcile` | LastOperationTypeReconcile indicates that the last operation was a reconciliation of the spec of an Etcd resource.
| +| `Delete` | LastOperationTypeDelete indicates that the last operation was a deletion of an existing Etcd resource.
| + + +#### LeaderElectionSpec + + + +LeaderElectionSpec defines parameters related to the LeaderElection configuration. + + + +_Appears in:_ +- [BackupSpec](#backupspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `reelectionPeriod` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#duration-v1-meta)_ | ReelectionPeriod defines the Period after which leadership status of corresponding etcd is checked. | | | +| `etcdConnectionTimeout` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#duration-v1-meta)_ | EtcdConnectionTimeout defines the timeout duration for etcd client connection during leader election. | | | + + +#### MetricsLevel + +_Underlying type:_ _string_ + +MetricsLevel defines the level 'basic' or 'extensive'. + +_Validation:_ +- Enum: [basic extensive] + +_Appears in:_ +- [EtcdConfig](#etcdconfig) + +| Field | Description | +| --- | --- | +| `basic` | Basic is a constant for metrics level basic.
| +| `extensive` | Extensive is a constant for metrics level extensive.
| + + +#### SchedulingConstraints + + + +SchedulingConstraints defines the different scheduling constraints that must be applied to the +pod spec in the etcd statefulset. +Currently supported constraints are Affinity and TopologySpreadConstraints. + + + +_Appears in:_ +- [EtcdSpec](#etcdspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `affinity` _[Affinity](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#affinity-v1-core)_ | Affinity defines the various affinity and anti-affinity rules for a pod
that are honoured by the kube-scheduler. | | | +| `topologySpreadConstraints` _[TopologySpreadConstraint](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#topologyspreadconstraint-v1-core) array_ | TopologySpreadConstraints describes how a group of pods ought to spread across topology domains,
that are honoured by the kube-scheduler. | | | + + +#### SecretReference + + + +SecretReference defines a reference to a secret. + + + +_Appears in:_ +- [TLSConfig](#tlsconfig) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `name` _string_ | name is unique within a namespace to reference a secret resource. | | | +| `namespace` _string_ | namespace defines the space within which the secret name must be unique. | | | +| `dataKey` _string_ | DataKey is the name of the key in the data map containing the credentials. | | | + + +#### SharedConfig + + + +SharedConfig defines parameters shared and used by Etcd as well as backup-restore sidecar. + + + +_Appears in:_ +- [EtcdSpec](#etcdspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `autoCompactionMode` _[CompactionMode](#compactionmode)_ | AutoCompactionMode defines the auto-compaction-mode:'periodic' mode or 'revision' mode for etcd and embedded-etcd of backup-restore sidecar. | | Enum: [periodic revision]
| +| `autoCompactionRetention` _string_ | AutoCompactionRetention defines the auto-compaction-retention length for etcd as well as for embedded-etcd of backup-restore sidecar. | | | + + +#### StorageProvider + +_Underlying type:_ _string_ + +StorageProvider defines the type of object store provider for storing backups. + + + +_Appears in:_ +- [StoreSpec](#storespec) + + + +#### StoreSpec + + + +StoreSpec defines parameters related to ObjectStore persisting backups + + + +_Appears in:_ +- [BackupSpec](#backupspec) +- [EtcdCopyBackupsTaskSpec](#etcdcopybackupstaskspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `container` _string_ | Container is the name of the container the backup is stored at. | | | +| `prefix` _string_ | Prefix is the prefix used for the store. | | | +| `provider` _[StorageProvider](#storageprovider)_ | Provider is the name of the backup provider. | | | +| `secretRef` _[SecretReference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#secretreference-v1-core)_ | SecretRef is the reference to the secret which used to connect to the backup store. | | | + + +#### TLSConfig + + + +TLSConfig hold the TLS configuration details. + + + +_Appears in:_ +- [BackupSpec](#backupspec) +- [EtcdConfig](#etcdconfig) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `tlsCASecretRef` _[SecretReference](#secretreference)_ | | | | +| `serverTLSSecretRef` _[SecretReference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#secretreference-v1-core)_ | | | | +| `clientTLSSecretRef` _[SecretReference](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#secretreference-v1-core)_ | | | | + + +#### WaitForFinalSnapshotSpec + + + +WaitForFinalSnapshotSpec defines the parameters for waiting for a final full snapshot before copying backups. + + + +_Appears in:_ +- [EtcdCopyBackupsTaskSpec](#etcdcopybackupstaskspec) + +| Field | Description | Default | Validation | +| --- | --- | --- | --- | +| `enabled` _boolean_ | Enabled specifies whether to wait for a final full snapshot before copying backups. | | | +| `timeout` _[Duration](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.29/#duration-v1-meta)_ | Timeout is the timeout for waiting for a final full snapshot. When this timeout expires, the copying of backups
will be performed anyway. No timeout or 0 means wait forever. | | | + + diff --git a/docs/assets/css/custom.css b/docs/assets/css/custom.css new file mode 100644 index 000000000..b099e4ac5 --- /dev/null +++ b/docs/assets/css/custom.css @@ -0,0 +1,75 @@ +/* header */ +.md-header__button.md-logo { + margin-top: 0; + margin-bottom: 0; + padding-top: 0; + padding-bottom: 0; +} + +.md-header__button.md-logo img, +.md-header__button.md-logo svg { + height: 3rem; + width: 3rem; +} +.md-header__title { + font-size: 1.5rem; + font-weight: normal; + height: 3.5rem; + line-height: 3.5rem; + font-family: "Zilla Slab"; +} + +/* Navigation tabs */ +.md-tabs__list { + justify-content: left; +} + +.md-tabs__item { + height: 2.0rem; + padding-left: 1.0em; + padding-right: 1.0em; + font-family: "Roboto"; +} + +.md-tabs__link { + font-size: 0.70rem; + font-weight: normal; +} + +/* search box */ +.md-search__form { + border: 1px solid var(--pg-light-border); + box-shadow: rgba(0, 0, 0, 0) 0px 0px 0px 0px, rgba(0, 0, 0, 0) 0px 0px 0px 0px, rgba(0, 0, 0, 0.02) 0px 1px 3px 0px, rgba(0, 0, 0, 0.02) 0px 1px 2px -1px; + border-radius: 6px; + height: 1.6rem; +} + +[data-md-toggle="search"]:checked ~ .md-header .md-search__form { + border-radius: 6px 6px 0 0; +} + +.md-search__input { + font-size: .7rem; +} + +/* show table borders */ +.md-typeset td:not([class]):not(:last-child) { + border: 0.05rem solid var(--md-typeset-table-color); +} + +/* Highlight selected file in the left navigation pane*/ +.md-nav__link--active { + background-color: var(--md-code-bg-color); + border-radius: 0.3em; + padding: 0.2em; + outline: 0.1em; +} + +/* Additional styling to fix glitches */ +.md-sidebar__inner { + padding-top: 0.1em; +} + +.md-nav--lifted > .md-nav__list > .md-nav__item--active > .md-nav__link { + box-shadow: initial; +} \ No newline at end of file diff --git a/docs/concepts/etcd-cluster-components.md b/docs/concepts/etcd-cluster-components.md index 50ae160ce..9c130e426 100644 --- a/docs/concepts/etcd-cluster-components.md +++ b/docs/concepts/etcd-cluster-components.md @@ -36,7 +36,8 @@ An etcd cluster requires quorum for all write operations. Clients can additional To ensure that etcd pods are not evicted more than its failure tolerance, `etcd-druid` creates a [PodDisruptionBudget](https://kubernetes.io/docs/concepts/workloads/pods/disruptions/#pod-disruption-budgets). -> **NOTE:** For a single node etcd cluster a `PodDisruptionBudget` will be created, however `pdb.spec.minavailable` is set to 0 effectively disabling it. +!!! note + For a single node etcd cluster a `PodDisruptionBudget` will be created, however `pdb.spec.minavailable` is set to 0 effectively disabling it. **Code reference:** [PodDisruptionBudget-Component](https://github.com/gardener/etcd-druid/tree/480213808813c5282b19aff5f3fd6868529e779c/internal/component/poddistruptionbudget) @@ -62,7 +63,8 @@ To enable clients to connect to an etcd cluster a ClusterIP `Client` [Service](h Every member in an `Etcd` cluster has a dedicated [Lease](https://kubernetes.io/docs/concepts/architecture/leases/) that gets created which signifies that the member is alive. It is the responsibility of the `etcd-backup-store` side-car container to periodically renew the lease. -> Today the lease object is also used to indicate the member-ID and the role of the member in an etcd cluster. Possible roles are `Leader`, `Member`(which denotes that this is a member but not a leader). 
This will change in the future with [EtcdMember resource](https://github.com/gardener/etcd-druid/blob/3383e0219a6c21c6ef1d5610db964cc3524807c8/docs/proposals/04-etcd-member-custom-resource.md). +!!! note + Today the lease object is also used to indicate the member-ID and the role of the member in an etcd cluster. Possible roles are `Leader`, `Member`(which denotes that this is a member but not a leader). This will change in the future with [EtcdMember resource](https://github.com/gardener/etcd-druid/blob/3383e0219a6c21c6ef1d5610db964cc3524807c8/docs/proposals/04-etcd-member-custom-resource.md). **Code reference:** [Member-Lease-Component](https://github.com/gardener/etcd-druid/tree/3383e0219a6c21c6ef1d5610db964cc3524807c8/internal/component/memberlease) diff --git a/docs/deployment/configure-etcd-druid.md b/docs/deployment/configure-etcd-druid.md index e0a789ef7..8bece8357 100644 --- a/docs/deployment/configure-etcd-druid.md +++ b/docs/deployment/configure-etcd-druid.md @@ -26,7 +26,8 @@ If you wish to setup `etcd-druid` in high-availability mode then leader election Metrics bind-address is computed by joining the host and port. By default its value is computed as `:8080`. -> NOTE: Ensure that the `metrics-port` is also reflected in the `etcd-druid` deployment specification. +!!! tip + Ensure that the `metrics-port` is also reflected in the `etcd-druid` deployment specification. ### Webhook Server diff --git a/docs/deployment/getting-started-locally/getting-started-locally.md b/docs/deployment/getting-started-locally/getting-started-locally.md index 799d07db9..fb5f0f0ce 100644 --- a/docs/deployment/getting-started-locally/getting-started-locally.md +++ b/docs/deployment/getting-started-locally/getting-started-locally.md @@ -11,7 +11,7 @@ Before we can setup `etcd-druid` and use it to provision `Etcd` clusters, we nee `etcd-druid` uses [kind](https://kind.sigs.k8s.io/) as it's local Kubernetes engine. The local setup is configured for kind due to its convenience only. Any other Kubernetes setup would also work. ```bash -> make kind-up +make kind-up ``` This command sets up a new Kind cluster and stores the kubeconfig at `./hack/kind/kubeconfig`. Additionally, this command also deploys a local container registry as a docker container. This ensures faster image push/pull times. The local registry can be accessed as `localhost:5001` for pushing and pulling images. @@ -19,19 +19,19 @@ This command sets up a new Kind cluster and stores the kubeconfig at `./hack/kin To target this newly created cluster, set the `KUBECONFIG` environment variable to the kubeconfig file. ```bash -> export KUBECONFIG=$PWD/hack/kind/kubeconfig +export KUBECONFIG=$PWD/hack/kind/kubeconfig ``` > **Note:** If you wish to configure kind cluster differently then you can directly invoke the script and check its help to know about all configuration options. -> -> ```bash -> > ./hack/kind-up.sh -h -> usage: kind-up.sh [Options] -> Options: -> --cluster-name Name of the kind cluster to create. Default value is 'etcd-druid-e2e' -> --skip-registry Skip creating a local docker registry. Default value is false. -> --feature-gates Comma separated list of feature gates to enable on the cluster. -> ``` + +```bash +./hack/kind-up.sh -h + usage: kind-up.sh [Options] + Options: + --cluster-name Name of the kind cluster to create. Default value is 'etcd-druid-e2e' + --skip-registry Skip creating a local docker registry. Default value is false. + --feature-gates Comma separated list of feature gates to enable on the cluster. 
+``` ## 02-Setting up etcd-druid @@ -49,7 +49,7 @@ Any variant of `make deploy-*` command uses [helm](https://helm.sh/) and [skaffo #### Regular mode ```bash -> make deploy +make deploy ``` The above command will use [skaffold](https://skaffold.dev/) to build and deploy `etcd-druid` to the k8s kind cluster pointed to by `KUBECONFIG` environment variable. @@ -57,7 +57,7 @@ The above command will use [skaffold](https://skaffold.dev/) to build and deploy #### Dev mode ```bash -> make deploy-dev +make deploy-dev ``` This is similar to `make deploy` but additionally starts a [skaffold dev loop](https://skaffold.dev/docs/workflows/dev/). After the initial deployment, skaffold starts watching source files. Once it has detected changes, you can press any key to update the `etcd-druid` deployment. @@ -65,14 +65,15 @@ This is similar to `make deploy` but additionally starts a [skaffold dev loop](h #### Debug mode ```bash -> make deploy-debug +make deploy-debug ``` This is similar to `make deploy-dev` but additionally configures containers in pods for debugging as required for each container's runtime technology. The associated debugging ports are exposed and labelled so that they can be port-forwarded to the local machine. Skaffold disables automatic image rebuilding and syncing when using the `debug` mode as compared to `dev` mode. Go debugging uses [Delve](https://github.com/go-delve/delve). Please see the [skaffold debugging documentation](https://skaffold.dev/docs/workflows/debug/) how to setup your IDE accordingly. -> **Note:** Resuming or stopping only a single goroutine (Go Issue [25578](https://github.com/golang/go/issues/25578), [31132](https://github.com/golang/go/issues/31132)) is currently not supported, so the action will cause all the goroutines to get activated or paused. +!!! note + Resuming or stopping only a single goroutine (Go Issue [25578](https://github.com/golang/go/issues/25578), [31132](https://github.com/golang/go/issues/31132)) is currently not supported, so the action will cause all the goroutines to get activated or paused. This means that when a goroutine is paused on a breakpoint, then all the other goroutines are also paused. This should be kept in mind when using `skaffold debug`. @@ -80,7 +81,8 @@ This means that when a goroutine is paused on a breakpoint, then all the other g ### Deploying a Local Backup Store Emulator -> **Note:** This section is ***Optional*** and is only meant to describe steps to deploy a local object store which can be used for testing and development. If you either do not wish to enable backups or you wish to use remote (infra-provider-specific) object store then this section can be skipped. +!!! info + This section is ***Optional*** and is only meant to describe steps to deploy a local object store which can be used for testing and development. If you either do not wish to enable backups or you wish to use remote (infra-provider-specific) object store then this section can be skipped. An `Etcd` cluster provisioned via etcd-druid provides a capability to take regular delta and full snapshots which are stored in an object store. You can enable this functionality by ensuring that you fill in [spec.backup.store](https://github.com/gardener/etcd-druid/blob/3383e0219a6c21c6ef1d5610db964cc3524807c8/config/samples/druid_v1alpha1_etcd.yaml#L49-L54) section of the `Etcd` CR. 
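For orientation, a minimal sketch of what the `spec.backup.store` section could look like is shown below. Every value in it is a placeholder and must be replaced with the details of your own object store; the exact provider identifier depends on your infrastructure (the linked sample CR lists the supported options and keys):

```yaml
# Sketch only: all values below are placeholders, not defaults.
spec:
  backup:
    store:
      container: my-etcd-backups     # bucket/container in the object store
      prefix: etcd-test              # folder inside the bucket used for this cluster's snapshots
      provider: aws                  # provider identifier matching your infrastructure
      secretRef:
        name: etcd-backup-secret     # Kubernetes Secret holding the object store credentials
```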
@@ -91,18 +93,18 @@ An `Etcd` cluster provisioned via etcd-druid provides a capability to take regul ### Setting up Cloud Provider Object Store Secret -> **Note:** This section is ***Optional***. If you have disabled backup functionality or if you are using local storage or one of the supported object store emulators then you can skip this section. +!!! info + This section is ***Optional***. If you have disabled backup functionality or if you are using local storage or one of the supported object store emulators then you can skip this section. A Kubernetes [Secret](https://kubernetes.io/docs/concepts/configuration/secret/) needs to be created for cloud provider Object Store access. You can refer to the Secret YAML templates [here](https://github.com/gardener/etcd-backup-restore/tree/master/example/storage-provider-secrets). Replace the dummy values with the actual configuration and ensure that you have added the `metadata.name` and `metadata.namespace` to the secret. -> **Note:** -> -> * Secret should be deployed in the same namespace as the `Etcd` resource. -> * All the values in the data field of the secret YAML should in `base64` encoded format. +!!! tip + * Secret should be deployed in the same namespace as the `Etcd` resource. + * All the values in the data field of the secret YAML should in `base64` encoded format. To apply the secret run: ```bash -> kubectl apply -f +kubectl apply -f ``` ## 04-Preparing Etcd CR @@ -128,20 +130,21 @@ Brief explanation of the keys: * `store.provider` is the bucket provider. Pick from the options mentioned in comment. * `store.prefix` is the folder name that you want to use for your snapshots inside the bucket. -> **Note:** For developer convenience we have provided object store emulator specific etcd CR variants which can be used as if as well. +!!! tip + For developer convenience we have provided object store emulator specific etcd CR variants which can be used as if as well. ## 05-Applying Etcd CR Create the Etcd CR (Custom Resource) by applying the Etcd yaml to the cluster ```bash -> kubectl apply -f +kubectl apply -f ``` ## 06-Verify the Etcd Cluster To obtain information on the etcd cluster you can invoke the following command: ```bash -> kubectl get etcd -o=wide +kubectl get etcd -o=wide ``` We adhere to a naming convention for all resources that are provisioned for an `Etcd` cluster. Refer to [etcd-cluster-components](../../concepts/etcd-cluster-components.md) document to get details of all resources that are provisioned. @@ -152,9 +155,9 @@ We adhere to a naming convention for all resources that are provisioned for an ` ```bash # Put a key-value pair into the etcd -> etcdctl put +etcdctl put # Retrieve all key-value pairs from the etcd db -> etcdctl get --prefix "" +etcdctl get --prefix "" ``` For a multi-node etcd cluster, insert the key-value pair using the `etcd` container of one etcd member and retrieve it from the `etcd` container of another member to verify consensus among the multiple etcd members. @@ -167,7 +170,7 @@ For a multi-node etcd cluster, insert the key-value pair using the `etcd` contai If you wish to only delete the `Etcd` cluster then you can use the following command: ```bash -> kubectl delete etcd +kubectl delete etcd ``` This will add the `deletionTimestamp` to the `Etcd` resource. At the time the creation of the `Etcd` cluster, etcd-druid will add a finalizer to ensure that it cleans up all `Etcd` cluster resources before the CR is removed. 
@@ -180,12 +183,12 @@ etcd-druid will automatically pick up the deletion event and attempt clean up `E If you only wish to remove `etcd-druid` but retain the kind cluster then you can use the following make target: ```bash -> make undeploy +make undeploy ``` If you wish to delete the kind cluster then you can use the following make target: ```bash -> make kind-down +make kind-down ``` This cleans up the entire setup as the kind cluster gets deleted. diff --git a/docs/deployment/getting-started-locally/manage-azurite-emulator.md b/docs/deployment/getting-started-locally/manage-azurite-emulator.md index a556e77f9..579022a00 100644 --- a/docs/deployment/getting-started-locally/manage-azurite-emulator.md +++ b/docs/deployment/getting-started-locally/manage-azurite-emulator.md @@ -14,7 +14,7 @@ To interact with `Azurite` you must also install the Azure CLI `(version >=2.55. On macOS run: ```bash -> brew install azure-cli +brew install azure-cli ``` For other OS, please check the [Azure CLI installation documentation](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli). @@ -22,7 +22,7 @@ For other OS, please check the [Azure CLI installation documentation](https://le ## 01-Deploy Azurite ```bash -> make deploy-azurite +make deploy-azurite ``` The above make target will deploy `Azure` emulator in the target Kubernetes cluster. @@ -31,19 +31,19 @@ The above make target will deploy `Azure` emulator in the target Kubernetes clus We will be using the `azure-cli` to create an ABS container. Export the connection string to enable `azure-cli` to connect to `Azurite` emulator. ```bash -> export AZURE_STORAGE_CONNECTION_STRING="DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;" +export AZURE_STORAGE_CONNECTION_STRING="DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;" ``` To create an Azure Blob Storage Container in Azurite, run the following command: ```bash -> az storage container create -n +az storage container create -n ``` ## 03-Configure Secret Connection details for an Azure Object Store Container are put into a Kubernetes [Secret](https://kubernetes.io/docs/concepts/configuration/secret/). Apply the Kubernetes Secret manifest through: ```bash -> kubectl apply -f config/samples/etcd-secret-azurite.yaml +kubectl apply -f config/samples/etcd-secret-azurite.yaml ``` > **Note:** The secret created should be referred to in the `Etcd` CR in `spec.backup.store.secretRef`. @@ -52,6 +52,6 @@ Connection details for an Azure Object Store Container are put into a Kubernetes In addition to the kind cluster cleanup you should also unset the environment variable set in step-03 above. 
```bash -> unset AZURE_STORAGE_CONNECTION_STRING +unset AZURE_STORAGE_CONNECTION_STRING ``` diff --git a/docs/deployment/getting-started-locally/manage-s3-emulator.md b/docs/deployment/getting-started-locally/manage-s3-emulator.md index a102b7c5c..835ee3a39 100644 --- a/docs/deployment/getting-started-locally/manage-s3-emulator.md +++ b/docs/deployment/getting-started-locally/manage-s3-emulator.md @@ -14,7 +14,7 @@ To interact with `LocalStack` you must also install the AWS CLI `(version >=1.29 On macOS run: ```bash -> brew install awscli +brew install awscli ``` For other OS, please check the [AWS CLI installation documentation](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html). @@ -22,7 +22,7 @@ For other OS, please check the [AWS CLI installation documentation](https://docs ## 01-Deploy LocalStack ```bash -> make deploy-localstack +make deploy-localstack ``` The above make target will deploy `LocalStack` in the target Kubernetes cluster. @@ -32,20 +32,20 @@ The above make target will deploy `LocalStack` in the target Kubernetes cluster. Configure `AWS CLI` to interact with LocalStack by setting the necessary environment variables. This configuration redirects S3 commands to the LocalStack endpoint and provides the required credentials for authentication. ```bash -> export AWS_ENDPOINT_URL_S3="http://localhost:4566" -> export AWS_ACCESS_KEY_ID=ACCESSKEYAWSUSER -> export AWS_SECRET_ACCESS_KEY=sEcreTKey -> export AWS_DEFAULT_REGION=us-east-2 +export AWS_ENDPOINT_URL_S3="http://localhost:4566" +export AWS_ACCESS_KEY_ID=ACCESSKEYAWSUSER +export AWS_SECRET_ACCESS_KEY=sEcreTKey +export AWS_DEFAULT_REGION=us-east-2 ``` Create a S3 bucket using the following command: ```bash -> aws s3api create-bucket --bucket --region --create-bucket-configuration LocationConstraint= --acl private +aws s3api create-bucket --bucket --region --create-bucket-configuration LocationConstraint= --acl private ``` To verify if the bucket has been created, you can use the following command: ```bash -> aws s3api head-bucket --bucket +aws s3api head-bucket --bucket ``` ## 03-Configure Secret @@ -53,7 +53,7 @@ To verify if the bucket has been created, you can use the following command: Connection details for an Azure S3 Object Store are put into a Kubernetes [Secret](https://kubernetes.io/docs/concepts/configuration/secret/). Apply the Kubernetes Secret manifest through: ```bash -> kubectl apply -f config/samples/etcd-secret-localstack.yaml +kubectl apply -f config/samples/etcd-secret-localstack.yaml ``` > **Note:** The secret created should be referred to in the `Etcd` CR in `spec.backup.store.secretRef`. @@ -62,5 +62,5 @@ Connection details for an Azure S3 Object Store are put into a Kubernetes [Secre In addition to the kind cluster cleanup you should also unset the environment variable set in step-03 above. 
```bash -> unset AWS_ENDPOINT_URL_S3 AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_DEFAULT_REGION -``` +unset AWS_ENDPOINT_URL_S3 AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_DEFAULT_REGION +``` \ No newline at end of file diff --git a/docs/deployment/production-setup-recommendations.md b/docs/deployment/production-setup-recommendations.md index 2fd280ee4..aef9fa0a2 100644 --- a/docs/deployment/production-setup-recommendations.md +++ b/docs/deployment/production-setup-recommendations.md @@ -89,7 +89,8 @@ Therefore the following is recommended: * To meet the increased demand, configure a [VPA](https://github.com/kubernetes/autoscaler/tree/cecb34cb863fb015264098b5379bdba40a9113cf/vertical-pod-autoscaler). You have to be careful on selection of `containerPolicies`, `targetRef`. * To meet the increased demand in storage etcd-druid already configures each etcd member to [auto-compact](https://etcd.io/docs/v3.4/op-guide/maintenance/#auto-compaction) and it also configures periodic [defragmentation](https://etcd.io/docs/v3.4/op-guide/maintenance/#defragmentation) of the etcd DB. The only case this will not help is when you only have unique writes all the time. -> **Note:** Care should be taken with usage of VPA. While it helps to vertically scale up etcd-member pods, it also can cause transient quorum loss. This is a direct consequence of the design of VPA - where recommendation is done by [Recommender](https://github.com/kubernetes/autoscaler/blob/2800c70d425b89e88cb6e608df494a0cd21f242d/vertical-pod-autoscaler/pkg/recommender/README.md) component, [Updater](https://github.com/kubernetes/autoscaler/blob/2800c70d425b89e88cb6e608df494a0cd21f242d/vertical-pod-autoscaler/pkg/updater/README.md) evicts the pods that do not have the resources recommended by the `Recommender` and [Admission Controller](https://github.com/kubernetes/autoscaler/blob/2800c70d425b89e88cb6e608df494a0cd21f242d/vertical-pod-autoscaler/pkg/admission-controller/README.md) which updates the resources on the Pods. All these three components act asynchronously and can fail independently, so while VPA respects PDB's it can easily enter into a state where updater evicts a pod while respecting PDB but the admission controller fails to apply the recommendation. The pod comes with a default resources which still differ from the recommended values, thus causing a repeat eviction. There are other race conditions that can also occur and one needs to be careful of using VPA for quorum based workloads. +!!! note + Care should be taken with usage of VPA. While it helps to vertically scale up etcd-member pods, it also can cause transient quorum loss. This is a direct consequence of the design of VPA - where recommendation is done by [Recommender](https://github.com/kubernetes/autoscaler/blob/2800c70d425b89e88cb6e608df494a0cd21f242d/vertical-pod-autoscaler/pkg/recommender/README.md) component, [Updater](https://github.com/kubernetes/autoscaler/blob/2800c70d425b89e88cb6e608df494a0cd21f242d/vertical-pod-autoscaler/pkg/updater/README.md) evicts the pods that do not have the resources recommended by the `Recommender` and [Admission Controller](https://github.com/kubernetes/autoscaler/blob/2800c70d425b89e88cb6e608df494a0cd21f242d/vertical-pod-autoscaler/pkg/admission-controller/README.md) which updates the resources on the Pods. 
All these three components act asynchronously and can fail independently, so while VPA respects PDB's it can easily enter into a state where updater evicts a pod while respecting PDB but the admission controller fails to apply the recommendation. The pod comes with a default resources which still differ from the recommended values, thus causing a repeat eviction. There are other race conditions that can also occur and one needs to be careful of using VPA for quorum based workloads. ## High Availability @@ -133,7 +134,8 @@ In most cloud providers there is no network cost (ingress/egress) for any traffi One could evaluate using [TopologyAwareRouting](https://kubernetes.io/docs/concepts/services-networking/topology-aware-routing/) which reduces cross-zonal traffic thus saving costs and latencies. -> **Note:** You can read about how it is done in Gardener [here](https://github.com/gardener/gardener/blob/master/docs/operations/topology_aware_routing.md). +!!! tip + You can read about how it is done in Gardener [here](https://github.com/gardener/gardener/blob/master/docs/operations/topology_aware_routing.md). ## Metrics & Alerts diff --git a/docs/development/changing-api.md b/docs/development/changing-api.md index 2a524e4b9..6d6e13f14 100644 --- a/docs/development/changing-api.md +++ b/docs/development/changing-api.md @@ -8,7 +8,8 @@ This guide provides detailed information on what needs to be done when the API n The etcd-druid API is defined [here](https://github.com/gardener/etcd-druid/tree/3383e0219a6c21c6ef1d5610db964cc3524807c8/api). -> **Note:** The current version of the API is `v1alpha1`. We are currently working on migration to `v1beta1` API. +!!! info + The current version of the API is `v1alpha1`. We are currently working on migration to `v1beta1` API. ### Changing the API diff --git a/docs/development/controllers.md b/docs/development/controllers.md index e85197d84..7425a9475 100644 --- a/docs/development/controllers.md +++ b/docs/development/controllers.md @@ -52,7 +52,8 @@ The *etcd controller* is essential to the functioning of the etcd cluster and et While building the controller, an event filter is set such that the behavior of the controller, specifically for `Etcd` update operations, depends on the `gardener.cloud/operation: reconcile` *annotation*. This is controlled by the `--enable-etcd-spec-auto-reconcile` CLI flag, which, if set to `false`, tells the controller to perform reconciliation only when this annotation is present. If the flag is set to `true`, the controller will reconcile the etcd cluster anytime the `Etcd` spec, and thus `generation`, changes, and the next queued event for it is triggered. -> **Note:** Creation and deletion of `Etcd` resources are not affected by the above flag or annotation. +!!! note + Creation and deletion of `Etcd` resources are not affected by the above flag or annotation. The reason this filter is present is that any disruption in the `Etcd` resource due to reconciliation (due to changes in the `Etcd` spec, for example) while workloads are being run would cause unwanted downtimes to the etcd cluster. Hence, any user who wishes to avoid such disruptions, can choose to set the `--enable-etcd-spec-auto-reconcile` CLI flag to `false`. 
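With the flag set to `false`, a reconciliation of spec changes can be requested explicitly by adding the annotation mentioned above to the `Etcd` resource. A sketch of the relevant manifest fragment:

```yaml
# Requests a one-off reconciliation of the Etcd spec when
# --enable-etcd-spec-auto-reconcile is set to false.
metadata:
  annotations:
    gardener.cloud/operation: reconcile
```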
An example of this is Gardener's [gardenlet](https://github.com/gardener/gardener/blob/676d1bd9e95d80b9f4bc9c56807806031da5d1ce/docs/concepts/gardenlet.md), which reconciles the `Etcd` resource only during a shoot cluster's [*maintenance window*](https://github.com/gardener/gardener/blob/676d1bd9e95d80b9f4bc9c56807806031da5d1ce/docs/usage/shoot/shoot_maintenance.md). diff --git a/docs/development/dependency-management.md index 485ae5f7c..e7b682839 100644 --- a/docs/development/dependency-management.md +++ b/docs/development/dependency-management.md @@ -11,7 +11,8 @@ Unfortunately go does not differentiate between `dev` and `test` dependencies. I The `Makefile` contains a rule called `tidy` which performs [go mod tidy](https://go.dev/ref/mod#go-mod-tidy) which ensures that the `go.mod` file matches the source code in the module. It adds any missing module requirements necessary to build the current module’s packages and dependencies, and it removes requirements on modules that don’t provide any relevant packages. It also adds any missing entries to `go.sum` and removes unnecessary entries. ```bash -> make tidy +make tidy ``` -:warning: Make sure that you test the code after you have updated the dependencies! +!!! warning + Make sure that you test the code after you have updated the dependencies! diff --git a/docs/development/getting-started-locally.md index 068bfbe5c..a76e959c6 100644 --- a/docs/development/getting-started-locally.md +++ b/docs/development/getting-started-locally.md @@ -5,4 +5,5 @@ You can setup `etcd-druid` locally by following detailed instructions in [this d * For best development experience you should use `make deploy-dev` - this helps during development where you wish to make changes to the code base and with a key-press allows automatic re-deployment of the application to the target Kubernetes cluster. * In case you wish to start a debugging session then use `make deploy-debug` - this will additionally disable leader election and prevent leases from expiring and the process from stopping. -> **Note:** We leverage [skaffold debug](https://skaffold.dev/docs/workflows/debug/) and [skaffold dev](https://skaffold.dev/docs/workflows/dev/) features. \ No newline at end of file +!!! info + We leverage [skaffold debug](https://skaffold.dev/docs/workflows/debug/) and [skaffold dev](https://skaffold.dev/docs/workflows/dev/) features. \ No newline at end of file diff --git a/docs/development/prepare-dev-environment.md index 2a5cf8ed1..232a09dd6 100644 --- a/docs/development/prepare-dev-environment.md +++ b/docs/development/prepare-dev-environment.md @@ -6,7 +6,7 @@ This guide will provide with detailed instructions on installing all dependencie Homebrew is a popular package manager for macOS. You can install it by executing the following command in a terminal: ```bash -> /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" +/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" ``` ## Installing Go @@ -14,7 +14,7 @@ Hombrew is a popular package manager for macOS. You can install it by executing On macOS run: ```bash -> brew install go +brew install go ``` Alternatively you can also follow the [Go installation documentation](https://go.dev/doc/install). @@ -25,7 +25,7 @@ We use `git` as VCS which you need to install.
On macOS run: ```bash -> brew install git +brew install git ``` For other OS, please check the [Git installation documentation](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git). @@ -37,7 +37,7 @@ You need to have docker installed and running. This will allow starting a [kind] On macOS run: ```bash -> brew install docker +brew install docker ``` Alternatively you can also follow the [Docker installation documentation](https://docs.docker.com/get-docker/). @@ -48,7 +48,7 @@ To interact with the local Kubernetes cluster you will need kubectl. On macOS run: ```bash -> brew install kubernetes-cli +brew install kubernetes-cli ``` For other OS, please check the [Kubectl installation documentation](https://kubernetes.io/docs/tasks/tools/). @@ -60,17 +60,17 @@ On macOS run: ```bash # jq (https://jqlang.github.io/jq/) is a lightweight and flexible command-line JSON processor -> brew install jq +brew install jq # yq (https://mikefarah.gitbook.io/yq) is a lightweight and portable command-line YAML processor. -> brew install yq +brew install yq ``` ## Get the sources Clone the repository from GitHub into your `$GOPATH`. ```bash -> mkdir -p $(go env GOPATH)/src/github.com/gardener -> cd $(go env GOPATH)src/github.com/gardener -> git clone https://github.com/gardener/etcd-druid.git +mkdir -p $(go env GOPATH)/src/github.com/gardener +cd $(go env GOPATH)/src/github.com/gardener +git clone https://github.com/gardener/etcd-druid.git # alternatively you can also use `git clone git@github.com:gardener/etcd-druid.git` ``` diff --git a/docs/development/raising-a-pr.md index 15ed4d304..7ec51afb1 100644 --- a/docs/development/raising-a-pr.md +++ b/docs/development/raising-a-pr.md @@ -38,7 +38,8 @@ For every pull-request, it is ***mandatory*** to raise an [Issue](https://github * `make test-integration` - this target will run all integration tests (controller level tests) using `envtest` framework. * `make ci-e2e-kind` or any of its variants - these targets will run etcd-druid e2e tests. - > **Note:** Please ensure that after introduction of new code the code coverage does not reduce. An increase in code coverage is always welcome. + !!! warning + Please ensure that after introduction of new code the code coverage does not reduce. An increase in code coverage is always welcome. * If you add new features, make sure that you create relevant documentation under `/docs`. diff --git a/docs/development/running-e2e-tests.md index 27b820ba4..a5e3847e8 100644 --- a/docs/development/running-e2e-tests.md +++ b/docs/development/running-e2e-tests.md @@ -77,7 +77,7 @@ The following environment variables influence how the flow described above is ex Example: -``` +```bash make \ AWS_ACCESS_KEY_ID="abc" \ AWS_SECRET_ACCESS_KEY="xyz" \ @@ -96,7 +96,7 @@ test-e2e Example: -``` +```bash make \ STORAGE_ACCOUNT="abc" \ STORAGE_KEY="eHl6Cg==" \ @@ -114,7 +114,7 @@ test-e2e Example: -``` +```bash make \ GCP_SERVICEACCOUNT_JSON_PATH="/var/lib/secrets/serviceaccount.json" \ GCP_PROJECT_ID="xyz-project" \ @@ -131,7 +131,7 @@ No special environment variables are required for running e2e tests with `Local` Example: -``` +```bash make \ KUBECONFIG="$HOME/.kube/config" \ PROVIDERS="local" \ @@ -155,6 +155,7 @@ The developer needs to run `make ci-e2e-kind` command.
This command in turn runs ### How to execute e2e tests with localstack and KIND cluster Run the following `make` command to spin up a KinD cluster, deploy localstack and run the e2e tests with provider `aws`: -``` + +```bash make ci-e2e-kind ``` diff --git a/docs/development/testing.md b/docs/development/testing.md index e3078fd08..60434ae0d 100644 --- a/docs/development/testing.md +++ b/docs/development/testing.md @@ -50,12 +50,13 @@ For any new contributions **tests are a strict requirement**. `Boy Scouts Rule` ### Running Unit Tests -> **NOTE:** For unit tests we are currently transitioning away from [ginkgo](https://github.com/onsi/ginkgo) to using golang native tests. The `make test-unit` target runs both ginkgo and golang native tests. Once the transition is complete this target will be simplified. +!!! info + For unit tests we are currently transitioning away from [ginkgo](https://github.com/onsi/ginkgo) to using golang native tests. The `make test-unit` target runs both ginkgo and golang native tests. Once the transition is complete this target will be simplified. Run all unit tests ```bash -> make test-unit +make test-unit ``` Run unit tests of specific packages: @@ -73,15 +74,15 @@ If tests have sporadic failures, then trying running `./hack/stress-test.sh` whi ```bash # install the stress tool -> go install golang.org/x/tools/cmd/stress@latest +go install golang.org/x/tools/cmd/stress@latest # invoke the helper script to execute the stress test -> ./hack/stress-test.sh test-package= test-func= tool-params="" +./hack/stress-test.sh test-package= test-func= tool-params="" ``` An example invocation: ```bash -> ./hack/stress-test.sh test-package=./internal/utils test-func=TestRunConcurrentlyWithAllSuccessfulTasks tool-params="-p 10" +./hack/stress-test.sh test-package=./internal/utils test-func=TestRunConcurrentlyWithAllSuccessfulTasks tool-params="-p 10" 5s: 877 runs so far, 0 failures 10s: 1906 runs so far, 0 failures 15s: 2885 runs so far, 0 failures @@ -94,7 +95,8 @@ An example invocation: Integration tests in etcd-druid use [envtest](https://pkg.go.dev/sigs.k8s.io/controller-runtime/pkg/envtest). It sets up a minimal temporary control plane (etcd + kube-apiserver) and runs the test against it. Test suites (group of tests) start their individual `envtest` environment before running the tests for the respective controller/webhook. Before exiting, the temporary test environment is shutdown. -> **NOTE:** For integration-tests we are currently transitioning away from [ginkgo](https://github.com/onsi/ginkgo) to using golang native tests. All ginkgo integration tests can be found [here](https://github.com/gardener/etcd-druid/tree/4e9971aba3c3880a4cb6583d05843eabb8ca1409/test/integration) and golang native integration tests can be found [here](https://github.com/gardener/etcd-druid/tree/4e9971aba3c3880a4cb6583d05843eabb8ca1409/test/it). +!!! info + For integration-tests we are currently transitioning away from [ginkgo](https://github.com/onsi/ginkgo) to using golang native tests. All ginkgo integration tests can be found [here](https://github.com/gardener/etcd-druid/tree/4e9971aba3c3880a4cb6583d05843eabb8ca1409/test/integration) and golang native integration tests can be found [here](https://github.com/gardener/etcd-druid/tree/4e9971aba3c3880a4cb6583d05843eabb8ca1409/test/it). * Integration tests in etcd-druid only targets a single controller. It is therefore advised that code (other than common utility functions should not be shared between any two controllers). 
* If you are sharing a common `envtest` environment across tests then it is recommended that an individual test is run in a dedicated `namespace`. @@ -107,7 +109,7 @@ Integration tests in etcd-druid use [envtest](https://pkg.go.dev/sigs.k8s.io/con ### Running Integration Tests ```bash -> make test-integration +make test-integration ``` ### Debugging Integration Tests @@ -118,33 +120,34 @@ There are two ways in which you can debug Integration Tests: All commonly used IDE's provide in-built or easy integration with [delve](https://pkg.go.dev/github.com/go-delve/delve) debugger. For debugging integration tests the only additional requirement is to set `KUBEBUILDER_ASSETS` environment variable. You can get the value of this environment variable by executing the following command: ```bash # ENVTEST_K8S_VERSION is the k8s version that you wish to use for testing. -> setup-envtest --os $(go env GOOS) --arch $(go env GOARCH) use $ENVTEST_K8S_VERSION -p path +setup-envtest --os $(go env GOOS) --arch $(go env GOARCH) use $ENVTEST_K8S_VERSION -p path ``` -> NOTE: All integration tests usually have a timeout. If you wish to debug a failing integration-test then increase the timeouts. +!!! tip + All integration tests usually have a timeout. If you wish to debug a failing integration-test then increase the timeouts. #### Use standalone envtest We also provide a capability to setup a stand-alone `envtest` and leverage the cluster to run individual integration-test. This allows you more control over when this k8s control plane is destroyed and allows you to inspect the resources at the end of the integration-test run using `kubectl`. -> **NOTE:** While you can use an existing cluster (e.g., `kind`), some test suites expect that no controllers and no nodes are running in the test environment (as it is the case in `envtest` test environments). Hence, using a full-blown cluster with controllers and nodes might sometimes be impractical, as you would need to stop cluster components for the tests to work. +> While you can use an existing cluster (e.g., `kind`), some test suites expect that no controllers and no nodes are running in the test environment (as it is the case in `envtest` test environments). Hence, using a full-blown cluster with controllers and nodes might sometimes be impractical, as you would need to stop cluster components for the tests to work. To setup a standalone `envtest` and run an integration test against it, do the following: ```bash # In a terminal session use the following make target to setup a standalone envtest -> make start-envtest +make start-envtest # As part of output path to kubeconfig will be also be printed on the console. # In another terminal session setup resource(s) watch: -> kubectl get po -A -w # alternatively you can also use `watch -d ` utility. +kubectl get po -A -w # alternatively you can also use `watch -d ` utility. # In another terminal session: -> export KUBECONFIG= -> export USE_EXISTING_K8S_CLUSTER=true +export KUBECONFIG= +export USE_EXISTING_K8S_CLUSTER=true # run the test -> go test -run="" +go test -run="" # example: go test -run="^TestEtcdDeletion/test deletion of all*" ./test/it/controller/etcd ``` diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 000000000..7098a9a16 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,70 @@ + +

+etcd-druid-tagline (logo image)
+Badges: REUSE Status | CI Build Status | Go Report Card | License: Apache-2.0 | Release | Go Reference

+ +`etcd-druid` is an [etcd](https://github.com/etcd-io/etcd) [operator](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/) which makes it easy to configure, provision, reconcile, monitor and delete etcd clusters. It enables management of etcd clusters through [declarative Kubernetes API model](config/crd/bases/crd-druid.gardener.cloud_etcds.yaml). + +In every etcd cluster managed by `etcd-druid`, each etcd member is a two-container `Pod` which consists of: + +- [etcd-wrapper](https://github.com/gardener/etcd-wrapper) which manages the lifecycle (validation & initialization) of an etcd. +- [etcd-backup-restore](https://github.com/gardener/etcd-backup-restore) sidecar which currently provides the following capabilities (the list is not comprehensive): + - [etcd](https://github.com/etcd-io/etcd) DB validation. + - Scheduled [etcd](https://github.com/etcd-io/etcd) DB defragmentation. + - Backup - [etcd](https://github.com/etcd-io/etcd) DB snapshots are taken regularly and backed up in an object store if one is configured. + - Restoration - In case of a DB corruption for a single-member cluster it helps in restoring from the latest set of snapshots (full & delta). + - Member control operations. + +`etcd-druid` additionally provides the following capabilities: + +- Facilitates declarative scale-out of [etcd](https://github.com/etcd-io/etcd) clusters. +- Provides protection against accidental deletion/mutation of resources provisioned as part of an etcd cluster. +- Offers an asynchronous and threshold-based capability to process backed up snapshots to: + - Potentially minimize the recovery time by leveraging restoration from backups followed by [etcd's compaction and defragmentation](https://etcd.io/docs/v3.4/op-guide/maintenance/). + - Indirectly assert integrity of the backed-up snapshots. +- Allows seamless copy of backups between any two object store buckets. + +## Start using or developing `etcd-druid` locally + +If you are looking to try out etcd-druid, you can use a [Kind](https://kind.sigs.k8s.io/) cluster-based setup; a minimal quickstart sketch is included at the end of this page. + + + +For detailed documentation, see our `/docs` folder. Please find the [index](docs/README.md) here. + +## Contributions + +If you wish to contribute then please see our [contributor guidelines](docs/development/contribution.md). + +## Feedback and Support + +We always look forward to active community engagement. Please report bugs or suggestions on how we can enhance `etcd-druid` on [GitHub Issues](https://github.com/gardener/etcd-druid/issues). + +## License + +Released under the [Apache-2.0](https://github.com/gardener/etcd-druid/blob/master/LICENSE) license.
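The quickstart sketch referenced above, using the `make` targets and the sample manifest from this repository; adjust names and paths to your environment:

```bash
# Spin up a local kind cluster and point kubectl at it
make kind-up
export KUBECONFIG=hack/e2e-test/infrastructure/kind/kubeconfig

# Install the CRDs and deploy etcd-druid into the default namespace
make deploy
kubectl get pod   # verify the etcd-druid pod is running

# Create a sample 3-member etcd cluster and watch the resources come up
kubectl apply -f config/samples/druid_v1alpha1_etcd.yaml
kubectl get etcd,sts,pod -w

# Tear everything down again when done
kubectl delete etcd etcd-test
make kind-down
```
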
diff --git a/docs/proposals/00-template.md b/docs/proposals/00-template.md index cde850c51..5ba0e6069 100644 --- a/docs/proposals/00-template.md +++ b/docs/proposals/00-template.md @@ -13,15 +13,6 @@ reviewers: # DEP-NN: Your short, descriptive title -## Table of Contents - -- [Summary](#summary) -- [Motivation](#motivation) - - [Goals](#goals) - - [Non-Goals](#non-goals) -- [Proposal](#proposal) -- [Alternatives](#alternatives) - ## Summary ## Motivation diff --git a/docs/proposals/01-multi-node-etcd-clusters.md b/docs/proposals/01-multi-node-etcd-clusters.md index 0f806e27f..63c6350d3 100644 --- a/docs/proposals/01-multi-node-etcd-clusters.md +++ b/docs/proposals/01-multi-node-etcd-clusters.md @@ -1,149 +1,7 @@ -# Multi-node etcd cluster instances via etcd-druid +# DEP-01: Multi-node etcd cluster instances via etcd-druid This document proposes an approach (along with some alternatives) to support provisioning and management of multi-node etcd cluster instances via [etcd-druid](https://github.com/gardener/etcd-druid) and [etcd-backup-restore](https://github.com/gardener/etcd-backup-restore). -## Content - -- [Multi-node etcd cluster instances via etcd-druid](#multi-node-etcd-cluster-instances-via-etcd-druid) - - [Content](#content) - - [Goal](#goal) - - [Background and Motivation](#background-and-motivation) - - [Single-node etcd cluster](#single-node-etcd-cluster) - - [Multi-node etcd-cluster](#multi-node-etcd-cluster) - - [Dynamic multi-node etcd cluster](#dynamic-multi-node-etcd-cluster) - - [Prior Art](#prior-art) - - [ETCD Operator from CoreOS](#etcd-operator-from-coreos) - - [etcdadm from kubernetes-sigs](#etcdadm-from-kubernetes-sigs) - - [Etcd Cluster Operator from Improbable-Engineering](#etcd-cluster-operator-from-improbable-engineering) - - [General Approach to ETCD Cluster Management](#general-approach-to-etcd-cluster-management) - - [Bootstrapping](#bootstrapping) - - [Assumptions](#assumptions) - - [Adding a new member to an etcd cluster](#adding-a-new-member-to-an-etcd-cluster) - - [Note](#note) - - [Alternative](#alternative) - - [Managing Failures](#managing-failures) - - [Removing an existing member from an etcd cluster](#removing-an-existing-member-from-an-etcd-cluster) - - [Restarting an existing member of an etcd cluster](#restarting-an-existing-member-of-an-etcd-cluster) - - [Recovering an etcd cluster from failure of majority of members](#recovering-an-etcd-cluster-from-failure-of-majority-of-members) - - [Kubernetes Context](#kubernetes-context) - - [Alternative](#alternative-1) - - [ETCD Configuration](#etcd-configuration) - - [Alternative](#alternative-2) - - [Data Persistence](#data-persistence) - - [Persistent](#persistent) - - [Ephemeral](#ephemeral) - - [Disk](#disk) - - [In-memory](#in-memory) - - [How to detect if valid metadata exists in an etcd member](#how-to-detect-if-valid-metadata-exists-in-an-etcd-member) - - [Recommendation](#recommendation) - - [How to detect if valid data exists in an etcd member](#how-to-detect-if-valid-data-exists-in-an-etcd-member) - - [Recommendation](#recommendation-1) - - [Separating peer and client traffic](#separating-peer-and-client-traffic) - - [Cutting off client requests](#cutting-off-client-requests) - - [Manipulating Client Service podSelector](#manipulating-client-service-podselector) - - [Health Check](#health-check) - - [Backup Failure](#backup-failure) - - [Alternative](#alternative-3) - - [Status](#status) - - [Members](#members) - - [Note](#note-1) - - [Member name as the key](#member-name-as-the-key) - - 
[Member Leases](#member-leases) - - [Conditions](#conditions) - - [ClusterSize](#clustersize) - - [Alternative](#alternative-4) - - [Decision table for etcd-druid based on the status](#decision-table-for-etcd-druid-based-on-the-status) - - [1. Pink of health](#1-pink-of-health) - - [Observed state](#observed-state) - - [Recommended Action](#recommended-action) - - [2. Member status is out of sync with their leases](#2-member-status-is-out-of-sync-with-their-leases) - - [Observed state](#observed-state-1) - - [Recommended Action](#recommended-action-1) - - [3. All members are `Ready` but `AllMembersReady` condition is stale](#3-all-members-are-ready-but-allmembersready-condition-is-stale) - - [Observed state](#observed-state-2) - - [Recommended Action](#recommended-action-2) - - [4. Not all members are `Ready` but `AllMembersReady` condition is stale](#4-not-all-members-are-ready-but-allmembersready-condition-is-stale) - - [Observed state](#observed-state-3) - - [Recommended Action](#recommended-action-3) - - [5. Majority members are `Ready` but `Ready` condition is stale](#5-majority-members-are-ready-but-ready-condition-is-stale) - - [Observed state](#observed-state-4) - - [Recommended Action](#recommended-action-4) - - [6. Majority members are `NotReady` but `Ready` condition is stale](#6-majority-members-are-notready-but-ready-condition-is-stale) - - [Observed state](#observed-state-5) - - [Recommended Action](#recommended-action-5) - - [7. Some members have been in `Unknown` status for a while](#7-some-members-have-been-in-unknown-status-for-a-while) - - [Observed state](#observed-state-6) - - [Recommended Action](#recommended-action-6) - - [8. Some member pods are not `Ready` but have not had the chance to update their status](#8-some-member-pods-are-not-ready-but-have-not-had-the-chance-to-update-their-status) - - [Observed state](#observed-state-7) - - [Recommended Action](#recommended-action-7) - - [9. Quorate cluster with a minority of members `NotReady`](#9-quorate-cluster-with-a-minority-of-members-notready) - - [Observed state](#observed-state-8) - - [Recommended Action](#recommended-action-8) - - [10. Quorum lost with a majority of members `NotReady`](#10-quorum-lost-with-a-majority-of-members-notready) - - [Observed state](#observed-state-9) - - [Recommended Action](#recommended-action-9) - - [11. Scale up of a healthy cluster](#11-scale-up-of-a-healthy-cluster) - - [Observed state](#observed-state-10) - - [Recommended Action](#recommended-action-10) - - [12. Scale down of a healthy cluster](#12-scale-down-of-a-healthy-cluster) - - [Observed state](#observed-state-11) - - [Recommended Action](#recommended-action-11) - - [13. Superfluous member entries in `Etcd` status](#13-superfluous-member-entries-in-etcd-status) - - [Observed state](#observed-state-12) - - [Recommended Action](#recommended-action-12) - - [Decision table for etcd-backup-restore during initialization](#decision-table-for-etcd-backup-restore-during-initialization) - - [1. First member during bootstrap of a fresh etcd cluster](#1-first-member-during-bootstrap-of-a-fresh-etcd-cluster) - - [Observed state](#observed-state-13) - - [Recommended Action](#recommended-action-13) - - [2. Addition of a new following member during bootstrap of a fresh etcd cluster](#2-addition-of-a-new-following-member-during-bootstrap-of-a-fresh-etcd-cluster) - - [Observed state](#observed-state-14) - - [Recommended Action](#recommended-action-14) - - [3. 
Restart of an existing member of a quorate cluster with valid metadata and data](#3-restart-of-an-existing-member-of-a-quorate-cluster-with-valid-metadata-and-data) - - [Observed state](#observed-state-15) - - [Recommended Action](#recommended-action-15) - - [4. Restart of an existing member of a quorate cluster with valid metadata but without valid data](#4-restart-of-an-existing-member-of-a-quorate-cluster-with-valid-metadata-but-without-valid-data) - - [Observed state](#observed-state-16) - - [Recommended Action](#recommended-action-16) - - [5. Restart of an existing member of a quorate cluster without valid metadata](#5-restart-of-an-existing-member-of-a-quorate-cluster-without-valid-metadata) - - [Observed state](#observed-state-17) - - [Recommended Action](#recommended-action-17) - - [6. Restart of an existing member of a non-quorate cluster with valid metadata and data](#6-restart-of-an-existing-member-of-a-non-quorate-cluster-with-valid-metadata-and-data) - - [Observed state](#observed-state-18) - - [Recommended Action](#recommended-action-18) - - [7. Restart of the first member of a non-quorate cluster without valid data](#7-restart-of-the-first-member-of-a-non-quorate-cluster-without-valid-data) - - [Observed state](#observed-state-19) - - [Recommended Action](#recommended-action-19) - - [8. Restart of a following member of a non-quorate cluster without valid data](#8-restart-of-a-following-member-of-a-non-quorate-cluster-without-valid-data) - - [Observed state](#observed-state-20) - - [Recommended Action](#recommended-action-20) - - [Backup](#backup) - - [Leading ETCD main container’s sidecar is the backup leader](#leading-etcd-main-containers-sidecar-is-the-backup-leader) - - [Independent leader election between backup-restore sidecars](#independent-leader-election-between-backup-restore-sidecars) - - [History Compaction](#history-compaction) - - [Defragmentation](#defragmentation) - - [Work-flows in etcd-backup-restore](#work-flows-in-etcd-backup-restore) - - [Work-flows independent of leader election in all members](#work-flows-independent-of-leader-election-in-all-members) - - [Work-flows only on the leading member](#work-flows-only-on-the-leading-member) - - [High Availability](#high-availability) - - [Zonal Cluster - Single Availability Zone](#zonal-cluster---single-availability-zone) - - [Alternative](#alternative-5) - - [Regional Cluster - Multiple Availability Zones](#regional-cluster---multiple-availability-zones) - - [Alternative](#alternative-6) - - [PodDisruptionBudget](#poddisruptionbudget) - - [Rolling updates to etcd members](#rolling-updates-to-etcd-members) - - [Follow Up](#follow-up) - - [Ephemeral Volumes](#ephemeral-volumes) - - [Shoot Control-Plane Migration](#shoot-control-plane-migration) - - [Performance impact of multi-node etcd clusters](#performance-impact-of-multi-node-etcd-clusters) - - [Metrics, Dashboards and Alerts](#metrics-dashboards-and-alerts) - - [Costs](#costs) - - [Future Work](#future-work) - - [Gardener Ring](#gardener-ring) - - [Autonomous Shoot Clusters](#autonomous-shoot-clusters) - - [Optimization of recovery from non-quorate cluster with some member containing valid data](#optimization-of-recovery-from-non-quorate-cluster-with-some-member-containing-valid-data) - - [Optimization of rolling updates to unhealthy etcd clusters](#optimization-of-rolling-updates-to-unhealthy-etcd-clusters) - ## Goal - Enhance etcd-druid and etcd-backup-restore to support provisioning and management of multi-node etcd cluster instances within a single 
Kubernetes cluster. diff --git a/docs/proposals/02-snapshot-compaction.md b/docs/proposals/02-snapshot-compaction.md index 38494debd..76619a777 100644 --- a/docs/proposals/02-snapshot-compaction.md +++ b/docs/proposals/02-snapshot-compaction.md @@ -1,4 +1,4 @@ -# Snapshot Compaction for Etcd +# DEP-02: Snapshot Compaction for Etcd ## Current Problem To ensure recoverability of Etcd, backups of the database are taken at regular interval. diff --git a/docs/proposals/03-scaling-up-an-etcd-cluster.md b/docs/proposals/03-scaling-up-an-etcd-cluster.md index 998e15108..51af42f94 100644 --- a/docs/proposals/03-scaling-up-an-etcd-cluster.md +++ b/docs/proposals/03-scaling-up-an-etcd-cluster.md @@ -1,4 +1,4 @@ -# Scaling-up a single-node to multi-node etcd cluster deployed by etcd-druid +# DEP-03: Scaling-up a single-node to multi-node etcd cluster deployed by etcd-druid To mark a cluster for scale-up from single node to multi-node etcd, just patch the etcd custom resource's `.spec.replicas` from `1` to `3` (for example). diff --git a/docs/proposals/04-etcd-member-custom-resource.md b/docs/proposals/04-etcd-member-custom-resource.md index 5b9cf6ea2..3a75e9ed2 100644 --- a/docs/proposals/04-etcd-member-custom-resource.md +++ b/docs/proposals/04-etcd-member-custom-resource.md @@ -12,46 +12,6 @@ reviewers: # DEP-04: EtcdMember Custom Resource -## Table of Contents - -* [DEP-04: EtcdMember Custom Resource](#dep-04-etcdmember-custom-resource) - * [Table of Contents](#table-of-contents) - * [Summary](#summary) - * [Terminology](#terminology) - * [Motivation](#motivation) - * [Goals](#goals) - * [Non-Goals](#non-goals) - * [Proposal](#proposal) - * [Etcd Member Metadata](#etcd-member-metadata) - * [Etcd Member State Transitions](#etcd-member-state-transitions) - * [States and Sub-States](#states-and-sub-states) - * [Top Level State Transitions](#top-level-state-transitions) - * [Starting an Etcd-Member in a Single-Node Etcd Cluster](#starting-an-etcd-member-in-a-single-node-etcd-cluster) - * [Addition of a New Etcd-Member in a Multi-Node Etcd Cluster](#addition-of-a-new-etcd-member-in-a-multi-node-etcd-cluster) - * [Restart of a Voting Etcd-Member in a Multi-Node Etcd Cluster](#restart-of-a-voting-etcd-member-in-a-multi-node-etcd-cluster) - * [Deterministic Etcd Member Creation/Restart During Scale-Up](#deterministic-etcd-member-creationrestart-during-scale-up) - * [TLS Enablement for Peer Communication](#tls-enablement-for-peer-communication) - * [Monitoring Backup Health](#monitoring-backup-health) - * [Enhanced Snapshot Compaction](#enhanced-snapshot-compaction) - * [Enhanced Defragmentation](#enhanced-defragmentation) - * [Monitoring Defragmentations](#monitoring-defragmentations) - * [Monitoring Restorations](#monitoring-restorations) - * [Monitoring Volume Mismatches](#monitoring-volume-mismatches) - * [Custom Resource API](#custom-resource-api) - * [Spec vs Status](#spec-vs-status) - * [Representing State Transitions](#representing-state-transitions) - * [Reason Codes](#reason-codes) - * [API](#api) - * [EtcdMember](#etcdmember) - * [Etcd](#etcd) - * [Lifecycle of an EtcdMember](#lifecycle-of-an-etcdmember) - * [Creation](#creation) - * [Updation](#updation) - * [Deletion](#deletion) - * [Reconciliation](#reconciliation) - * [Stale EtcdMember Status Handling](#stale-etcdmember-status-handling) - * [Reference](#reference) - ## Summary Today, [etcd-druid](https://github.com/gardener/etcd-druid) mainly acts as an etcd cluster provisioner, and seldom takes remediatory actions if the 
[etcd](https://etcd.io/) cluster goes into an undesired state that needs to be resolved by a human operator. In other words, etcd-druid cannot perform day-2 operations on etcd clusters in its current form, and hence cannot carry out its full set of responsibilities as a true "operator" of etcd clusters. For etcd-druid to be fully capable of its responsibilities, it must know the latest state of the etcd clusters and their individual members at all times. diff --git a/docs/proposals/05-etcd-operator-tasks.md b/docs/proposals/05-etcd-operator-tasks.md index 2f82ab54f..232f877ff 100644 --- a/docs/proposals/05-etcd-operator-tasks.md +++ b/docs/proposals/05-etcd-operator-tasks.md @@ -13,46 +13,6 @@ reviewers: # DEP-05: Operator Out-of-band Tasks -## Table of Contents - -- [DEP-05: Operator Out-of-band Tasks](#dep-05-operator-out-of-band-tasks) - - [Table of Contents](#table-of-contents) - - [Summary](#summary) - - [Terminology](#terminology) - - [Motivation](#motivation) - - [Goals](#goals) - - [Non-Goals](#non-goals) - - [Proposal](#proposal) - - [Custom Resource Golang API](#custom-resource-golang-api) - - [Spec](#spec) - - [Status](#status) - - [Custom Resource YAML API](#custom-resource-yaml-api) - - [Lifecycle](#lifecycle) - - [Creation](#creation) - - [Execution](#execution) - - [Deletion](#deletion) - - [Use Cases](#use-cases) - - [Recovery from permanent quorum loss](#recovery-from-permanent-quorum-loss) - - [Task Config](#task-config) - - [Pre-Conditions](#pre-conditions) - - [Trigger on-demand snapshot compaction](#trigger-on-demand-snapshot-compaction) - - [Possible scenarios](#possible-scenarios) - - [Task Config](#task-config-1) - - [Pre-Conditions](#pre-conditions-1) - - [Trigger on-demand full/delta snapshot](#trigger-on-demand-fulldelta-snapshot) - - [Possible scenarios](#possible-scenarios-1) - - [Task Config](#task-config-2) - - [Pre-Conditions](#pre-conditions-2) - - [Trigger on-demand maintenance of etcd cluster](#trigger-on-demand-maintenance-of-etcd-cluster) - - [Possible Scenarios](#possible-scenarios-2) - - [Task Config](#task-config-3) - - [Pre-Conditions](#pre-conditions-3) - - [Copy Backups Task](#copy-backups-task) - - [Possible Scenarios](#possible-scenarios-3) - - [Task Config](#task-config-4) - - [Pre-Conditions](#pre-conditions-4) - - [Metrics](#metrics) - ## Summary This DEP proposes an enhancement to `etcd-druid`'s capabilities to handle [out-of-band](#terminology) tasks, which are presently performed manually or invoked programmatically via suboptimal APIs. The document proposes the establishment of a unified interface by defining a well-structured API to harmonize the initiation of any `out-of-band` task, monitor its status, and simplify the process of adding new tasks and managing their lifecycles. diff --git a/docs/usage/managing-etcd-clusters.md b/docs/usage/managing-etcd-clusters.md index d05e0a60d..f6161d192 100644 --- a/docs/usage/managing-etcd-clusters.md +++ b/docs/usage/managing-etcd-clusters.md @@ -13,24 +13,24 @@ In order to track the progress of creation of etcd cluster resources you can do * `status.lastOperation` can be monitored to check the status of reconciliation. * Additional printer columns have been defined for `Etcd` custom resource. You can execute the following command to know if an `Etcd` cluster is ready/quorate. 
- ```bash - kubectl get etcd -n -owide +```bash +kubectl get etcd -n -owide # you will see additional columns which will indicate the state of an etcd cluster NAME READY QUORATE ALL MEMBERS READY BACKUP READY AGE CLUSTER SIZE CURRENT REPLICAS READY REPLICAS etcd-main true True True True 235d 3 3 3 - ``` +``` * You can additionally monitor [all etcd cluster resources](../concepts/etcd-cluster-components.md) that are created for every etcd cluster. For etcd-druid version <v0.23.0 use the following command: - ``` - kubectl get all,cm,role,rolebinding,lease,sa -n --selector=instance= - ``` +```bash +kubectl get all,cm,role,rolebinding,lease,sa -n --selector=instance= +``` For etcd-druid version >=v0.23.0 use the following command: - ```bash - kubectl get all,cm,role,rolebinding,lease,sa -n --selector=app.kubernetes.io/managed-by=etcd-druid,app.kubernetes.io/part-of= - ``` +```bash +kubectl get all,cm,role,rolebinding,lease,sa -n --selector=app.kubernetes.io/managed-by=etcd-druid,app.kubernetes.io/part-of= +``` @@ -60,6 +60,7 @@ Prior to v0.23.0 you can do this by using `--ignore-operation-annotation` CLI fl #### Explicit reconciliation If `--enable-etcd-spec-auto-reconcile` or `--ignore-operation-annotation` is set to false or not set at all, then any change to an `Etcd` resource will not be automatically reconciled. To trigger a reconcile you must set the following annotation on the `Etcd` resource: + ```bash kubectl annotate etcd gardener.cloud/operation=reconcile -n ``` @@ -126,7 +127,8 @@ spec: name: etcd-druid-images-overwrite ``` -> **NOTE:** Image overwrites specified in the mounted `ConfigMap` will be respected by successive reconciliations for this `Etcd` custom resource. +!!! info + Image overwrites specified in the mounted `ConfigMap` will be respected by successive reconciliations for this `Etcd` custom resource. **Option #2** @@ -137,16 +139,18 @@ We provide a generic way to suspend etcd cluster reconciliation via etcd-druid, `etcd` cluster resources are managed by `etcd-druid` and since v0.23.0 version of `etcd-druid` any changes to these managed resources are protected via a validating webhook. You can find more information about this webhook [here](../concepts/etcd-cluster-resource-protection.md). To be able to manually modify etcd cluster managed resources two things need to be done: 1. Annotate the target `Etcd` resource suspending any reconciliation by `etcd-druid`. You can do this by invoking the following command: + +```bash kubectl annotate etcd -n druid.gardener.cloud/suspend-etcd-spec-reconcile= - ``` +``` 2. Add another annotation to the target `Etcd` resource disabling managed resource protection via the webhook. You can do this by invoking the following command: -```bash - kubectl annotate etcd etcd-main -n druid.gardener.cloud/disable-etcd-component-protection= - ``` +```bash + kubectl annotate etcd -n druid.gardener.cloud/disable-etcd-component-protection= +``` Now you are free to make changes to any managed etcd cluster resource. -> **NOTE:** As long as the above two annotations are there, no reconciliation will be done for this etcd cluster by `etcd-druid`. +!!! note + As long as the above two annotations are there, no reconciliation will be done for this etcd cluster by `etcd-druid`.
Therefore it is essential that you remove these annotations eventually. diff --git a/docs/usage/recovering-etcd-clusters.md index 424bd7f1e..b923de8a8 100644 --- a/docs/usage/recovering-etcd-clusters.md +++ b/docs/usage/recovering-etcd-clusters.md @@ -20,21 +20,23 @@ At present, recovery from a permanent quorum loss is achieved by manually execut > **Note:** In the near future etcd-druid will offer capability to automate the recovery from a permanent quorum loss via [Out-Of-Band Operator Tasks](https://github.com/gardener/etcd-druid/blob/90995898b231a49a8f211e85160600e9e6019fe0/docs/proposals/05-etcd-operator-tasks.md#recovery-from-permanent-quorum-loss). An operator only needs to ascertain that there is a permanent quorum loss and the etcd-cluster is beyond auto-recovery. Once that is established then an operator can invoke a task whose status an operator can check. -> :warning: Please note that manually restoring etcd can result in data loss. This guide is the last resort to bring an Etcd cluster up and running again. +!!! warning + Please note that manually restoring etcd can result in data loss. This guide is the last resort to bring an Etcd cluster up and running again. #### 00-Identify the etcd cluster It is possible to shard the etcd cluster based on resource types using [--etcd-servers-overrides](https://kubernetes.io/docs/reference/command-line-tools-reference/kube-apiserver/) CLI flag of `kube-apiserver`. Any sharding results in more than one etcd-cluster. -> **Note:** In `gardener`, each shoot control plane has two etcd clusters, `etcd-events` which only stores events and `etcd-main` - stores everything else except events. +!!! info + In `gardener`, each shoot control plane has two etcd clusters, `etcd-events` which only stores events and `etcd-main` - stores everything else except events. Identify the etcd-cluster which has a permanent quorum loss. Most of the resources of an etcd-cluster can be identified by its name. The resources of interest to recover from permanent quorum loss are: `Etcd` CR, `StatefulSet`, `ConfigMap` and `PVC`. > To identify the `ConfigMap` resource use the following command: > -> ```bash -> kubectl get sts -o jsonpath='{.spec.template.spec.volumes[?(@.name=="etcd-config-file")].configMap.name}' -> ``` +```bash + kubectl get sts -o jsonpath='{.spec.template.spec.volumes[?(@.name=="etcd-config-file")].configMap.name}' +``` #### 01-Prepare Etcd Resource to allow manual updates @@ -50,7 +52,7 @@ The above annotation will prevent any reconciliation by etcd-druid for this `Etc Add another annotation to the `Etcd` resource: ```bash -kubectl annotate etcd etcd-main -n druid.gardener.cloud/disable-etcd-component-protection= +kubectl annotate etcd -n druid.gardener.cloud/disable-etcd-component-protection= ``` The above annotation will allow manual edits to `Etcd` cluster resources that are managed by etcd-druid. diff --git a/docs/usage/securing-etcd-clusters.md index 2646c95b9..75ef5f6dc 100644 --- a/docs/usage/securing-etcd-clusters.md +++ b/docs/usage/securing-etcd-clusters.md @@ -6,12 +6,11 @@ This document will describe all the TLS artifacts that are typically generated f In order to understand all the TLS artifacts that are required to set up etcd-druid and one or more etcd-clusters, one must have a clear view of all the communication channels that need to be protected via TLS.
In the diagram below all communication lines in a typical 3-node etcd cluster along with `kube-apiserver` and `etcd-druid` is illustrated. -> **Note:** For [Gardener](https://github.com/gardener/gardener) setup all the communication lines are TLS enabled. +!!! info + For [Gardener](https://github.com/gardener/gardener) setup all the communication lines are TLS enabled. communication-lines - - ## TLS artifacts An etcd cluster setup by `etcd-druid` leverages the following TLS artifacts: @@ -28,7 +27,8 @@ An etcd cluster setup by `etcd-druid` leverages the following TLS artifacts: * Certificate Authority used to sign server certificate key-pair for `etcd` peer communication specified via `etcd.spec.etcd.peerUrlTls.tlsCASecretRef`. * Server certificate key-pair specified via `etcd.spec.etcd.peerUrlTls.serverTLSSecretRef` used for `etcd` peer communication. -> **NOTE:** TLS artifacts should be created prior to creating `Etcd` clusters. `etcd-druid` currently does not provide a convenience way to generate these TLS artifacts. [etcd](https://etcd.io/docs/v3.4/op-guide/security/) recommends to use [cfssl](https://github.com/cloudflare/cfssl) to generate certificates. However you can use any other tool as well. We do provide a convenience script for local development [here](https://github.com/gardener/etcd-wrapper/blob/main/hack/local-dev/generate_pki.sh) which can be used to generate TLS artifacts. Currently this script is part of [etcd-wrapper](https://github.com/gardener/etcd-wrapper) github repository but we will harmonize these scripts to be used across all github projects under the `etcd-druid` ecosystem. +!!! note + TLS artifacts should be created prior to creating `Etcd` clusters. `etcd-druid` currently does not provide a convenience way to generate these TLS artifacts. [etcd](https://etcd.io/docs/v3.4/op-guide/security/) recommends to use [cfssl](https://github.com/cloudflare/cfssl) to generate certificates. However you can use any other tool as well. We do provide a convenience script for local development [here](https://github.com/gardener/etcd-wrapper/blob/main/hack/local-dev/generate_pki.sh) which can be used to generate TLS artifacts. Currently this script is part of [etcd-wrapper](https://github.com/gardener/etcd-wrapper) github repository but we will harmonize these scripts to be used across all github projects under the `etcd-druid` ecosystem. diff --git a/hack/api-reference/config.yaml b/hack/api-reference/config.yaml new file mode 100644 index 000000000..287e5583b --- /dev/null +++ b/hack/api-reference/config.yaml @@ -0,0 +1,8 @@ +processor: + ignoreFields: + - "TypeMeta$" + ignoreTypes: + - "List$" + - "ParseError$" +render: + kubernetesVersion: 1.29 \ No newline at end of file diff --git a/hack/demo/etcd-druid-demo.tape b/hack/demo/etcd-druid-demo.tape deleted file mode 100644 index 09d0d9606..000000000 --- a/hack/demo/etcd-druid-demo.tape +++ /dev/null @@ -1,194 +0,0 @@ -# Instructions to generate the demo in .mp4 - -# Run: -# $ vhs etcd-druid-demo-realtime.tape - -# The generated video will be extremely long since the etcd cluster takes more than 2 minutes for all members to become Ready. - -# Use ffmpeg to speed up those 2 minutes. - -# ENSURE to run the first ffmpeg command with the flags in the specified order. -# If this is not done, the first few seconds of the demo would get skipped- -# since the order of the flags matters to find the first key frame in ffmpeg. 
-# Let the timestamp from which you want the playback to be sped up be H1:M1:S1, -# and the timestamp till which you want the playback to be sped up be H2:M2:S2, -# and the speed multiplier be SPEED, -# and the end time of the video be END. -# See the instructions in the .tape commands to identify parts - -# Run: -# $ cp etcd-druid-demo-realtime.mp4 input.mp4 -# $ ffmpeg -ss 0 -i input.mp4 -to H1:M1:S1 -c copy part1.mp4 -# $ ffmpeg -i input.mp4 -ss H1:M1:S1 -to 00:03:34 -c copy part2.mp4 -# $ ffmpeg -i input.mp4 -ss H2:M2:S2 -to END -c copy part3.mp4 -# $ ffmpeg -i part2.mp4 -filter:v "setpts=PTS/SPEED" part2_fast.mp4 -# $ cat << EOF > concat_list.txt -# file 'part1.mp4' -# file 'part2_fast.mp4' -# file 'part3.mp4' -# EOF -# $ ffmpeg -f concat -safe 0 -i concat_list.txt -c copy output.mp4 -# $ cp output.mp4 etcd-druid-demo.mp4 - -Output etcd-druid-demo-realtime.mp4 - -Require make -Require kubectl -Require tmux - -Set FontSize 16 -Set FontFamily "JetBrainsMono Nerd Font" -Set Width 1200 -Set Height 800 -Set WindowBar Colorful -Set CursorBlink true -Set TypingSpeed 75ms -Set WaitTimeout 30s - -# Begin part 1 - -Type "# Target any kubernetes cluster." -Sleep 130ms -Enter - -Type "# For this demo, we set up a local kubernetes cluster using kind, through the `kind-up` make target:" -Sleep 130ms -Enter - -Type "make kind-up" -Sleep 130ms -Enter -Wait - -Type "# Target this kind cluster by exporting KUBECONFIG and pointing it to the kubeconfig path of the kind cluster as shown above." -Sleep 130ms -Enter - -Type@50ms "export KUBECONFIG=hack/e2e-test/infrastructure/kind/kubeconfig" -Sleep 260ms -Enter - -Type "# We now have to install the CRDs and deploy etcd-druid." -Sleep 130ms -Enter - -Type "# The `deploy` make target installs CRDs, and creates a Deployment for etcd-druid in the default namespace:" -Sleep 130ms -Enter - -Type "make deploy" -Sleep 130ms -Enter -# a magic number that makes it work, things break otherwise -# Find this number with `$ time make deploy` and add some buffer -Sleep 12s - -Type "kubectl get pod # Checking the etcd-druid pod." -Sleep 130ms -Enter -Wait - -Type "# etcd-druid has now been deployed; and is ready to provision, reconcile, and monitor etcd clusters." -Sleep 130ms -Enter - -Type "# To create an etcd-cluster, just define an etcd CR and deploy it." -Sleep 3s -Enter - -# setup tmux -Hide - Type "tmux -f /dev/null -L test new-session -- bash" Enter - Type "tmux split-window -d -- bash && \" Enter - Type "tmux set status && \" Enter - Type 'tmux setw pane-border-style "fg=0" && \' Enter - Type 'tmux setw pane-active-border-style "fg=0"' Enter - Sleep 0.5 - Ctrl+L - Sleep 1 -Show - -# pane 1 - Type "# Using a pre-defined sample, config/samples/druid_v1alpha1_etcd.yaml, which creates a 3 member etcd-cluster." - Sleep 130ms - Enter - - Type "kubectl apply -f config/samples/druid_v1alpha1_etcd.yaml" - Sleep 130ms -# pane 1 - -# switch pane to 2 -Hide -Ctrl+B -Type o -Show - -# pane 2 - Type "# Meanwhile, keeping a watch on some of the resources being provisioned by etcd-druid:" - Enter - Sleep 130ms - Type "watch --interval 5 kubectl get etcd,sts,pod" - Enter - Sleep 130ms -# pane 2 - -# switch pane to 1 -Hide -Ctrl+B -Type o -Show - -# pane 1 - -# End part 1 - Enter - -# Begin part 2 - Sleep 210s -# End part 2 - -# Begin part 3 - - Type "# The etcd cluster is now ready." 
- Sleep 130ms - Enter - - Type "# Bringing the etcd cluster down is done by simply deleting the CR:" - Sleep 130ms - Enter - - Type "kubectl delete etcd etcd-test" - Sleep 130ms - Enter - Sleep 10s - - Type "# All resources provisioned by etcd-druid have been brought down, as can be seen below." - Sleep 130ms - Enter -# pane 1 - -# kill pane 2 -Hide -Ctrl+B -Type o -Ctrl+C -Type "tmux kill-pane" -Enter -Show - -Type "# Shutting down the kind cluster:" -Sleep 130ms -Enter - -Type "make kind-down" -Sleep 130ms -Enter -Sleep 5s - -Hide -Type "tmux kill-session" -Sleep 130ms -Enter -Show - -# End part 3 diff --git a/hack/tools.mk b/hack/tools.mk index 06a5f442b..ef9087368 100644 --- a/hack/tools.mk +++ b/hack/tools.mk @@ -24,6 +24,7 @@ GO_APIDIFF := $(TOOLS_BIN_DIR)/go-apidiff GOTESTFMT := $(TOOLS_BIN_DIR)/gotestfmt GOIMPORTS_REVISER := $(TOOLS_BIN_DIR)/goimports-reviser YQ := $(TOOLS_BIN_DIR)/yq +CRD_REF_DOCS := $(TOOLS_BIN_DIR)/crd-ref-docs # default tool versions SKAFFOLD_VERSION := v2.13.0 @@ -41,7 +42,7 @@ GO_APIDIFF_VERSION ?= v0.8.2 GOTESTFMT_VERSION ?= v2.5.0 GOIMPORTS_REVISER_VERSION ?= v3.6.5 YQ_VERSION ?= v4.44.3 - +CRD_REF_DOCS_VERSION ?= v0.1.0 export TOOLS_BIN_DIR := $(TOOLS_BIN_DIR) export PATH := $(abspath $(TOOLS_BIN_DIR)):$(PATH) @@ -116,4 +117,7 @@ $(GOTESTFMT): GOBIN=$(abspath $(TOOLS_BIN_DIR)) go install github.com/gotesttools/gotestfmt/v2/cmd/gotestfmt@$(GOTESTFMT_VERSION) $(GOIMPORTS_REVISER): - GOBIN=$(abspath $(TOOLS_BIN_DIR)) go install github.com/incu6us/goimports-reviser/v3@$(GOIMPORTS_REVISER_VERSION) \ No newline at end of file + GOBIN=$(abspath $(TOOLS_BIN_DIR)) go install github.com/incu6us/goimports-reviser/v3@$(GOIMPORTS_REVISER_VERSION) + +$(CRD_REF_DOCS): + GOBIN=$(abspath $(TOOLS_BIN_DIR)) go install github.com/elastic/crd-ref-docs@$(CRD_REF_DOCS_VERSION) \ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 000000000..2b8e47c4f --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,113 @@ +# yaml-language-server: $schema=https://squidfunk.github.io/mkdocs-material/schema.json + +# Project Info +site_name: Etcd Druid +site_description: etcd druid documentation +repo_name: 'gardener/etcd-druid' +repo_url: 'https://github.com/gardener/etcd-druid' +use_directory_urls: false +theme: + name: material + logo: assets/logo/etcd-druid-whitebg.png + favicon: assets/logo/etcd-druid-transparentbg.png + icon: + repo: fontawesome/brands/github + features: + - navigation.tabs + - navigation.sections + - navigation.top + - navigation.footer + - search.suggest + - search.highlight + - content.tabs.link + - content.code.select + - content.code.copy + - content.code.annotate + - header.autohide + language: en + palette: + # Light mode + - media: "(prefers-color-scheme: light)" + scheme: default + primary: white + accent: blue + toggle: + icon: material/weather-night + name: Switch to dark mode + # Dark mode + - media: "(prefers-color-scheme: dark)" + scheme: slate + accent: amber + primary: black + toggle: + icon: material/weather-sunny + name: Switch to light mode + font: + text: Roboto + code: Roboto Mono +extra_css: + - assets/css/custom.css +markdown_extensions: + - admonition + - pymdownx.highlight: + anchor_linenums: true + line_spans: __span + pygments_lang_class: true + auto_title: true + - pymdownx.inlinehilite + - pymdownx.snippets + - pymdownx.superfences + - codehilite + - abbr + - attr_list + - toc: + permalink: true + - pymdownx.tabbed: + alternate_style: true + - pymdownx.emoji: + emoji_index: 
!!python/name:material.extensions.emoji.twemoji + emoji_generator: !!python/name:material.extensions.emoji.to_svg +nav: + - Overview: index.md + - Deployment: + - Getting Started: deployment/getting-started-locally/getting-started-locally.md + - Configuring Etcd Druid: deployment/configure-etcd-druid.md + - Feature Gates: deployment/feature-gates.md + - Version Compatibility Matrix: deployment/version-compatibility-matrix.md + - Production Setup Recommendations: deployment/production-setup-recommendations.md + - Usage: + - Managing Etcd clusters: usage/managing-etcd-clusters.md + - Recovering Etcd clusters: usage/recovering-etcd-clusters.md + - Securing Etcd clusters: usage/securing-etcd-clusters.md + - Concepts: + - Components in an Etcd cluster: concepts/etcd-cluster-components.md + - Protecting resources in an Etcd cluster: concepts/etcd-cluster-resource-protection.md + - Development: + - Controllers: development/controllers.md + - Getting Started: development/getting-started-locally.md + - Prepare Dev Environment: development/prepare-dev-environment.md + - Contribution Guide: development/contribution.md + - Changing API: development/changing-api.md + - Adding New Etcd Cluster Component: development/add-new-etcd-cluster-component.md + - Raising a PR: development/raising-a-pr.md + - Manage Dependencies: development/dependency-management.md + - Testing: development/testing.md + - API Reference: + - Etcd Druid API: api-reference/etcd-druid-api.md + - Monitoring: + - Metrics: monitoring/metrics.md + - Proposals: + - Multi-Node Etcd Clusters: proposals/01-multi-node-etcd-clusters.md + - Snapshot Compaction: proposals/02-snapshot-compaction.md + - Scaling Etcd Clusters: proposals/03-scaling-up-an-etcd-cluster.md + - Etcd Member: proposals/04-etcd-member-custom-resource.md + - Etcd Operator Tasks: proposals/05-etcd-operator-tasks.md +extra: + social: + - icon: fontawesome/brands/github + link: https://github.com/gardener/etcd-druid + generator: true +plugins: + - search + - glightbox + - mkdocs_pymdownx_material_extras \ No newline at end of file
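To preview the site defined by this configuration locally, something along these lines should work; the PyPI package names are assumptions inferred from the theme and plugin identifiers above:

```bash
# Install the Material theme and the plugins referenced in mkdocs.yml
# (package names assumed from the plugin ids: glightbox, pymdownx material extras)
pip install mkdocs-material mkdocs-glightbox mkdocs-pymdownx-material-extras

# Serve the docs with live reload, by default at http://127.0.0.1:8000
mkdocs serve
```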