diff --git a/terraform/aptos-node-testnet/aws/variables.tf b/terraform/aptos-node-testnet/aws/variables.tf index 9ce02d893d623..045e570358680 100644 --- a/terraform/aptos-node-testnet/aws/variables.tf +++ b/terraform/aptos-node-testnet/aws/variables.tf @@ -175,7 +175,7 @@ variable "utility_instance_type" { variable "validator_instance_type" { description = "Instance type used for validator and fullnodes" type = string - default = "c6i.8xlarge" + default = "c6i.16xlarge" } ### Forge diff --git a/terraform/aptos-node-testnet/gcp/variables.tf b/terraform/aptos-node-testnet/gcp/variables.tf index 880be3978563d..c737e4250b0be 100644 --- a/terraform/aptos-node-testnet/gcp/variables.tf +++ b/terraform/aptos-node-testnet/gcp/variables.tf @@ -218,7 +218,7 @@ variable "utility_instance_type" { variable "validator_instance_type" { description = "Instance type used for validator and fullnodes" type = string - default = "t2d-standard-16" + default = "t2d-standard-60" } variable "utility_instance_enable_taint" { diff --git a/terraform/aptos-node/aws/variables.tf b/terraform/aptos-node/aws/variables.tf index be2e004521b4f..61acb59671786 100644 --- a/terraform/aptos-node/aws/variables.tf +++ b/terraform/aptos-node/aws/variables.tf @@ -202,7 +202,7 @@ variable "utility_instance_enable_taint" { variable "validator_instance_type" { description = "Instance type used for validator and fullnodes" type = string - default = "c6i.8xlarge" + default = "c6i.16xlarge" } variable "validator_instance_num" { diff --git a/terraform/aptos-node/gcp/variables.tf b/terraform/aptos-node/gcp/variables.tf index fbea82b870eb8..7c13a02ddec7a 100644 --- a/terraform/aptos-node/gcp/variables.tf +++ b/terraform/aptos-node/gcp/variables.tf @@ -196,7 +196,7 @@ variable "utility_instance_type" { variable "validator_instance_type" { description = "Instance type used for validator and fullnodes" type = string - default = "t2d-standard-16" + default = "t2d-standard-60" } variable "utility_instance_enable_taint" { 
diff --git a/terraform/fullnode/aws/addons.tf b/terraform/fullnode/aws/addons.tf index 52bb764c77565..da987d7694912 100644 --- a/terraform/fullnode/aws/addons.tf +++ b/terraform/fullnode/aws/addons.tf @@ -78,11 +78,6 @@ resource "helm_release" "pfn-addons" { acm_certificate = var.zone_id != "" ? aws_acm_certificate.ingress[0].arn : null loadBalancerSourceRanges = var.client_sources_ipv4 } - load_test = { - config = { - numFullnodeGroups = var.num_fullnodes - } - } }), jsonencode(var.pfn_helm_values), ] diff --git a/terraform/fullnode/aws/variables.tf b/terraform/fullnode/aws/variables.tf index f687206cbafc6..e8eadb9a84cd0 100644 --- a/terraform/fullnode/aws/variables.tf +++ b/terraform/fullnode/aws/variables.tf @@ -134,7 +134,7 @@ variable "utility_instance_type" { variable "fullnode_instance_type" { description = "Instance type used for validator and fullnodes" type = string - default = "c6i.8xlarge" + default = "c6i.16xlarge" } variable "num_extra_instance" { diff --git a/terraform/fullnode/gcp/addons.tf b/terraform/fullnode/gcp/addons.tf index de354a72a76be..7adf760a64504 100644 --- a/terraform/fullnode/gcp/addons.tf +++ b/terraform/fullnode/gcp/addons.tf @@ -85,11 +85,6 @@ resource "helm_release" "pfn-addons" { gce_managed_certificate_domains = var.create_google_managed_ssl_certificate ? 
join(",", distinct(concat([local.domain], var.tls_sans))) : "" # loadBalancerSourceRanges = var.client_sources_ipv4 # not supported yet } - load_test = { - config = { - numFullnodeGroups = var.num_fullnodes - } - } }), jsonencode(var.pfn_helm_values), ] diff --git a/terraform/fullnode/gcp/variables.tf b/terraform/fullnode/gcp/variables.tf index 5954c15a7bd7c..ce9feedbf3941 100644 --- a/terraform/fullnode/gcp/variables.tf +++ b/terraform/fullnode/gcp/variables.tf @@ -119,7 +119,7 @@ variable "utility_instance_type" { variable "fullnode_instance_type" { description = "Instance type used for validator and fullnodes" type = string - default = "t2d-standard-16" + default = "t2d-standard-60" } variable "utility_instance_enable_taint" { diff --git a/terraform/helm/aptos-node/files/haproxy.cfg b/terraform/helm/aptos-node/files/haproxy.cfg index 06433385fedda..6b4a0f2642889 100644 --- a/terraform/helm/aptos-node/files/haproxy.cfg +++ b/terraform/helm/aptos-node/files/haproxy.cfg @@ -22,9 +22,10 @@ defaults # Set the default mode to TCP mode tcp + log-format "%ci:%cp [%t] %ft %b/%s %Tw/%Tc/%Tt %B %ts %ac/%fc/%bc/%sc/%rc %sq/%bq" # Don't log normal events - option dontlog-normal + # option dontlog-normal # Set timeouts for connections timeout client 60s diff --git a/terraform/helm/aptos-node/templates/fullnode.yaml b/terraform/helm/aptos-node/templates/fullnode.yaml index edd966f30e0e9..e3cf7572e4864 100644 --- a/terraform/helm/aptos-node/templates/fullnode.yaml +++ b/terraform/helm/aptos-node/templates/fullnode.yaml @@ -107,9 +107,6 @@ spec: {{- if $.Values.metrics.destination }} aptos.dev/metrics-destination: {{ $.Values.metrics.destination }} {{- end}} - {{- with $.Values.validator.podAnnotations }} - {{- toYaml $.Values.validator.podAnnotations | nindent 8 }} - {{- end }} spec: securityContext: seccompProfile: diff --git a/terraform/helm/aptos-node/templates/validator.yaml b/terraform/helm/aptos-node/templates/validator.yaml index 05494f1e9820c..65dc4f8b5c8c7 100644 --- 
a/terraform/helm/aptos-node/templates/validator.yaml +++ b/terraform/helm/aptos-node/templates/validator.yaml @@ -87,9 +87,6 @@ spec: {{- if $.Values.metrics.destination }} aptos.dev/metrics-destination: {{ $.Values.metrics.destination }} {{- end}} - {{- with $.Values.fullnode.podAnnotations }} - {{- toYaml $.Values.fullnode.podAnnotations | nindent 8 }} - {{- end }} spec: securityContext: seccompProfile: diff --git a/terraform/helm/aptos-node/values.yaml b/terraform/helm/aptos-node/values.yaml index 1b5887dddb78b..edb04a3a7d0ef 100644 --- a/terraform/helm/aptos-node/values.yaml +++ b/terraform/helm/aptos-node/values.yaml @@ -87,9 +87,10 @@ validator: affinity: {} # -- Validator configuration. See NodeConfig https://github.com/aptos-labs/aptos-core/blob/main/config/src/config/mod.rs config: {} - + # -- DEPRECATED: it's broken with Cilium a.k.a. GKE DataplaneV2. + # -- templates/networkpolicy.yaml kept around for reference in case we want to resurrect it. # -- Lock down network ingress and egress with Kubernetes NetworkPolicy - enableNetworkPolicy: true + enableNetworkPolicy: false fullnode: # -- Specify fullnode groups by `name` and number of `replicas` diff --git a/terraform/helm/fullnode/templates/backup-compaction.yaml b/terraform/helm/fullnode/templates/backup-compaction.yaml index 466191d66681c..574095d6629ac 100644 --- a/terraform/helm/fullnode/templates/backup-compaction.yaml +++ b/terraform/helm/fullnode/templates/backup-compaction.yaml @@ -61,7 +61,7 @@ spec: fieldRef: fieldPath: metadata.name - name: PUSH_METRICS_ENDPOINT - value: "{{- include "backup.pushMetricsEndpoint" $ }}/api/v1/import/prometheus?extra_label=role={{- .jobName | default "db_backup_compaction" }}&extra_label=kubernetes_pod_name=$(KUBERNETES_POD_NAME)" + value: "{{- include "backup.pushMetricsEndpoint" $ }}/api/v1/import/prometheus?extra_label=role={{- .jobName | default "db_backup_compaction" }}&extra_label=kubernetes_pod_name=$(KUBERNETES_POD_NAME)&extra_label=chain_name={{ 
$.Values.chain.label | default $.Values.chain.name }}" {{- end }} {{- include "backup.backupEnvironment" (dict "config" $.Values.backup.config "era" $.Values.chain.era) | nindent 12 }} {{- with .Values.backup_compaction }} diff --git a/terraform/helm/fullnode/templates/backup-verify.yaml b/terraform/helm/fullnode/templates/backup-verify.yaml index deaf64b7650db..28f25d00385c8 100644 --- a/terraform/helm/fullnode/templates/backup-verify.yaml +++ b/terraform/helm/fullnode/templates/backup-verify.yaml @@ -59,7 +59,7 @@ spec: fieldRef: fieldPath: metadata.name - name: PUSH_METRICS_ENDPOINT - value: "{{- include "backup.pushMetricsEndpoint" $ }}/api/v1/import/prometheus?extra_label=role={{- .jobName | default "db_backup_verify" }}&extra_label=kubernetes_pod_name=$(KUBERNETES_POD_NAME)" + value: "{{- include "backup.pushMetricsEndpoint" $ }}/api/v1/import/prometheus?extra_label=role={{- .jobName | default "db_backup_verify" }}&extra_label=kubernetes_pod_name=$(KUBERNETES_POD_NAME)&extra_label=chain_name={{ $.Values.chain.label | default $.Values.chain.name }}" {{- end }} {{- include "backup.backupEnvironment" (dict "config" $.Values.backup.config "era" $.Values.chain.era) | nindent 12 }} {{- with .Values.backup_verify }} diff --git a/terraform/helm/fullnode/templates/backup.yaml b/terraform/helm/fullnode/templates/backup.yaml index 4911c4fa18922..9a80cbd4020ed 100644 --- a/terraform/helm/fullnode/templates/backup.yaml +++ b/terraform/helm/fullnode/templates/backup.yaml @@ -75,7 +75,7 @@ spec: fieldRef: fieldPath: metadata.name - name: PUSH_METRICS_ENDPOINT - value: "{{- include "backup.pushMetricsEndpoint" $ }}/api/v1/import/prometheus?extra_label=role={{- .jobName | default "db_backup" }}&extra_label=kubernetes_pod_name=$(KUBERNETES_POD_NAME)" + value: "{{- include "backup.pushMetricsEndpoint" $ }}/api/v1/import/prometheus?extra_label=role={{- .jobName | default "db_backup" }}&extra_label=kubernetes_pod_name=$(KUBERNETES_POD_NAME)&extra_label=chain_name={{ 
$.Values.chain.label | default $.Values.chain.name }}" {{- end }} {{- include "backup.backupEnvironment" (dict "config" .config "era" $.Values.chain.era) | nindent 8 }} volumeMounts: diff --git a/terraform/helm/pfn-addons/README.md b/terraform/helm/pfn-addons/README.md index c669ca4b7aa63..8f51c163dce31 100644 --- a/terraform/helm/pfn-addons/README.md +++ b/terraform/helm/pfn-addons/README.md @@ -18,31 +18,8 @@ Additional components for a public fullnode fleet deployment | ingress.gce_security_policy | string | `nil` | Security policy to apply to the backend services behind the ingress | | ingress.health_check_duration_secs | string | `nil` | The maximum number of seconds that a PFN is allowed to be behind to be considered healthy and be allowed to serve traffic | | ingress.loadBalancerSourceRanges | string | `nil` | | +| ingress.logging.enabled | bool | `false` | | | ingress.wafAclArn | string | `nil` | | -| load_test.affinity | object | `{}` | | -| load_test.config.duration | int | `300` | How long to emit transactions for | -| load_test.config.expected_max_txns | int | `6000000` | Default 20k * $duration | -| load_test.config.max_transactions_per_account | int | `5` | | -| load_test.config.mempool_backlog | int | `5000` | Number of transactions outstanding in mempool | -| load_test.config.mint_key | string | `nil` | The private key used to mint to fund load test | -| load_test.config.numFullnodeGroups | string | `nil` | The number of fullnode groups to run traffic against | -| load_test.config.target_tps | int | `0` | Whether to target a constant TPS, or 0 if not used. Cannot be used with mempool_backlog. 
| -| load_test.config.transaction_type | string | `"coin-transfer"` | | -| load_test.config.txn_expiration_time_secs | int | `30` | How long to wait for transactions to be expired | -| load_test.config.use_pfns | bool | `true` | If true, run $numFullnodeGroups parallel load tests | -| load_test.config.use_validators | bool | `false` | Whether to submit transactions through validator REST API | -| load_test.enabled | bool | `false` | Whether to enable the load test CronJob | -| load_test.fullnode | object | `{"groups":[{"name":"fullnode"}]}` | The fullnode groups to target | -| load_test.image.pullPolicy | string | `"IfNotPresent"` | Image pull policy to use for tools image | -| load_test.image.repo | string | `"aptoslabs/tools"` | Image repo to use for tools image for running load tests | -| load_test.image.tag | string | `nil` | Image tag to use for tools image | -| load_test.intervalMins | int | `15` | How many minutes between load test runs | -| load_test.nodeSelector | object | `{}` | | -| load_test.resources.limits.cpu | int | `4` | | -| load_test.resources.limits.memory | string | `"4Gi"` | | -| load_test.resources.requests.cpu | int | `4` | | -| load_test.resources.requests.memory | string | `"4Gi"` | | -| load_test.tolerations | list | `[]` | | | service.aws_tags | string | `nil` | | | service.domain | string | `nil` | | | service.enableOnchainDiscovery | bool | `false` | | @@ -53,4 +30,4 @@ Additional components for a public fullnode fleet deployment | serviceAccount.name | string | `nil` | The name of the service account to use. 
If not set and create is true, a name is generated using the fullname template | ---------------------------------------------- -Autogenerated from chart metadata using [helm-docs v1.13.1](https://github.com/norwoodj/helm-docs/releases/v1.13.1) +Autogenerated from chart metadata using [helm-docs v1.14.2](https://github.com/norwoodj/helm-docs/releases/v1.14.2) diff --git a/terraform/helm/pfn-addons/templates/loadtest.yaml b/terraform/helm/pfn-addons/templates/loadtest.yaml deleted file mode 100644 index e5a6048aa197a..0000000000000 --- a/terraform/helm/pfn-addons/templates/loadtest.yaml +++ /dev/null @@ -1,123 +0,0 @@ -{{- if .Values.load_test.enabled }} -apiVersion: batch/v1 -kind: CronJob -metadata: - name: {{ include "pfn-addons.fullname" . }}-load-test - labels: - {{- include "pfn-addons.labels" . | nindent 4 }} - app.kubernetes.io/name: load-test -spec: - concurrencyPolicy: Replace - schedule: {{ printf "*/%d * * * *" (int .Values.load_test.intervalMins) | quote }} - jobTemplate: - spec: - template: - metadata: - labels: - {{- include "pfn-addons.selectorLabels" . | nindent 12 }} - app.kubernetes.io/name: load-test - spec: - restartPolicy: Never - priorityClassName: {{ include "pfn-addons.fullname" . 
}}-high - containers: - - name: load-test - image: {{ .Values.load_test.image.repo }}:{{ .Values.load_test.image.tag | default .Values.imageTag }} - imagePullPolicy: {{ .Values.load_test.image.pullPolicy }} - command: - - aptos-transaction-emitter - - emit-tx - - --mint-key={{ .Values.load_test.config.mint_key }} - - --chain-id={{ .Values.load_test.config.chain_id }} - # Build targets args for internal cluster targets - {{- $numTargets := 0 }} - {{- $targetSuffix := "" }} - {{- $targetGroups := list }} - {{- if $.Values.load_test.config.use_pfns }} - {{- $numTargets = $.Values.load_test.config.numFullnodeGroups }} - {{- $targetSuffix = "fullnode" }} - {{- $targetGroups = list }} - {{- else if $.Values.load_test.config.use_validators }} - {{- $numTargets = $.Values.genesis.numValidators }} - {{- $targetSuffix = "validator" }} - {{- $targetGroups = list }} - {{- else }} - {{- $numTargets = $.Values.load_test.config.numFullnodeGroups }} - {{- $targetSuffix = "fullnode" }} - {{- $targetGroups = $.Values.load_test.fullnode.groups }} - {{- end }} - {{- if $.Values.load_test.config.use_pfns }} - {{- range $i := until (int $numTargets) }} - - --targets=http://{{ printf "fullnode%d.%s" $i $.Values.service.domain }} - # - --targets=https://{{ printf "%s" $.Values.service.domain }} - {{- end }} - {{- else }} - {{- range $i := until (int $numTargets) }} - {{- $port := 80 }} - {{- if $targetGroups }} - {{- range $group := $targetGroups }} - {{- $nodeName := join "-" (list $.Values.genesis.username_prefix $i $group.name "lb") }} - - --targets=http://{{ $nodeName }}:{{ $port }} - {{- end }} - {{- else }} - {{- $nodeName := join "-" (list $.Values.genesis.username_prefix $i $targetSuffix "lb") }} - - --targets=http://{{ $nodeName }}:{{ $port }} - {{- end }} - {{- end }} - {{- end }} - {{- with .Values.load_test }} - # Either provide target TPS or mempool backlog - {{- if gt (int .config.target_tps) 0 }} - - --target-tps={{ .config.target_tps }} - {{- else }} - - 
--mempool-backlog={{ .config.mempool_backlog }} - {{- end }} - - --duration={{ .config.duration }} - # - --delay-after-minting=300 - - --expected-max-txns={{ .config.expected_max_txns }} - - --txn-expiration-time-secs={{ .config.txn_expiration_time_secs }} - - --max-transactions-per-account={{ .config.max_transactions_per_account }} - - --transaction-type={{ .config.transaction_type }} - env: - - name: RUST_BACKTRACE - value: "full" - - name: REUSE_ACC - value: "1" - {{- with .resources }} - resources: - {{- toYaml . | nindent 14 }} - {{- end }} - securityContext: - readOnlyRootFilesystem: true - allowPrivilegeEscalation: false - capabilities: - drop: - - ALL - seccompProfile: - type: RuntimeDefault - {{- with .nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with .affinity }} - affinity: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with .tolerations }} - tolerations: - {{- toYaml . | nindent 12 }} - {{- end }} - securityContext: - runAsNonRoot: true - runAsUser: 6180 - runAsGroup: 6180 - fsGroup: 6180 - # sysctls: - # - name: net.ipv4.tcp_tw_reuse - # value: "1" - {{- end }} - serviceAccountName: {{ include "pfn-addons.serviceAccountName" . 
}} - {{- if .Values.imagePullSecret }} - imagePullSecrets: - - name: {{.Values.imagePullSecret}} - {{- end }} -{{- end }} diff --git a/terraform/helm/pfn-addons/templates/service.yaml b/terraform/helm/pfn-addons/templates/service.yaml index c4f6e7f4f984e..18cf919461d79 100644 --- a/terraform/helm/pfn-addons/templates/service.yaml +++ b/terraform/helm/pfn-addons/templates/service.yaml @@ -39,6 +39,8 @@ spec: securityPolicy: name: {{ .Values.ingress.gce_security_policy }} {{- end }} + logging: + enable: {{ .Values.ingress.logging.enabled }} connectionDraining: drainingTimeoutSec: 30 healthCheck: diff --git a/terraform/helm/pfn-addons/values.yaml b/terraform/helm/pfn-addons/values.yaml index 3c5cdf5cb1cdc..16954125c6b81 100644 --- a/terraform/helm/pfn-addons/values.yaml +++ b/terraform/helm/pfn-addons/values.yaml @@ -22,6 +22,8 @@ ingress: loadBalancerSourceRanges: enableStickyness: true cookieDurationSeconds: 86400 + logging: + enabled: false # the below only work for gce ingress gce_managed_certificate: gce_managed_certificate_domains: @@ -32,51 +34,3 @@ ingress: # -- The maximum number of seconds that a PFN is allowed to be behind # to be considered healthy and be allowed to serve traffic health_check_duration_secs: - -load_test: - # -- Whether to enable the load test CronJob - enabled: false - image: - # -- Image repo to use for tools image for running load tests - repo: aptoslabs/tools - # -- Image tag to use for tools image - tag: - # -- Image pull policy to use for tools image - pullPolicy: IfNotPresent - resources: - limits: - cpu: 4 - memory: 4Gi - requests: - cpu: 4 - memory: 4Gi - nodeSelector: {} - tolerations: [] - affinity: {} - # -- How many minutes between load test runs - intervalMins: 15 - # -- The fullnode groups to target - fullnode: - groups: - - name: fullnode - config: - # -- The number of fullnode groups to run traffic against - numFullnodeGroups: - # -- The private key used to mint to fund load test - mint_key: - # -- Number of transactions 
outstanding in mempool - mempool_backlog: 5000 - # -- Whether to target a constant TPS, or 0 if not used. Cannot be used with mempool_backlog. - target_tps: 0 - # -- How long to emit transactions for - duration: 300 - # -- How long to wait for transactions to be expired - txn_expiration_time_secs: 30 - # -- Whether to submit transactions through validator REST API - use_validators: false - # -- If true, run $numFullnodeGroups parallel load tests - use_pfns: true - # -- Default 20k * $duration - expected_max_txns: 6000000 - max_transactions_per_account: 5 - transaction_type: coin-transfer diff --git a/terraform/helm/vector-log-agent/files/vector-transforms.yaml b/terraform/helm/vector-log-agent/files/vector-transforms.yaml index 943e800b44f39..c703a783f6f80 100644 --- a/terraform/helm/vector-log-agent/files/vector-transforms.yaml +++ b/terraform/helm/vector-log-agent/files/vector-transforms.yaml @@ -120,27 +120,27 @@ transforms: source: | . = flatten(., ".") # in order for fields to become individual, filterable top-level fields in uptrace we need to flatten nested objects into top-level keys. .service_name = .k8s.labels.app - .repo = "gcp" - - datadog_logs: - type: remap - inputs: - - final_logs - source: | - .ddsource = "k8s" - if is_string(.k8s.labels.app) { - .service = .k8s.labels.app - } - .ddtags, _ = "kube_cluster_name:" + .k8s.cluster + ",kube_namespace:" + .k8s.namespace + ",pod_name:" + .k8s.pod_name - - signoz_logs: - type: remap - inputs: - - final_logs - source: | - .severity_text = del(.level) - .source_type = "k8s" - tmp = { "k8s": del(.k8s)} - resources_tmp = flatten(tmp, ".") - . 
= flatten(., ".") - .resources = resources_tmp + .repo = "k8s" + + # datadog_logs: + # type: remap + # inputs: + # - final_logs + # source: | + # .ddsource = "k8s" + # if is_string(.k8s.labels.app) { + # .service = .k8s.labels.app + # } + # .ddtags, _ = "kube_cluster_name:" + .k8s.cluster + ",kube_namespace:" + .k8s.namespace + ",pod_name:" + .k8s.pod_name + + # signoz_logs: + # type: remap + # inputs: + # - final_logs + # source: | + # .severity_text = del(.level) + # .source_type = "k8s" + # tmp = { "k8s": del(.k8s)} + # resources_tmp = flatten(tmp, ".") + # . = flatten(., ".") + # .resources = resources_tmp