diff --git a/.github/workflows/cross-cloud-tests.yaml b/.github/workflows/cross-cloud-tests.yaml
new file mode 100644
index 0000000000..f1628f3f3d
--- /dev/null
+++ b/.github/workflows/cross-cloud-tests.yaml
@@ -0,0 +1,241 @@
+name: Cross-Cloud Chainsaw Tests
+
+# Builds the Odigos images for the current commit, pushes them to public ECR,
+# then provisions a cluster per matrix entry and runs the Chainsaw scenarios.
+on:
+  schedule:
+    - cron: '0 0 * * *' # Nightly run at midnight (UTC)
+  workflow_dispatch: # Manual trigger
+
+
+permissions:
+  id-token: write
+  contents: read
+
+jobs:
+  build-and-push-images:
+    permissions:
+      id-token: write
+      contents: read
+    name: Build and Push Docker Images
+    runs-on: warp-ubuntu-latest-x64-8x-spot
+    steps:
+      - name: Checkout Code
+        uses: actions/checkout@v4
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+
+      - name: Configure AWS credentials from OIDC
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: arn:aws:iam::061717858829:role/ecr-pull-push-role
+          aws-region: us-east-1
+
+
+      - name: Login to Amazon ECR
+        run: |
+          aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws
+
+      - name: Build and Tag Docker Images
+        env:
+          COMMIT_HASH: ${{ github.sha }}
+          ECR_PREFIX: public.ecr.aws/y2v0v6s7
+        run: |
+          # Build images
+          make build-images TAG=${COMMIT_HASH}
+
+          components="collector instrumentor ui scheduler autoscaler odiglet"
+
+          # Tag images for public ECR
+          for component in $components; do
+            docker tag "keyval/odigos-${component}:${COMMIT_HASH}" "${ECR_PREFIX}/keyval/odigos-${component}:${COMMIT_HASH}"
+          done
+
+          # Push the tagged images
+          for component in $components; do
+            docker push "${ECR_PREFIX}/keyval/odigos-${component}:${COMMIT_HASH}"
+          done
+
+  test:
+    permissions:
+      id-token: write
+      contents: read
+    needs: build-and-push-images
+    runs-on: warp-ubuntu-latest-x64-8x-spot
+    strategy:
+      # Keep the other matrix entries running when one fails, so a failure in
+      # one scenario does not cancel another one mid-provisioning.
+      fail-fast: false
+      matrix:
+        cloud-provider: [aks] # Add or remove providers as needed [TODO: later add -> eks + gke]
+        test-scenario: [multi-apps, helm-chart] # Add or remove scenarios as needed
+
+    steps:
+
+      - name: Configure AWS credentials from OIDC
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: arn:aws:iam::061717858829:role/ecr-pull-push-role
+          aws-region: us-east-1
+
+      - name: Login to Amazon ECR
+        run: |
+          aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws
+
+      - name: Checkout Code
+        uses: actions/checkout@v4
+
+      - name: Set Environment Variables for Terraform
+        run: |
+          CLUSTER_NAME="${{ matrix.test-scenario }}-${{ github.run_id }}"
+          echo "CLUSTER_NAME=${CLUSTER_NAME}" >> $GITHUB_ENV
+          echo "TF_VAR_cluster_name=${CLUSTER_NAME}" >> $GITHUB_ENV
+          echo "TF_VAR_resource_group_name=${CLUSTER_NAME}" >> $GITHUB_ENV
+          echo "TF_VAR_test_scenario=${{ matrix.test-scenario }}" >> $GITHUB_ENV
+          echo "TF_VAR_run_id=${{ github.run_id }}" >> $GITHUB_ENV
+
+
+      - name: Configure Cloud Provider
+        # Secrets are passed through `env` instead of being interpolated into
+        # the script text, so special characters in a secret cannot break or
+        # alter the shell commands.
+        env:
+          CLOUD_PROVIDER: ${{ matrix.cloud-provider }}
+          AZURE_CLIENT_ID: ${{ secrets.AZURE_CLIENT_ID }}
+          AZURE_CLIENT_SECRET: ${{ secrets.AZURE_CLIENT_SECRET }}
+          AZURE_TENANT_ID: ${{ secrets.AZURE_TENANT_ID }}
+          AZURE_SUBSCRIPTION_ID: ${{ secrets.AZURE_SUBSCRIPTION_ID }}
+          EKS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          EKS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+          GCP_SERVICE_ACCOUNT_KEY: ${{ secrets.GCP_SERVICE_ACCOUNT_KEY }}
+          GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
+        run: |
+          if [ "$CLOUD_PROVIDER" = "aks" ]; then
+            echo "Configuring for AKS"
+
+            # Set environment variables for Azure provider
+            echo "ARM_CLIENT_ID=${AZURE_CLIENT_ID}" >> "$GITHUB_ENV"
+            echo "ARM_CLIENT_SECRET=${AZURE_CLIENT_SECRET}" >> "$GITHUB_ENV"
+            echo "ARM_TENANT_ID=${AZURE_TENANT_ID}" >> "$GITHUB_ENV"
+            echo "ARM_SUBSCRIPTION_ID=${AZURE_SUBSCRIPTION_ID}" >> "$GITHUB_ENV"
+
+            az login --service-principal -u "$AZURE_CLIENT_ID" -p "$AZURE_CLIENT_SECRET" --tenant "$AZURE_TENANT_ID"
+            az account set --subscription "$AZURE_SUBSCRIPTION_ID"
+
+          elif [ "$CLOUD_PROVIDER" = "eks" ]; then
+            echo "Configuring for EKS"
+            aws configure set aws_access_key_id "$EKS_ACCESS_KEY_ID"
+            aws configure set aws_secret_access_key "$EKS_SECRET_ACCESS_KEY"
+            aws configure set region us-east-1
+
+          elif [ "$CLOUD_PROVIDER" = "gke" ]; then
+            echo "Configuring for GKE"
+            echo "$GCP_SERVICE_ACCOUNT_KEY" | base64 --decode > gcp-key.json
+            gcloud auth activate-service-account --key-file=gcp-key.json
+            gcloud config set project "$GCP_PROJECT_ID"
+
+          else
+            echo "Unknown cloud provider: $CLOUD_PROVIDER"
+            exit 1
+          fi
+
+      - uses: opentofu/setup-opentofu@v1
+
+      - name: Set Terraform Directory Based on Cloud Provider
+        run: |
+          if [ "${{ matrix.cloud-provider }}" == "aks" ]; then
+            echo "TF_DIR=./tests-infrastructure/terraform/aks" >> $GITHUB_ENV
+          elif [ "${{ matrix.cloud-provider }}" == "eks" ]; then
+            echo "TF_DIR=./tests-infrastructure/terraform/eks" >> $GITHUB_ENV
+          elif [ "${{ matrix.cloud-provider }}" == "gke" ]; then
+            echo "TF_DIR=./tests-infrastructure/terraform/gke" >> $GITHUB_ENV
+          else
+            echo "Unknown cloud provider"
+            exit 1
+          fi
+
+      - name: Initialize OpenTofu
+        run: tofu -chdir=$TF_DIR init
+
+
+      - name: Plan OpenTofu
+        run: tofu -chdir=$TF_DIR plan
+
+      - name: Apply OpenTofu Configuration
+        run: |
+          tofu -chdir=$TF_DIR apply -auto-approve
+
+      - name: Get kubeconfig for AKS/EKS/GKE
+        run: |
+          if [ "${{ matrix.cloud-provider }}" == "aks" ]; then
+            echo "Fetching AKS kubeconfig..."
+            az aks get-credentials --resource-group $CLUSTER_NAME --name $CLUSTER_NAME
+          elif [ "${{ matrix.cloud-provider }}" == "eks" ]; then
+            echo "Fetching EKS kubeconfig..."
+          elif [ "${{ matrix.cloud-provider }}" == "gke" ]; then
+            echo "Fetching GKE kubeconfig..."
+          else
+            echo "Unknown cloud provider"
+            exit 1
+          fi
+
+      - name: Verify cluster Access
+        run: |
+          kubectl get nodes || exit 1
+
+      - name: Install Chainsaw
+        uses: kyverno/action-install-chainsaw@v0.2.8
+
+      - name: Build CLI
+        run: |
+          cd cli
+          go build -tags=embed_manifests -o odigos
+          chmod +x odigos
+
+      - name: Install FE
+        # this is used for cypress tests which are not run in every scenario
+        if: matrix.test-scenario == 'multi-apps' || matrix.test-scenario == 'helm-chart' || matrix.test-scenario == 'fe-synthetic'
+        run: |
+          cd frontend/webapp
+          yarn install
+
+      - name: Run E2E Tests
+        run: |
+
+          # Chainsaw uses this to tell cross-cloud runs apart from the regular e2e tests
+          export MODE=cross-cloud-tests
+
+          # Chainsaw uses this to verify the odigos version is equal to the commit version
+          export COMMIT_HASH=${{ github.sha }}
+
+          chainsaw test tests/e2e/${{ matrix.test-scenario }}
+
+      - name: Destroy Resources
+        if: always() && env.TF_DIR != '' # Runs even if earlier steps fail; skipped when TF_DIR was never set
+        run: |
+          tofu -chdir=$TF_DIR destroy -auto-approve
+
+      - name: Extract Tag
+        id: extract_tag
+        # Run even after a failure so the failure notification can show the tag.
+        if: always()
+        run: echo "tag=${GITHUB_REF#refs/*/}" >> $GITHUB_OUTPUT
+
+      # Notify Slack on Failure or Cancellation
+      - name: Notify Slack on Failure or Cancellation
+        if: ${{ failure() || cancelled() }}
+        env:
+          SLACK_WEBHOOK_URL: ${{ secrets.CLOUD_PROVIDERS_TESTS_WEBHOOK_URL }}
+        run: |
+          curl -X POST -H 'Content-type: application/json' --data '{"blocks":[{"type":"section","text":{"type":"mrkdwn","text":"*ERROR*: Providers tests fail > `${{ matrix.cloud-provider }} - ${{ matrix.test-scenario }}`"}},{"type":"section","fields":[{"type":"mrkdwn","text":"*Link:*\n<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View the GitHub Run>"},{"type":"mrkdwn","text":"*Tag:*\n`${{ steps.extract_tag.outputs.tag }}`"}]}]}' "$SLACK_WEBHOOK_URL"
+
+      # Notify Slack on Success
+      - name: Notify Slack on Success
+        if: ${{ success() }}
+        env:
+          SLACK_WEBHOOK_URL: ${{ secrets.CLOUD_PROVIDERS_TESTS_WEBHOOK_URL }}
+        run: |
+          curl -X POST -H 'Content-type: application/json' --data '{"blocks":[{"type":"section","text":{"type":"mrkdwn","text":"*SUCCESS*: Providers tests succeed > `${{ matrix.cloud-provider }} - ${{ matrix.test-scenario }}`"}},{"type":"section","fields":[{"type":"mrkdwn","text":"*Link:*\n<${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View the GitHub Run>"},{"type":"mrkdwn","text":"*Tag:*\n`${{ steps.extract_tag.outputs.tag }}`"}]}]}' "$SLACK_WEBHOOK_URL"
+
diff --git a/.gitignore b/.gitignore
index 7fde39a2c3..5523c57f34 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,4 +7,8 @@ cli/odigos
 .venv
 **/__pycache__/
 **/*.pyc
-serving-certs/
\ No newline at end of file
+serving-certs/
+
+**.tfstate
+**.tfstate.backup
+**.terraform**
\ No newline at end of file
diff --git a/tests-infrastructure/terraform/aks/main.tf b/tests-infrastructure/terraform/aks/main.tf
new file mode 100644
index 0000000000..ffab99dd00
--- /dev/null
+++ b/tests-infrastructure/terraform/aks/main.tf
@@ -0,0 +1,27 @@
+terraform {
+  backend "local" {
+    path = "terraform-${var.test_scenario}-${var.run_id}.tfstate"
+  }
+}
+
+resource "azurerm_kubernetes_cluster" "aks" {
+  name                = var.cluster_name
+  location            = azurerm_resource_group.rg.location
+  resource_group_name = azurerm_resource_group.rg.name
+  dns_prefix          = var.cluster_name
+
+  default_node_pool {
+    name       = "default"
+    node_count = var.node_count
+    vm_size    = "Standard_B2s"
+  }
+
+  identity {
+    type = "SystemAssigned"
+  }
+}
+
+resource "azurerm_resource_group" "rg" {
+  name     = var.resource_group_name
+  location = "East US"
+}
diff --git a/tests-infrastructure/terraform/aks/outputs.tf
b/tests-infrastructure/terraform/aks/outputs.tf
new file mode 100644
index 0000000000..da1f331bae
--- /dev/null
+++ b/tests-infrastructure/terraform/aks/outputs.tf
@@ -0,0 +1,4 @@
+output "kube_config" {
+  value     = azurerm_kubernetes_cluster.aks.kube_config_raw
+  sensitive = true
+}
diff --git a/tests-infrastructure/terraform/aks/provider.tf b/tests-infrastructure/terraform/aks/provider.tf
new file mode 100644
index 0000000000..615cb43d9d
--- /dev/null
+++ b/tests-infrastructure/terraform/aks/provider.tf
@@ -0,0 +1,3 @@
+provider "azurerm" {
+  features {}
+}
\ No newline at end of file
diff --git a/tests-infrastructure/terraform/aks/variables.tf b/tests-infrastructure/terraform/aks/variables.tf
new file mode 100644
index 0000000000..01befe9f6b
--- /dev/null
+++ b/tests-infrastructure/terraform/aks/variables.tf
@@ -0,0 +1,27 @@
+variable "resource_group_name" {
+  description = "Name of the resource group"
+  type        = string
+  default     = "tests-rg"
+}
+
+variable "cluster_name" {
+  description = "Name of the AKS cluster"
+  type        = string
+  default     = "tests-aks"
+}
+
+variable "node_count" {
+  description = "Number of nodes in the cluster"
+  type        = number
+  default     = 1
+}
+
+variable "test_scenario" {
+  description = "Test scenario to differentiate state files"
+  type        = string
+}
+
+variable "run_id" {
+  description = "GitHub run ID for uniquely identifying state files"
+  type        = string
+}
\ No newline at end of file
diff --git a/tests/e2e/helm-chart/assert-instrumented-and-pipeline.yaml b/tests/e2e/helm-chart/assert-instrumented-and-pipeline.yaml
index f043c52b2b..79b61aaf3f 100644
--- a/tests/e2e/helm-chart/assert-instrumented-and-pipeline.yaml
+++ b/tests/e2e/helm-chart/assert-instrumented-and-pipeline.yaml
@@ -306,7 +306,7 @@ status:
     - key: process.runtime.version
       (value != null): true
     - key: telemetry.distro.version
-      value: e2e-test
+      (value != null): true
     - key: process.pid
       (value != null): true
 ---
diff --git a/tests/e2e/helm-chart/chainsaw-test.yaml b/tests/e2e/helm-chart/chainsaw-test.yaml
index
637bedb812..b536921069 100644
--- a/tests/e2e/helm-chart/chainsaw-test.yaml
+++ b/tests/e2e/helm-chart/chainsaw-test.yaml
@@ -32,17 +32,35 @@ spec:
               P="../../.."
               # "build" complete helm chart by copying CRDs into the template folder
               cp -r $P/api/config/crd/bases/* $P/helm/odigos/templates/crds/
-              helm upgrade --install odigos $P/helm/odigos --create-namespace --namespace odigos-test-ns --set image.tag=e2e-test
+              if [ "$MODE" = "cross-cloud-tests" ]; then
+                helm upgrade --install odigos $P/helm/odigos --create-namespace --namespace odigos-test-ns --set image.tag="$COMMIT_HASH" --set imagePrefix=public.ecr.aws/y2v0v6s7
+              else
+                helm upgrade --install odigos $P/helm/odigos --create-namespace --namespace odigos-test-ns --set image.tag=e2e-test
+              fi
               kubectl label namespace odigos-test-ns odigos.io/system-object="true"
             timeout: 60s
     - name: Verify Odigos Installation
       try:
         - script:
+            timeout: 200s
             content: |
+              echo "Starting Odigos version check..."
               export ACTUAL_VERSION=$(../../../cli/odigos version --cluster)
-              if [ "$ACTUAL_VERSION" != "e2e-test" ]; then
-                echo "Odigos version is not e2e-test, got $ACTUAL_VERSION"
-                exit 1
+              echo "Actual Version: $ACTUAL_VERSION"
+
+              if [ "$MODE" = "cross-cloud-tests" ]; then
+                if [ "$ACTUAL_VERSION" != "$COMMIT_HASH" ]; then
+                  echo "Odigos version is not the expected commit hash, got $ACTUAL_VERSION"
+                  exit 1
+                fi
+
+                kubectl wait --for=condition=ready pods --all -n odigos-test-ns --timeout=40s
+
+              else
+                if [ "$ACTUAL_VERSION" != "e2e-test" ]; then
+                  echo "Odigos version is not e2e-test, got $ACTUAL_VERSION"
+                  exit 1
+                fi
               fi
         - assert:
             file: assert-odigos-installed.yaml
@@ -51,20 +69,32 @@ spec:
         - script:
             timeout: 100s
             content: |
-              docker pull keyval/odigos-demo-inventory:v0.1
-              docker pull keyval/odigos-demo-membership:v0.1
-              docker pull keyval/odigos-demo-coupon:v0.1
-              docker pull keyval/odigos-demo-inventory:v0.1
-              docker pull keyval/odigos-demo-frontend:v0.2
-              kind load docker-image keyval/odigos-demo-inventory:v0.1
-              kind load docker-image keyval/odigos-demo-membership:v0.1
-              kind load docker-image keyval/odigos-demo-coupon:v0.1
-              kind load docker-image keyval/odigos-demo-inventory:v0.1
-              kind load docker-image keyval/odigos-demo-frontend:v0.2
+              if [ "$MODE" != "cross-cloud-tests" ]; then
+                docker pull keyval/odigos-demo-inventory:v0.1
+                docker pull keyval/odigos-demo-membership:v0.1
+                docker pull keyval/odigos-demo-coupon:v0.1
+                docker pull keyval/odigos-demo-frontend:v0.2
+                kind load docker-image keyval/odigos-demo-inventory:v0.1
+                kind load docker-image keyval/odigos-demo-membership:v0.1
+                kind load docker-image keyval/odigos-demo-coupon:v0.1
+                kind load docker-image keyval/odigos-demo-frontend:v0.2
+              else
+                echo "Skipping docker pull and kind load for cross-cloud-tests mode"
+              fi
         - apply:
             file: 02-install-simple-demo.yaml
+        - script:
+            timeout: 100s
+            content: |
+              # Wait for the pods to be ready
+              kubectl wait --for=condition=ready pod -l app=frontend --timeout=50s
+              kubectl wait --for=condition=ready pod -l app=coupon --timeout=50s
+              kubectl wait --for=condition=ready pod -l app=inventory --timeout=50s
+              kubectl wait --for=condition=ready pod -l app=pricing --timeout=50s
+              kubectl wait --for=condition=ready pod -l app=membership --timeout=50s
         - assert:
             file: assert-apps-installed.yaml
+
     - name: Detect Languages
       try:
         - apply:
diff --git a/tests/e2e/helm-chart/tracesql/resource-attributes.yaml b/tests/e2e/helm-chart/tracesql/resource-attributes.yaml
index 934439b7ed..f6b56aef5d 100644
--- a/tests/e2e/helm-chart/tracesql/resource-attributes.yaml
+++ b/tests/e2e/helm-chart/tracesql/resource-attributes.yaml
@@ -6,9 +6,9 @@ description: |
   B. Kubernetes attributes are correctly set on all spans
   At the time of writing this test, TraceQL api does not support not equal to nil so we use regex instead.
 query: |
-  { resource.odigos.version != "e2e-test" ||
+  { resource.odigos.version !~ ".*" ||
     resource.k8s.deployment.name !~ ".*" ||
-    resource.k8s.node.name !~ "kind-control-plane" ||
+    resource.k8s.node.name !~ "(kind-control-plane|aks-.*)" ||
     resource.k8s.pod.name !~ ".*" }
 expected:
   count: 0
\ No newline at end of file
diff --git a/tests/e2e/multi-apps/assert-instrumented-and-pipeline.yaml b/tests/e2e/multi-apps/assert-instrumented-and-pipeline.yaml
index 29e00f88c4..d493ea6e0a 100644
--- a/tests/e2e/multi-apps/assert-instrumented-and-pipeline.yaml
+++ b/tests/e2e/multi-apps/assert-instrumented-and-pipeline.yaml
@@ -306,7 +306,7 @@ status:
     - key: process.runtime.version
       (value != null): true
     - key: telemetry.distro.version
-      value: e2e-test
+      (value != null): true
     - key: process.pid
       (value != null): true
 ---
diff --git a/tests/e2e/multi-apps/chainsaw-test.yaml b/tests/e2e/multi-apps/chainsaw-test.yaml
index 8dfeb4d47d..6324910cd4 100644
--- a/tests/e2e/multi-apps/chainsaw-test.yaml
+++ b/tests/e2e/multi-apps/chainsaw-test.yaml
@@ -27,29 +27,39 @@ spec:
     - name: Install Odigos
       try:
         - script:
-            content: ../../../cli/odigos install --version e2e-test
+            content: |
+              if [ "$MODE" = "cross-cloud-tests" ]; then
+                ../../../cli/odigos install --version "$COMMIT_HASH" --image-prefix=public.ecr.aws/y2v0v6s7
+              else
+                ../../../cli/odigos install --version e2e-test
+              fi
             timeout: 60s
         - assert:
             file: assert-odigos-installed.yaml
+
     - name: Install Demo App
       try:
         - script:
-            timeout: 100s
+            timeout: 200s
             content: |
-              docker pull keyval/odigos-demo-inventory:v0.1
-              docker pull keyval/odigos-demo-membership:v0.1
-              docker pull keyval/odigos-demo-coupon:v0.1
-              docker pull keyval/odigos-demo-inventory:v0.1
-              docker pull keyval/odigos-demo-frontend:v0.2
-              kind load docker-image keyval/odigos-demo-inventory:v0.1
-              kind load docker-image keyval/odigos-demo-membership:v0.1
-              kind load docker-image keyval/odigos-demo-coupon:v0.1
-              kind load docker-image keyval/odigos-demo-inventory:v0.1
-              kind load docker-image keyval/odigos-demo-frontend:v0.2
+              # Preload the demo images into the local kind cluster (skipped in cross-cloud mode)
+              if [ "$MODE" != "cross-cloud-tests" ]; then
+                docker pull keyval/odigos-demo-inventory:v0.1
+                docker pull keyval/odigos-demo-membership:v0.1
+                docker pull keyval/odigos-demo-coupon:v0.1
+                docker pull keyval/odigos-demo-frontend:v0.2
+                kind load docker-image keyval/odigos-demo-inventory:v0.1
+                kind load docker-image keyval/odigos-demo-membership:v0.1
+                kind load docker-image keyval/odigos-demo-coupon:v0.1
+                kind load docker-image keyval/odigos-demo-frontend:v0.2
+              else
+                echo "Skipping docker pull and kind load for cross-cloud-tests mode"
+              fi
         - apply:
             file: 02-install-simple-demo.yaml
         - assert:
             file: assert-apps-installed.yaml
+
     - name: Detect Languages
       try:
         - apply:
diff --git a/tests/e2e/multi-apps/tracesql/resource-attributes.yaml b/tests/e2e/multi-apps/tracesql/resource-attributes.yaml
index 934439b7ed..f6b56aef5d 100644
--- a/tests/e2e/multi-apps/tracesql/resource-attributes.yaml
+++ b/tests/e2e/multi-apps/tracesql/resource-attributes.yaml
@@ -6,9 +6,9 @@ description: |
   B. Kubernetes attributes are correctly set on all spans
   At the time of writing this test, TraceQL api does not support not equal to nil so we use regex instead.
 query: |
-  { resource.odigos.version != "e2e-test" ||
+  { resource.odigos.version !~ ".*" ||
     resource.k8s.deployment.name !~ ".*" ||
-    resource.k8s.node.name !~ "kind-control-plane" ||
+    resource.k8s.node.name !~ "(kind-control-plane|aks-.*)" ||
     resource.k8s.pod.name !~ ".*" }
 expected:
   count: 0
\ No newline at end of file