diff --git a/.github/workflows/static-checking.yml b/.github/workflows/static-checking.yml index ce22ffe06..785bb2dd9 100644 --- a/.github/workflows/static-checking.yml +++ b/.github/workflows/static-checking.yml @@ -28,10 +28,6 @@ jobs: run: | python -m pip install --upgrade pip pip install -U -r requirements-dev.txt - - name: CloudFormation Lint - Base - run: cfn-lint -t cloudformation/base.yaml - - name: CloudFormation Lint - Databases - run: cfn-lint -t cloudformation/databases.yaml - name: mypy check run: mypy awswrangler - name: Flake8 Lint diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5fbb71400..37a5441a7 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -137,13 +137,21 @@ or ``pip install -r requirements-dev.txt`` -* [OPTIONAL] Set AWS_DEFAULT_REGION to define the region the Data Lake Test envrioment will deploy into. You may want to choose a region which you don't currently use: +* Go to the ``test_infra`` directory + +``cd test_infra`` + +* Install CDK dependencies: + +``pip install -r requirements.txt`` + +* [OPTIONAL] Set AWS_DEFAULT_REGION to define the region the Data Lake Test environment will deploy into. 
You may want to choose a region which you don't currently use: ``export AWS_DEFAULT_REGION=ap-northeast-1`` -* Go to the ``cloudformation`` directory +* Go to the ``scripts`` directory -``cd cloudformation`` +``cd scripts`` * Deploy the Cloudformation template `base.yaml` @@ -151,7 +159,7 @@ or * Return to the project root directory -``cd ..`` +``cd ../../`` * Run the validation script: @@ -167,7 +175,7 @@ or * [OPTIONAL] To remove the base test environment cloud formation stack post testing: -``./cloudformation/delete-base.sh`` +``./test_infra/scripts/delete-base.sh`` ### Full test environment @@ -186,13 +194,21 @@ or ``pip install -r requirements-dev.txt`` +* Go to the ``test_infra`` directory + +``cd test_infra`` + +* Install CDK dependencies: + +``pip install -r requirements.txt`` + * [OPTIONAL] Set AWS_DEFAULT_REGION to define the region the Full Test envrioment will deploy into. You may want to choose a region which you don't currently use: ``export AWS_DEFAULT_REGION=ap-northeast-1`` -* Go to the ``cloudformation`` directory +* Go to the ``scripts`` directory -``cd cloudformation`` +``cd scripts`` * Deploy the Cloudformation templates `base.yaml` and `databases.yaml`. This step could take about 15 minutes to deploy. @@ -212,7 +228,7 @@ or * Return to the project root directory -``cd ..`` +``cd ../../`` * [OPTIONAL] If you intend to run all test, you also need to make sure that you have Amazon QuickSight activated and your AWS user must be register on that. 
@@ -234,9 +250,9 @@ or * [OPTIONAL] To remove the base test environment cloud formation stack post testing: -``./cloudformation/delete-base.sh`` +``./test_infra/scripts/delete-base.sh`` -``./cloudformation/delete-databases.sh`` +``./test_infra/scripts/delete-databases.sh`` ## Recommended Visual Studio Code Recommended setting diff --git a/cloudformation/base.yaml b/cloudformation/base.yaml deleted file mode 100644 index 6e77560d4..000000000 --- a/cloudformation/base.yaml +++ /dev/null @@ -1,284 +0,0 @@ -AWSTemplateFormatVersion: 2010-09-09 -Description: | - AWS Data Wrangler Development Base Data Lake Infrastructure. VPC, Subnets, S3 Bucket, Glue Database, etc. -Resources: - VPC: - Type: AWS::EC2::VPC - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - - Key: Name - Value: aws-data-wrangler - CidrBlock: 10.19.224.0/19 - EnableDnsSupport: true - EnableDnsHostnames: true - InternetGateway: - Type: AWS::EC2::InternetGateway - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - InternetGatewayAttachment: - Type: AWS::EC2::VPCGatewayAttachment - Properties: - InternetGatewayId: - Ref: InternetGateway - VpcId: - Ref: VPC - PublicSubnet1: - Type: AWS::EC2::Subnet - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - - Key: Name - Value: aws-data-wrangler-public1 - VpcId: - Ref: VPC - AvailabilityZone: - Fn::Select: - - 0 - - Fn::GetAZs: '' - CidrBlock: 10.19.229.0/24 - MapPublicIpOnLaunch: true - PublicSubnet2: - Type: AWS::EC2::Subnet - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - - Key: Name - Value: aws-data-wrangler-public2 - VpcId: - Ref: VPC - AvailabilityZone: - Fn::Select: - - 1 - - Fn::GetAZs: '' - CidrBlock: 10.19.230.0/24 - MapPublicIpOnLaunch: true - PrivateSubnet: - Type: AWS::EC2::Subnet - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - - Key: Name - Value: aws-data-wrangler-private - VpcId: - Ref: VPC - AvailabilityZone: - Fn::Select: - - 0 - - Fn::GetAZs: '' - CidrBlock: 10.19.231.0/24 - 
MapPublicIpOnLaunch: false - PublicRouteTable: - Type: AWS::EC2::RouteTable - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - VpcId: - Ref: VPC - DefaultPublicRoute: - Type: AWS::EC2::Route - DependsOn: InternetGatewayAttachment - Properties: - RouteTableId: - Ref: PublicRouteTable - DestinationCidrBlock: 0.0.0.0/0 - GatewayId: - Ref: InternetGateway - PublicSubnet1RouteTableAssociation: - Type: AWS::EC2::SubnetRouteTableAssociation - Properties: - RouteTableId: - Ref: PublicRouteTable - SubnetId: - Ref: PublicSubnet1 - PublicSubnet2RouteTableAssociation: - Type: AWS::EC2::SubnetRouteTableAssociation - Properties: - RouteTableId: - Ref: PublicRouteTable - SubnetId: - Ref: PublicSubnet2 - NatGatewayEIP: - Type: AWS::EC2::EIP - DependsOn: InternetGatewayAttachment - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - Domain: vpc - NatGateway: - Type: AWS::EC2::NatGateway - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - AllocationId: - Fn::GetAtt: - - NatGatewayEIP - - AllocationId - SubnetId: - Ref: PublicSubnet1 - PrivateRouteTable: - Type: AWS::EC2::RouteTable - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - VpcId: - Ref: VPC - DefaultPrivateRoute: - Type: AWS::EC2::Route - Properties: - RouteTableId: - Ref: PrivateRouteTable - DestinationCidrBlock: 0.0.0.0/0 - NatGatewayId: - Ref: NatGateway - PrivateSubnetRouteTableAssociation: - Type: AWS::EC2::SubnetRouteTableAssociation - Properties: - RouteTableId: - Ref: PrivateRouteTable - SubnetId: - Ref: PrivateSubnet - KmsKeyAlias: - Type: AWS::KMS::Alias - Properties: - AliasName: alias/aws-data-wrangler-key - TargetKeyId: - Ref: KmsKey - KmsKey: - Type: AWS::KMS::Key - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - Description: Aws Data Wrangler Test Key. 
- KeyPolicy: - Version: '2012-10-17' - Id: aws-data-wrangler-key - Statement: - - Sid: Enable IAM User Permissions - Effect: Allow - Principal: - AWS: - Fn::Sub: arn:aws:iam::${AWS::AccountId}:root - Action: kms:* - Resource: '*' - - Sid: Allow administration of the key - Effect: Allow - Principal: - AWS: - Ref: AWS::AccountId - Action: - - kms:Create* - - kms:Describe* - - kms:Enable* - - kms:List* - - kms:Put* - - kms:Update* - - kms:Revoke* - - kms:Disable* - - kms:Get* - - kms:Delete* - - kms:ScheduleKeyDeletion - - kms:CancelKeyDeletion - Resource: '*' - Bucket: - Type: AWS::S3::Bucket - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - PublicAccessBlockConfiguration: - BlockPublicAcls: true - BlockPublicPolicy: true - IgnorePublicAcls: true - RestrictPublicBuckets: true - LifecycleConfiguration: - Rules: - - Id: CleaningUp - Status: Enabled - ExpirationInDays: 1 - AbortIncompleteMultipartUpload: - DaysAfterInitiation: 1 - NoncurrentVersionExpirationInDays: 1 - GlueDatabase: - Type: AWS::Glue::Database - Properties: - CatalogId: - Ref: AWS::AccountId - DatabaseInput: - Name: aws_data_wrangler - Description: AWS Data Wrangler Test Arena - Glue Database - LogGroup: - Type: AWS::Logs::LogGroup - Properties: - RetentionInDays: 30 - LogStream: - Type: AWS::Logs::LogStream - Properties: - LogGroupName: - Ref: LogGroup -Outputs: - Region: - Value: - Ref: AWS::Region - Description: AWS Region - VPC: - Value: - Ref: VPC - Export: - Name: aws-data-wrangler-base-VPC - Description: VPC ID - PublicSubnet1: - Value: - Ref: PublicSubnet1 - Export: - Name: aws-data-wrangler-base-PublicSubnet1 - Description: Subnet ID - PublicSubnet2: - Value: - Ref: PublicSubnet2 - Export: - Name: aws-data-wrangler-base-PublicSubnet2 - Description: Subnet ID 2 - PrivateSubnet: - Value: - Ref: PrivateSubnet - Export: - Name: aws-data-wrangler-base-PrivateSubnet - Description: Private Subnet ID - KmsKeyArn: - Value: - Fn::GetAtt: - - KmsKey - - Arn - Export: - Name: 
aws-data-wrangler-base-KmsKeyArn - Description: KMS Key ARN. - BucketName: - Value: - Ref: Bucket - Export: - Name: aws-data-wrangler-base-BucketName - Description: Name of the S3 Bucket used for tests. - GlueDatabaseName: - Value: - Ref: GlueDatabase - Description: Glue Database Name. - LogGroupName: - Value: - Ref: LogGroup - Description: LogGroup name. - LogStream: - Value: - Ref: LogStream - Description: LogStream name. diff --git a/cloudformation/databases.yaml b/cloudformation/databases.yaml deleted file mode 100644 index ca698e5df..000000000 --- a/cloudformation/databases.yaml +++ /dev/null @@ -1,582 +0,0 @@ -AWSTemplateFormatVersion: 2010-09-09 -Description: | - AWS Data Wrangler Development Databases Infrastructure Redshift, Aurora PostgreSQL, Aurora MySQL, Microsoft SQL Server -Parameters: - DatabasesPassword: - Type: String - Description: Password for all databases - NoEcho: true -Resources: - RedshiftRole: - Type: AWS::IAM::Role - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - AssumeRolePolicyDocument: - Version: 2012-10-17 - Statement: - - Effect: Allow - Principal: - Service: - - redshift.amazonaws.com - Action: - - sts:AssumeRole - Path: / - Policies: - - PolicyName: Root - PolicyDocument: - Version: 2012-10-17 - Statement: - - Effect: Allow - Action: - - kms:Encrypt - - kms:Decrypt - - kms:GenerateDataKey - Resource: - - Fn::ImportValue: aws-data-wrangler-base-KmsKeyArn - - Effect: Allow - Action: - - s3:Get* - - s3:List* - - s3:Put* - Resource: - - Fn::Sub: - - arn:aws:s3:::${Bucket} - - Bucket: - Fn::ImportValue: aws-data-wrangler-base-BucketName - - Fn::Sub: - - arn:aws:s3:::${Bucket}/* - - Bucket: - Fn::ImportValue: aws-data-wrangler-base-BucketName - - Effect: Allow - Action: - - lakeformation:GrantPermissions - Resource: '*' - - Effect: Allow - Action: - - glue:SearchTables - - glue:GetConnections - - glue:GetDataCatalogEncryptionSettings - - glue:GetTables - - glue:GetTableVersions - - glue:GetPartitions - - 
glue:DeleteTableVersion - - glue:BatchGetPartition - - glue:GetDatabases - - glue:GetTags - - glue:GetTable - - glue:GetDatabase - - glue:GetPartition - - glue:GetTableVersion - - glue:GetConnection - - glue:GetUserDefinedFunction - - glue:GetUserDefinedFunctions - Resource: '*' - RedshiftSubnetGroup: - Type: AWS::Redshift::ClusterSubnetGroup - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - Description: AWS Data Wrangler Test Arena - Redshift Subnet Group - SubnetIds: - - Fn::ImportValue: aws-data-wrangler-base-PublicSubnet1 - DatabaseSecurityGroup: - Type: AWS::EC2::SecurityGroup - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - - Key: Name - Value: aws-data-wrangler - VpcId: - Fn::ImportValue: aws-data-wrangler-base-VPC - GroupDescription: AWS Data Wrangler Test Arena - Redshift security group - DatabaseSecurityGroupIngress: - Type: AWS::EC2::SecurityGroupIngress - Properties: - Description: Self Referencing - GroupId: - Ref: DatabaseSecurityGroup - IpProtocol: '-1' - FromPort: 0 - ToPort: 65535 - SourceSecurityGroupId: - Ref: DatabaseSecurityGroup - Redshift: - Type: AWS::Redshift::Cluster - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - DBName: test - MasterUsername: test - MasterUserPassword: - Ref: DatabasesPassword - NodeType: dc2.large - ClusterType: single-node - VpcSecurityGroupIds: - - Ref: DatabaseSecurityGroup - ClusterSubnetGroupName: - Ref: RedshiftSubnetGroup - PubliclyAccessible: true - Port: 5439 - IamRoles: - - Fn::GetAtt: - - RedshiftRole - - Arn - RdsSubnetGroup: - Type: AWS::RDS::DBSubnetGroup - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - DBSubnetGroupDescription: RDS Database Subnet Group - SubnetIds: - - Fn::ImportValue: aws-data-wrangler-base-PublicSubnet1 - - Fn::ImportValue: aws-data-wrangler-base-PublicSubnet2 - RdsRole: - Type: AWS::IAM::Role - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - AssumeRolePolicyDocument: - Version: 2012-10-17 - Statement: - - 
Effect: Allow - Principal: - Service: - - rds.amazonaws.com - Action: - - sts:AssumeRole - Path: / - Policies: - - PolicyName: S3GetAndList - PolicyDocument: - Version: 2012-10-17 - Statement: - - Effect: Allow - Action: - - s3:Get* - - s3:List* - - s3:Put* - Resource: - - Fn::Sub: - - arn:aws:s3:::${Bucket} - - Bucket: - Fn::ImportValue: aws-data-wrangler-base-BucketName - - Fn::Sub: - - arn:aws:s3:::${Bucket}/* - - Bucket: - Fn::ImportValue: aws-data-wrangler-base-BucketName - PostgresqlParameterGroup: - Type: AWS::RDS::DBClusterParameterGroup - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - Description: Postgres 11 - Family: aurora-postgresql11 - Parameters: - apg_plan_mgmt.capture_plan_baselines: 'off' - AuroraClusterPostgresql: - Type: AWS::RDS::DBCluster - DeletionPolicy: Delete - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - Engine: aurora-postgresql - EngineVersion: '11.6' - DBClusterIdentifier: postgresql-cluster-wrangler - MasterUsername: test - MasterUserPassword: - Ref: DatabasesPassword - BackupRetentionPeriod: 1 - DBSubnetGroupName: - Ref: RdsSubnetGroup - VpcSecurityGroupIds: - - Ref: DatabaseSecurityGroup - DBClusterParameterGroupName: - Ref: PostgresqlParameterGroup - AssociatedRoles: - - FeatureName: s3Import - RoleArn: - Fn::GetAtt: - - RdsRole - - Arn - AuroraInstancePostgresql: - Type: AWS::RDS::DBInstance - DeletionPolicy: Delete - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - Engine: aurora-postgresql - EngineVersion: '11.6' - DBInstanceIdentifier: postgresql-instance-wrangler - DBClusterIdentifier: - Ref: AuroraClusterPostgresql - DBInstanceClass: db.t3.medium - DBSubnetGroupName: - Ref: RdsSubnetGroup - PubliclyAccessible: true - MysqlParameterGroup: - Type: AWS::RDS::DBClusterParameterGroup - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - Description: Mysql 5.7 - Family: aurora-mysql5.7 - Parameters: - aurora_load_from_s3_role: - Fn::GetAtt: - - RdsRole - - Arn - 
aws_default_s3_role: - Fn::GetAtt: - - RdsRole - - Arn - aurora_select_into_s3_role: - Fn::GetAtt: - - RdsRole - - Arn - AuroraClusterMysql: - Type: AWS::RDS::DBCluster - DeletionPolicy: Delete - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - Engine: aurora-mysql - EngineVersion: '5.7' - DBClusterIdentifier: mysql-cluster-wrangler - MasterUsername: test - MasterUserPassword: - Ref: DatabasesPassword - BackupRetentionPeriod: 1 - DBSubnetGroupName: - Ref: RdsSubnetGroup - VpcSecurityGroupIds: - - Ref: DatabaseSecurityGroup - DBClusterParameterGroupName: - Ref: MysqlParameterGroup - DatabaseName: test - AssociatedRoles: - - RoleArn: - Fn::GetAtt: - - RdsRole - - Arn - AuroraInstanceMysql: - Type: AWS::RDS::DBInstance - DeletionPolicy: Delete - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - Engine: aurora-mysql - EngineVersion: '5.7' - DBInstanceIdentifier: mysql-instance-wrangler - DBClusterIdentifier: - Ref: AuroraClusterMysql - DBInstanceClass: db.t3.small - DBSubnetGroupName: - Ref: RdsSubnetGroup - PubliclyAccessible: true - SqlServerInstance: - Type: AWS::RDS::DBInstance - DeletionPolicy: Delete - Properties: - Tags: - - Key: Env - Value: aws-data-wrangler - Engine: sqlserver-ex - EngineVersion: '15.00' - DBInstanceIdentifier: sqlserver-instance-wrangler - DBInstanceClass: db.t3.small - AllocatedStorage: '20' - MasterUsername: test - MasterUserPassword: - Ref: DatabasesPassword - DBSubnetGroupName: - Ref: RdsSubnetGroup - VPCSecurityGroups: - - Ref: DatabaseSecurityGroup - PubliclyAccessible: true - AssociatedRoles: - - RoleArn: - Fn::GetAtt: - - RdsRole - - Arn - FeatureName: S3_INTEGRATION - RedshiftGlueConnection: - Type: AWS::Glue::Connection - Properties: - CatalogId: - Ref: AWS::AccountId - ConnectionInput: - Description: Connect to Redshift. 
- ConnectionType: JDBC - PhysicalConnectionRequirements: - AvailabilityZone: - Fn::Select: - - 0 - - Fn::GetAZs: '' - SecurityGroupIdList: - - Ref: DatabaseSecurityGroup - SubnetId: - Fn::ImportValue: aws-data-wrangler-base-PrivateSubnet - ConnectionProperties: - JDBC_CONNECTION_URL: - Fn::Sub: jdbc:redshift://${Redshift.Endpoint.Address}:${Redshift.Endpoint.Port}/test - USERNAME: test - PASSWORD: - Ref: DatabasesPassword - Name: aws-data-wrangler-redshift - PostgresqlGlueConnection: - Type: AWS::Glue::Connection - Properties: - CatalogId: - Ref: AWS::AccountId - ConnectionInput: - Description: Connect to Aurora (PostgreSQL). - ConnectionType: JDBC - PhysicalConnectionRequirements: - AvailabilityZone: - Fn::Select: - - 0 - - Fn::GetAZs: '' - SecurityGroupIdList: - - Ref: DatabaseSecurityGroup - SubnetId: - Fn::ImportValue: aws-data-wrangler-base-PrivateSubnet - ConnectionProperties: - JDBC_CONNECTION_URL: - Fn::Sub: jdbc:postgresql://${AuroraInstancePostgresql.Endpoint.Address}:${AuroraInstancePostgresql.Endpoint.Port}/postgres - USERNAME: test - PASSWORD: - Ref: DatabasesPassword - Name: aws-data-wrangler-postgresql - MysqlGlueConnection: - Type: AWS::Glue::Connection - Properties: - CatalogId: - Ref: AWS::AccountId - ConnectionInput: - Description: Connect to Aurora (MySQL). - ConnectionType: JDBC - PhysicalConnectionRequirements: - AvailabilityZone: - Fn::Select: - - 0 - - Fn::GetAZs: '' - SecurityGroupIdList: - - Ref: DatabaseSecurityGroup - SubnetId: - Fn::ImportValue: aws-data-wrangler-base-PrivateSubnet - ConnectionProperties: - JDBC_CONNECTION_URL: - Fn::Sub: jdbc:mysql://${AuroraInstanceMysql.Endpoint.Address}:${AuroraInstanceMysql.Endpoint.Port}/test - USERNAME: test - PASSWORD: - Ref: DatabasesPassword - Name: aws-data-wrangler-mysql - MysqlGlueConnectionSSL: - Type: AWS::Glue::Connection - Properties: - CatalogId: - Ref: AWS::AccountId - ConnectionInput: - Description: Connect to Aurora (MySQL) SSL enabled. 
- ConnectionType: JDBC - PhysicalConnectionRequirements: - AvailabilityZone: - Fn::Select: - - 0 - - Fn::GetAZs: '' - SecurityGroupIdList: - - Ref: DatabaseSecurityGroup - SubnetId: - Fn::ImportValue: aws-data-wrangler-base-PrivateSubnet - ConnectionProperties: - JDBC_CONNECTION_URL: - Fn::Sub: jdbc:mysql://${AuroraInstanceMysql.Endpoint.Address}:${AuroraInstanceMysql.Endpoint.Port}/test - JDBC_ENFORCE_SSL: true - CUSTOM_JDBC_CERT: s3://rds-downloads/rds-combined-ca-bundle.pem - USERNAME: test - PASSWORD: - Ref: DatabasesPassword - Name: aws-data-wrangler-mysql-ssl - SqlServerGlueConnection: - Type: AWS::Glue::Connection - Properties: - CatalogId: - Ref: AWS::AccountId - ConnectionInput: - Description: Connect to SQL Server. - ConnectionType: JDBC - PhysicalConnectionRequirements: - AvailabilityZone: - Fn::Select: - - 0 - - Fn::GetAZs: '' - SecurityGroupIdList: - - Ref: DatabaseSecurityGroup - SubnetId: - Fn::ImportValue: aws-data-wrangler-base-PrivateSubnet - ConnectionProperties: - JDBC_CONNECTION_URL: - Fn::Sub: jdbc:sqlserver://${SqlServerInstance.Endpoint.Address}:${SqlServerInstance.Endpoint.Port};databaseName=test - USERNAME: test - PASSWORD: - Ref: DatabasesPassword - Name: aws-data-wrangler-sqlserver - GlueCatalogSettings: - Type: AWS::Glue::DataCatalogEncryptionSettings - Properties: - CatalogId: - Ref: AWS::AccountId - DataCatalogEncryptionSettings: - ConnectionPasswordEncryption: - KmsKeyId: - Fn::ImportValue: aws-data-wrangler-base-KmsKeyArn - ReturnConnectionPasswordEncrypted: true - EncryptionAtRest: - CatalogEncryptionMode: DISABLED - RedshiftSecret: - Type: AWS::SecretsManager::Secret - Properties: - Name: aws-data-wrangler/redshift - Description: Redshift credentials - SecretString: - Fn::Sub: | - { - "username": "test", - "password": "${DatabasesPassword}", - "engine": "redshift", - "host": "${Redshift.Endpoint.Address}", - "port": ${Redshift.Endpoint.Port}, - "dbClusterIdentifier": "${Redshift}" - } - Tags: - - Key: Env - Value: 
aws-data-wrangler - PostgresqlSecret: - Type: AWS::SecretsManager::Secret - Properties: - Name: aws-data-wrangler/postgresql - Description: Postgresql credentials - SecretString: - Fn::Sub: | - { - "username": "test", - "password": "${DatabasesPassword}", - "engine": "postgresql", - "host": "${AuroraInstancePostgresql.Endpoint.Address}", - "port": ${AuroraInstancePostgresql.Endpoint.Port}, - "dbClusterIdentifier": "${AuroraInstancePostgresql}", - "dbname": "postgres" - } - Tags: - - Key: Env - Value: aws-data-wrangler - MysqlSecret: - Type: AWS::SecretsManager::Secret - Properties: - Name: aws-data-wrangler/mysql - Description: Mysql credentials - SecretString: - Fn::Sub: | - { - "username": "test", - "password": "${DatabasesPassword}", - "engine": "mysql", - "host": "${AuroraInstanceMysql.Endpoint.Address}", - "port": ${AuroraInstanceMysql.Endpoint.Port}, - "dbClusterIdentifier": "${AuroraInstanceMysql}", - "dbname": "test" - } - Tags: - - Key: Env - Value: aws-data-wrangler - SqlServerSecret: - Type: AWS::SecretsManager::Secret - Properties: - Name: aws-data-wrangler/sqlserver - Description: SQL Server credentials - SecretString: - Fn::Sub: | - { - "username": "test", - "password": "${DatabasesPassword}", - "engine": "sqlserver", - "host": "${SqlServerInstance.Endpoint.Address}", - "port": ${SqlServerInstance.Endpoint.Port}, - "dbClusterIdentifier": "${SqlServerInstance}", - "dbname": "test" - } - Tags: - - Key: Env - Value: aws-data-wrangler - DatabaseSecurityGroupId: - Type: AWS::SSM::Parameter - Properties: - Type: String - Description: Database Security Group Id - Name: /Wrangler/EC2/DatabaseSecurityGroupId - Value: - Fn::GetAtt: - - DatabaseSecurityGroup - - GroupId -Outputs: - DatabasesPassword: - Value: - Ref: DatabasesPassword - Description: Password. - RedshiftIdentifier: - Value: - Ref: Redshift - RedshiftAddress: - Value: - Fn::GetAtt: - - Redshift - - Endpoint.Address - Description: Redshift address. 
- RedshiftPort: - Value: - Fn::GetAtt: - - Redshift - - Endpoint.Port - Description: Redshift Endpoint Port. - RedshiftRole: - Value: - Fn::GetAtt: - - RedshiftRole - - Arn - Description: Redshift IAM role. - PostgresqlAddress: - Value: - Fn::GetAtt: - - AuroraInstancePostgresql - - Endpoint.Address - Description: Postgresql Address - MysqlAddress: - Value: - Fn::GetAtt: - - AuroraInstanceMysql - - Endpoint.Address - Description: Mysql Address - SqlServerAddress: - Value: - Fn::GetAtt: - - SqlServerInstance - - Endpoint.Address - Description: SQL Server Address - DatabaseSecurityGroupId: - Value: - Fn::GetAtt: - - DatabaseSecurityGroup - - GroupId diff --git a/cloudformation/delete-base.sh b/cloudformation/delete-base.sh deleted file mode 100755 index 06b79ed34..000000000 --- a/cloudformation/delete-base.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env bash -set -e - -# Deploying -aws cloudformation delete-stack \ - --stack-name aws-data-wrangler-base diff --git a/cloudformation/delete-databases.sh b/cloudformation/delete-databases.sh deleted file mode 100755 index dfbbb3806..000000000 --- a/cloudformation/delete-databases.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env bash -set -e - -# Deleting -aws cloudformation delete-stack \ - --stack-name aws-data-wrangler-databases diff --git a/cloudformation/deploy-base.sh b/cloudformation/deploy-base.sh deleted file mode 100755 index 087c7ef6f..000000000 --- a/cloudformation/deploy-base.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env bash -set -e - -# Linting and formatting the base stack -cfn-lint -t base.yaml -rm -rf temp.yaml -cfn-flip -c -l -n base.yaml temp.yaml -cfn-lint -t temp.yaml -mv temp.yaml base.yaml - -# Deploying -aws cloudformation deploy \ - --template-file base.yaml \ - --stack-name aws-data-wrangler-base \ - --capabilities CAPABILITY_IAM diff --git a/cloudformation/deploy-databases.sh b/cloudformation/deploy-databases.sh deleted file mode 100755 index 8fdeb959e..000000000 --- 
a/cloudformation/deploy-databases.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash -set -e - -# Linting and formatting the base stack -cfn-lint -t databases.yaml -rm -rf temp.yaml -cfn-flip -c -l -n databases.yaml temp.yaml -cfn-lint -t temp.yaml -mv temp.yaml databases.yaml - -read -rp "Databases password [123456Ab]: " password -password=${password:-123456Ab} - -# Deploying -aws cloudformation deploy \ - --template-file databases.yaml \ - --stack-name aws-data-wrangler-databases \ - --capabilities CAPABILITY_IAM \ - --parameter-overrides DatabasesPassword="$password" diff --git a/requirements-dev.txt b/requirements-dev.txt index aa25769c1..e52e36f7c 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -12,7 +12,6 @@ pytest-cov==2.12.0 pytest-rerunfailures==9.1.1 pytest-xdist==2.2.1 pytest-timeout==1.4.2 -cfn-lint==0.49.2 pydot==1.4.2 cfn-flip==1.2.3 twine==3.4.1 diff --git a/test_infra/.gitignore b/test_infra/.gitignore new file mode 100644 index 000000000..58505a021 --- /dev/null +++ b/test_infra/.gitignore @@ -0,0 +1,11 @@ +*.swp +package-lock.json +__pycache__ +.pytest_cache +.env +.venv +*.egg-info + +# CDK asset staging directory +.cdk.staging +cdk.out diff --git a/test_infra/README.md b/test_infra/README.md new file mode 100644 index 000000000..529a63918 --- /dev/null +++ b/test_infra/README.md @@ -0,0 +1,62 @@ + +# AWS Data Wrangler Integration Testing Infrastructure + +This project contains infrastructure required to perform integration testing of +[AWS Data Wrangler](https://aws-data-wrangler.readthedocs.io/). + +Infrastructure is defined using +[CDK](https://docs.aws.amazon.com/cdk/latest/guide/home.html) + +The `cdk.json` file tells the CDK Toolkit how to execute your app. + +This project is set up like a standard Python project. The initialization +process also creates a virtualenv within this project, stored under the `.venv` +directory. 
To create the virtualenv it assumes that there is a `python3` +(or `python` for Windows) executable in your path with access to the `venv` +package. If for any reason the automatic creation of the virtualenv fails, +you can create the virtualenv manually. + +To manually create a virtualenv on macOS and Linux: + +``` +$ python3 -m venv .venv +``` + +After the init process completes and the virtualenv is created, you can use the following +step to activate your virtualenv. + +``` +$ source .venv/bin/activate +``` + +If you are on a Windows platform, you would activate the virtualenv like this: + +``` +% .venv\Scripts\activate.bat +``` + +Once the virtualenv is activated, you can install the required dependencies. + +``` +$ pip install -r requirements.txt +``` + +At this point you can now synthesize the CloudFormation template for this code. + +``` +$ cdk synth +``` + +To add additional dependencies, for example other CDK libraries, just add +them to your `requirements.txt` file and rerun the `pip install -r requirements.txt` +command. + +## Useful commands + + * `cdk ls` list all stacks in the app + * `cdk synth` emits the synthesized CloudFormation template + * `cdk deploy` deploy this stack to your default AWS account/region + * `cdk diff` compare deployed stack with current state + * `cdk docs` open CDK documentation + +Enjoy! 
diff --git a/test_infra/app.py b/test_infra/app.py
new file mode 100644
index 000000000..bd10648dd
--- /dev/null
+++ b/test_infra/app.py
@@ -0,0 +1,20 @@
+#!/usr/bin/env python3
+"""CDK app entry point: declares the base and databases test-infrastructure stacks."""
+from aws_cdk import core as cdk
+from test_infra.base_stack import BaseStack
+from test_infra.databases_stack import DatabasesStack
+
+app = cdk.App()
+
+# The databases stack is handed the base stack's get_vpc / get_bucket / get_key values.
+# NOTE(review): these are passed without being called -- presumably properties on BaseStack; confirm.
+base = BaseStack(app, "aws-data-wrangler-base")
+DatabasesStack(
+    app,
+    "aws-data-wrangler-databases",
+    base.get_vpc,
+    base.get_bucket,
+    base.get_key,
+)
+
+app.synth()
diff --git a/test_infra/cdk.json b/test_infra/cdk.json
new file mode 100644
index 000000000..68f471257
--- /dev/null
+++ b/test_infra/cdk.json
@@ -0,0 +1,16 @@
+{
+  "app": "python3 app.py",
+  "context": {
+    "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true,
+    "@aws-cdk/core:enableStackNameDuplicates": "true",
+    "aws-cdk:enableDiffNoFail": "true",
+    "@aws-cdk/core:stackRelativeExports": "true",
+    "@aws-cdk/aws-ecr-assets:dockerIgnoreSupport": true,
+    "@aws-cdk/aws-secretsmanager:parseOwnedSecretName": true,
+    "@aws-cdk/aws-kms:defaultKeyPolicies": true,
+    "@aws-cdk/aws-s3:grantWriteWithoutAcl": true,
+    "@aws-cdk/aws-ecs-patterns:removeDefaultDesiredCount": true,
+    "@aws-cdk/aws-rds:lowercaseDbIdentifier": true,
+    "@aws-cdk/aws-efs:defaultEncryptionAtRest": true
+  }
+}
diff --git a/test_infra/requirements.txt b/test_infra/requirements.txt
new file mode 100644
index 000000000..469cfb7df
--- /dev/null
+++ b/test_infra/requirements.txt
@@ -0,0 +1,10 @@
+aws-cdk.core
+aws-cdk.aws_ec2
+aws-cdk.aws_glue
+aws-cdk.aws_iam
+aws-cdk.aws_kms
+aws-cdk.aws_logs
+aws-cdk.aws_s3
+aws-cdk.aws_redshift
+aws-cdk.aws_rds
+aws-cdk.aws_secretsmanager
diff --git a/test_infra/scripts/delete-base.sh b/test_infra/scripts/delete-base.sh
new file mode 100755
index 000000000..1edd3dd27
--- /dev/null
+++ b/test_infra/scripts/delete-base.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+set -e
+
+# Run CDK from test_infra/ (where cdk.json lives) regardless of the caller's working directory.
+pushd "$(dirname "${BASH_SOURCE[0]}")/.."
+cdk destroy aws-data-wrangler-base
+popd
diff --git a/test_infra/scripts/delete-databases.sh b/test_infra/scripts/delete-databases.sh
new file mode 100755
index 000000000..31d97451f
--- /dev/null
+++ b/test_infra/scripts/delete-databases.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+set -e
+
+# Run CDK from test_infra/ (where cdk.json lives) regardless of the caller's working directory.
+pushd "$(dirname "${BASH_SOURCE[0]}")/.."
+cdk destroy aws-data-wrangler-databases
+popd
diff --git a/test_infra/scripts/deploy-base.sh b/test_infra/scripts/deploy-base.sh
new file mode 100755
index 000000000..5dd7db64f
--- /dev/null
+++ b/test_infra/scripts/deploy-base.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+set -e
+
+# Run CDK from test_infra/ (where cdk.json lives) regardless of the caller's working directory.
+pushd "$(dirname "${BASH_SOURCE[0]}")/.."
+cdk bootstrap
+cdk deploy aws-data-wrangler-base
+popd
diff --git a/test_infra/scripts/deploy-databases.sh b/test_infra/scripts/deploy-databases.sh
new file mode 100755
index 000000000..eb4c717f7
--- /dev/null
+++ b/test_infra/scripts/deploy-databases.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+set -e
+
+read -rp "Databases password [123456Ab]: " password
+password=${password:-123456Ab}
+
+# Run CDK from test_infra/ (where cdk.json lives) regardless of the caller's working directory.
+pushd "$(dirname "${BASH_SOURCE[0]}")/.."
+cdk bootstrap
+cdk deploy aws-data-wrangler-databases --parameters "aws-data-wrangler-databases:dbpassword=$password"
+popd
diff --git a/cloudformation/security-group-databases-add-local-ip.sh b/test_infra/scripts/security-group-databases-add-local-ip.sh
similarity index 100%
rename from cloudformation/security-group-databases-add-local-ip.sh
rename to test_infra/scripts/security-group-databases-add-local-ip.sh
diff --git a/cloudformation/security-group-databases-check.sh b/test_infra/scripts/security-group-databases-check.sh
similarity index 100%
rename from cloudformation/security-group-databases-check.sh
rename to test_infra/scripts/security-group-databases-check.sh
diff --git a/test_infra/setup.py b/test_infra/setup.py
new file mode 100644
index 000000000..c635af375
--- /dev/null
+++ b/test_infra/setup.py
@@ -0,0 +1,35 @@
+"""Packaging metadata for the AWS Data Wrangler integration-test CDK app."""
+from pathlib import Path
+
+import setuptools
+
+# Resolve README.md relative to this file (not the caller's cwd) and decode it explicitly.
+long_description = (Path(__file__).parent / "README.md").read_text(encoding="utf-8")
+
+
+setuptools.setup(
+    name="test_infra",
+    version="1.0.0",
+    description="Infrastructure required to run Integration tests",
+    long_description=long_description,
+    long_description_content_type="text/markdown",
+    author="author",
+    # setup.py sits beside the ``test_infra`` package directory, so discover it from here;
+    # find_packages(where="test_infra") would search inside the package and find nothing.
+    packages=setuptools.find_packages(include=["test_infra", "test_infra.*"]),
+    install_requires=[
+        "aws-cdk.core==1.102.0",
+    ],
+    python_requires=">=3.6",
+    classifiers=[
+        "Development Status :: 4 - Beta",
+        "Intended Audience :: Developers",
+        "Programming Language :: Python :: 3 :: Only",
+        "Programming Language :: Python :: 3.6",
+        "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        "Topic :: Software Development :: Code Generators",
+        "Topic :: Utilities",
+        "Typing :: Typed",
+    ],
+)
diff --git a/test_infra/source.bat b/test_infra/source.bat
new file mode 100644
index 000000000..9e1a83442
--- /dev/null
+++ b/test_infra/source.bat
@@ -0,0 +1,13 @@
+@echo off
+
+rem The sole purpose of this script is to make the command
+rem
+rem source
class BaseStack(cdk.Stack):  # type: ignore
    """Base test infrastructure stack.

    Creates a VPC, a KMS key with an alias, a versioned S3 bucket, a Glue
    database, and a CloudWatch log group with one log stream, then publishes
    their identifiers as CloudFormation outputs.
    """

    def __init__(self, scope: cdk.Construct, construct_id: str, **kwargs: str) -> None:
        """Create every base resource and its outputs.

        Parameters
        ----------
        scope : cdk.Construct
            Parent construct of this stack.
        construct_id : str
            Identifier of the stack within ``scope``.
        **kwargs
            Forwarded unchanged to ``cdk.Stack``.
        """
        super().__init__(scope, construct_id, **kwargs)

        self.vpc = self._create_vpc()
        self.key = self._create_key()
        self.bucket = self._create_bucket()

        database = glue.Database(
            self,
            id="aws_data_wrangler_glue_database",
            database_name="aws_data_wrangler",
        )
        group = logs.LogGroup(
            self,
            id="aws_data_wrangler_log_group",
            retention=logs.RetentionDays.ONE_MONTH,
        )
        stream = logs.LogStream(
            self,
            id="aws_data_wrangler_log_stream",
            log_group=group,
        )

        # (output id, value, export name); None means the output is not exported.
        outputs = (
            ("Region", self.region, None),
            ("VPC", self.vpc.vpc_id, "aws-data-wrangler-base-VPC"),
            (
                "PublicSubnet1",
                self.vpc.public_subnets[0].subnet_id,
                "aws-data-wrangler-base-PublicSubnet1",
            ),
            (
                "PublicSubnet2",
                self.vpc.public_subnets[1].subnet_id,
                "aws-data-wrangler-base-PublicSubnet2",
            ),
            (
                "PrivateSubnet",
                self.vpc.private_subnets[0].subnet_id,
                "aws-data-wrangler-base-PrivateSubnet",
            ),
            ("KmsKeyArn", self.key.key_arn, "aws-data-wrangler-base-KmsKeyArn"),
            ("BucketName", self.bucket.bucket_name, "aws-data-wrangler-base-BucketName"),
            ("GlueDatabaseName", database.database_name, None),
            ("LogGroupName", group.log_group_name, None),
            ("LogStream", stream.log_stream_name, None),
        )
        for output_id, output_value, export in outputs:
            cdk.CfnOutput(self, output_id, value=output_value, export_name=export)

    def _create_vpc(self) -> ec2.Vpc:
        """Build the VPC with DNS hostnames and DNS support enabled."""
        return ec2.Vpc(
            self,
            "aws-data-wrangler-vpc",
            cidr="11.19.224.0/19",
            enable_dns_hostnames=True,
            enable_dns_support=True,
        )

    def _create_key(self) -> kms.Key:
        """Build the KMS key and register its alias."""
        root_access = iam.PolicyStatement(
            sid="Enable IAM User Permissions",
            effect=iam.Effect.ALLOW,
            actions=["kms:*"],
            principals=[iam.AccountRootPrincipal()],
            resources=["*"],
        )
        key = kms.Key(
            self,
            id="aws-data-wrangler-key",
            description="Aws Data Wrangler Test Key.",
            policy=iam.PolicyDocument(statements=[root_access]),
        )
        kms.Alias(
            self,
            "aws-data-wrangler-key-alias",
            alias_name="alias/aws-data-wrangler-key",
            target_key=key,
        )
        return key

    def _create_bucket(self) -> s3.Bucket:
        """Build the versioned bucket with public access blocked and a one-day clean-up rule."""
        no_public_access = s3.BlockPublicAccess(
            block_public_acls=True,
            block_public_policy=True,
            ignore_public_acls=True,
            restrict_public_buckets=True,
        )
        one_day = cdk.Duration.days(1)
        cleanup_rule = s3.LifecycleRule(
            id="CleaningUp",
            enabled=True,
            expiration=one_day,
            abort_incomplete_multipart_upload_after=one_day,
            noncurrent_version_expiration=one_day,
        )
        return s3.Bucket(
            self,
            id="aws-data-wrangler",
            versioned=True,
            block_public_access=no_public_access,
            lifecycle_rules=[cleanup_rule],
        )

    @property
    def get_bucket(self) -> s3.Bucket:
        """The S3 bucket created by this stack."""
        return self.bucket

    @property
    def get_vpc(self) -> ec2.Vpc:
        """The VPC created by this stack."""
        return self.vpc

    @property
    def get_key(self) -> kms.Key:
        """The KMS key created by this stack."""
        return self.key
class DatabasesStack(cdk.Stack):  # type: ignore
    """Databases used by the integration tests.

    Creates a Redshift cluster, Aurora PostgreSQL and Aurora MySQL clusters and
    a SQL Server instance. Each engine also gets a Glue JDBC connection, a
    Secrets Manager secret holding its credentials, and CloudFormation outputs
    with its address, port, database and schema.
    """

    def __init__(
        self,
        scope: cdk.Construct,
        construct_id: str,
        vpc: ec2.IVpc,
        bucket: s3.IBucket,
        key: kms.Key,
        **kwargs: str,
    ) -> None:
        """
        AWS Data Wrangler Development Databases Infrastructure.
        Includes Redshift, Aurora PostgreSQL, Aurora MySQL, Microsoft SQL Server.

        Parameters
        ----------
        scope : cdk.Construct
            Parent construct of this stack.
        construct_id : str
            Identifier of the stack within ``scope``.
        vpc : ec2.IVpc
            VPC in which the databases and Glue connections are placed.
        bucket : s3.IBucket
            Bucket the database roles and clusters are granted access to.
        key : kms.Key
            KMS key used for Glue connection password encryption and granted
            to the Redshift role.
        **kwargs
            Forwarded unchanged to ``cdk.Stack``.
        """
        super().__init__(scope, construct_id, **kwargs)

        self.vpc = vpc
        self.key = key
        self.bucket = bucket

        # Shared resources first: every engine below reads the credentials,
        # security group and subnet group set by _set_db_infra.
        self._set_db_infra()
        self._set_catalog_encryption()
        self._setup_redshift()
        self._setup_postgresql()
        self._setup_mysql()
        self._setup_sqlserver()

    def _set_db_infra(self) -> None:
        """Create the credentials, security group, subnet group and RDS role shared by all engines."""
        self.db_username = "test"
        # The password is supplied at deploy time through the "dbpassword"
        # CloudFormation parameter.
        self.db_password = cdk.CfnParameter(self, "dbpassword", type="String").value_as_string
        self.db_password_secret = cdk.SecretValue(self.db_password)
        self.db_security_group = ec2.SecurityGroup(
            self,
            "aws-data-wrangler-database-sg",
            vpc=self.vpc,
            description="AWS Data Wrangler Test Arena - Database security group",
        )
        # NOTE(review): allows all traffic from any IPv4 address - presumably
        # acceptable only because this is disposable test infrastructure.
        self.db_security_group.add_ingress_rule(ec2.Peer.any_ipv4(), ec2.Port.all_traffic())
        self.rds_subnet_group = rds.SubnetGroup(
            self,
            "aws-data-wrangler-rds-subnet-group",
            description="RDS Database Subnet Group",
            vpc=self.vpc,
            vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PUBLIC),
        )
        self.rds_role = iam.Role(
            self,
            "aws-data-wrangler-rds-role",
            assumed_by=iam.ServicePrincipal("rds.amazonaws.com"),
            inline_policies={
                "S3": iam.PolicyDocument(
                    statements=[
                        iam.PolicyStatement(
                            effect=iam.Effect.ALLOW,
                            actions=[
                                "s3:Get*",
                                "s3:List*",
                                "s3:Put*",
                                "s3:AbortMultipartUpload",
                            ],
                            resources=[
                                self.bucket.bucket_arn,
                                f"{self.bucket.bucket_arn}/*",
                            ],
                        )
                    ]
                ),
            },
        )
        cdk.CfnOutput(self, "DatabasesUsername", value=self.db_username)
        cdk.CfnOutput(self, "DatabasesPassword", value=self.db_password)
        cdk.CfnOutput(
            self,
            "DatabaseSecurityGroupId",
            value=self.db_security_group.security_group_id,
        )

    def _set_catalog_encryption(self) -> None:
        """Configure Glue Data Catalog encryption settings for the account.

        Encryption at rest is set to ``DISABLED``; connection passwords are
        encrypted with ``self.key``.
        """
        glue.CfnDataCatalogEncryptionSettings(
            self,
            "aws-data-wrangler-catalog-encryption",
            catalog_id=cdk.Aws.ACCOUNT_ID,
            data_catalog_encryption_settings=glue.CfnDataCatalogEncryptionSettings.DataCatalogEncryptionSettingsProperty(  # noqa: E501
                encryption_at_rest=glue.CfnDataCatalogEncryptionSettings.EncryptionAtRestProperty(
                    catalog_encryption_mode="DISABLED",
                ),
                connection_password_encryption=glue.CfnDataCatalogEncryptionSettings.ConnectionPasswordEncryptionProperty(  # noqa: E501
                    kms_key_id=self.key.key_id,
                    return_connection_password_encrypted=True,
                ),
            ),
        )

    def _setup_redshift(self) -> None:
        """Create the Redshift role, subnet group, single-node cluster, Glue connection, secret and outputs."""
        port = 5439
        database = "test"
        schema = "public"
        redshift_role = iam.Role(
            self,
            "aws-data-wrangler-redshift-role",
            assumed_by=iam.ServicePrincipal("redshift.amazonaws.com"),
            inline_policies={
                "KMS": iam.PolicyDocument(
                    statements=[
                        iam.PolicyStatement(
                            effect=iam.Effect.ALLOW,
                            actions=[
                                "kms:Encrypt",
                                "kms:Decrypt",
                                "kms:GenerateDataKey",
                            ],
                            resources=[self.key.key_arn],
                        )
                    ]
                ),
                "S3": iam.PolicyDocument(
                    statements=[
                        iam.PolicyStatement(
                            effect=iam.Effect.ALLOW,
                            actions=[
                                "s3:Get*",
                                "s3:List*",
                                "s3:Put*",
                            ],
                            resources=[
                                self.bucket.bucket_arn,
                                f"{self.bucket.bucket_arn}/*",
                            ],
                        )
                    ]
                ),
                "LakeFormation": iam.PolicyDocument(
                    statements=[
                        iam.PolicyStatement(
                            effect=iam.Effect.ALLOW,
                            actions=[
                                "lakeformation:GrantPermissions",
                            ],
                            resources=["*"],
                        )
                    ]
                ),
                "Glue": iam.PolicyDocument(
                    statements=[
                        iam.PolicyStatement(
                            effect=iam.Effect.ALLOW,
                            actions=[
                                "glue:SearchTables",
                                "glue:GetConnections",
                                "glue:GetDataCatalogEncryptionSettings",
                                "glue:GetTables",
                                "glue:GetTableVersions",
                                "glue:GetPartitions",
                                "glue:DeleteTableVersion",
                                "glue:BatchGetPartition",
                                "glue:GetDatabases",
                                "glue:GetTags",
                                "glue:GetTable",
                                "glue:GetDatabase",
                                "glue:GetPartition",
                                "glue:GetTableVersion",
                                "glue:GetConnection",
                                "glue:GetUserDefinedFunction",
                                "glue:GetUserDefinedFunctions",
                            ],
                            resources=["*"],
                        )
                    ]
                ),
            },
        )
        redshift_subnet_group = redshift.ClusterSubnetGroup(
            self,
            "aws-data-wrangler-redshift-subnet-group",
            description="AWS Data Wrangler Test Arena - Redshift Subnet Group",
            vpc=self.vpc,
            vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PUBLIC),
        )
        redshift_cluster = redshift.Cluster(
            self,
            "aws-data-wrangler-redshift-cluster",
            default_database_name=database,
            master_user=redshift.Login(
                master_username=self.db_username,
                master_password=self.db_password_secret,
            ),
            cluster_type=redshift.ClusterType.SINGLE_NODE,
            publicly_accessible=True,
            port=port,
            vpc=self.vpc,
            vpc_subnets=ec2.SubnetSelection(subnet_type=ec2.SubnetType.PUBLIC),
            # Attach the subnet group declared above; without this the group
            # is an orphaned resource and the cluster creates its own.
            subnet_group=redshift_subnet_group,
            security_groups=[self.db_security_group],
            roles=[redshift_role],
        )
        glue.Connection(
            self,
            "aws-data-wrangler-redshift-glue-connection",
            description="Connect to Redshift.",
            type=glue.ConnectionType.JDBC,
            connection_name="aws-data-wrangler-redshift",
            properties={
                "JDBC_CONNECTION_URL": f"jdbc:redshift://{redshift_cluster.cluster_endpoint.hostname}:{port}/{database}",  # noqa: E501
                "USERNAME": self.db_username,
                "PASSWORD": self.db_password,
            },
            subnet=self.vpc.private_subnets[0],
            security_groups=[self.db_security_group],
        )
        # The credentials travel in the secret string template; the generated
        # value is stored under the throwaway key "dummy".
        ssm.Secret(
            self,
            "aws-data-wrangler-redshift-secret",
            secret_name="aws-data-wrangler/redshift",
            description="Redshift credentials",
            generate_secret_string=ssm.SecretStringGenerator(
                generate_string_key="dummy",
                secret_string_template=json.dumps(
                    {
                        "username": self.db_username,
                        "password": self.db_password,
                        "engine": "redshift",
                        "host": redshift_cluster.cluster_endpoint.hostname,
                        "port": port,
                        "dbClusterIdentifier": redshift_cluster.cluster_name,
                    }
                ),
            ),
        )
        cdk.CfnOutput(self, "RedshiftIdentifier", value=redshift_cluster.cluster_name)
        cdk.CfnOutput(
            self,
            "RedshiftAddress",
            value=redshift_cluster.cluster_endpoint.hostname,
        )
        cdk.CfnOutput(self, "RedshiftPort", value=str(port))
        cdk.CfnOutput(self, "RedshiftDatabase", value=database)
        cdk.CfnOutput(self, "RedshiftSchema", value=schema)
        cdk.CfnOutput(self, "RedshiftRole", value=redshift_role.role_arn)

    def _setup_postgresql(self) -> None:
        """Create the Aurora PostgreSQL cluster, its parameter group, Glue connection, secret and outputs."""
        # NOTE(review): 3306 is the conventional MySQL port (PostgreSQL's is
        # 5432) - presumably intentional; confirm before changing, since the
        # JDBC URL, secret and outputs below all publish this value.
        port = 3306
        database = "postgres"
        schema = "public"
        pg = rds.ParameterGroup(
            self,
            "aws-data-wrangler-postgresql-params",
            engine=rds.DatabaseClusterEngine.aurora_postgres(
                version=rds.AuroraPostgresEngineVersion.VER_11_6,
            ),
            parameters={
                "apg_plan_mgmt.capture_plan_baselines": "off",
            },
        )
        aurora_pg = rds.DatabaseCluster(
            self,
            "aws-data-wrangler-aurora-cluster-postgresql",
            removal_policy=cdk.RemovalPolicy.DESTROY,
            engine=rds.DatabaseClusterEngine.aurora_postgres(
                version=rds.AuroraPostgresEngineVersion.VER_11_6,
            ),
            cluster_identifier="postgresql-cluster-wrangler",
            instances=1,
            credentials=rds.Credentials.from_password(
                username=self.db_username,
                password=self.db_password_secret,
            ),
            port=port,
            backup=rds.BackupProps(retention=cdk.Duration.days(1)),
            parameter_group=pg,
            s3_import_buckets=[self.bucket],
            s3_export_buckets=[self.bucket],
            instance_props=rds.InstanceProps(
                vpc=self.vpc,
                security_groups=[self.db_security_group],
                publicly_accessible=True,
            ),
            subnet_group=self.rds_subnet_group,
        )
        glue.Connection(
            self,
            "aws-data-wrangler-postgresql-glue-connection",
            description="Connect to Aurora (PostgreSQL).",
            type=glue.ConnectionType.JDBC,
            connection_name="aws-data-wrangler-postgresql",
            properties={
                "JDBC_CONNECTION_URL": f"jdbc:postgresql://{aurora_pg.cluster_endpoint.hostname}:{port}/{database}",
                "USERNAME": self.db_username,
                "PASSWORD": self.db_password,
            },
            subnet=self.vpc.private_subnets[0],
            security_groups=[self.db_security_group],
        )
        ssm.Secret(
            self,
            "aws-data-wrangler-postgresql-secret",
            secret_name="aws-data-wrangler/postgresql",
            description="Postgresql credentials",
            generate_secret_string=ssm.SecretStringGenerator(
                generate_string_key="dummy",
                secret_string_template=json.dumps(
                    {
                        "username": self.db_username,
                        "password": self.db_password,
                        "engine": "postgresql",
                        "host": aurora_pg.cluster_endpoint.hostname,
                        "port": port,
                        "dbClusterIdentifier": aurora_pg.cluster_identifier,
                        "dbname": database,
                    }
                ),
            ),
        )
        cdk.CfnOutput(self, "PostgresqlAddress", value=aurora_pg.cluster_endpoint.hostname)
        cdk.CfnOutput(self, "PostgresqlPort", value=str(port))
        cdk.CfnOutput(self, "PostgresqlDatabase", value=database)
        cdk.CfnOutput(self, "PostgresqlSchema", value=schema)

    def _setup_mysql(self) -> None:
        """Create the Aurora MySQL cluster, its plain and SSL Glue connections, secret and outputs."""
        port = 3306
        database = "test"
        schema = "test"
        aurora_mysql = rds.DatabaseCluster(
            self,
            "aws-data-wrangler-aurora-cluster-mysql",
            removal_policy=cdk.RemovalPolicy.DESTROY,
            engine=rds.DatabaseClusterEngine.aurora_mysql(
                version=rds.AuroraMysqlEngineVersion.VER_5_7_12,
            ),
            cluster_identifier="mysql-cluster-wrangler",
            instances=1,
            default_database_name=database,
            credentials=rds.Credentials.from_password(
                username=self.db_username,
                password=self.db_password_secret,
            ),
            port=port,
            backup=rds.BackupProps(retention=cdk.Duration.days(1)),
            instance_props=rds.InstanceProps(
                vpc=self.vpc,
                security_groups=[self.db_security_group],
                publicly_accessible=True,
            ),
            subnet_group=self.rds_subnet_group,
            s3_import_buckets=[self.bucket],
            s3_export_buckets=[self.bucket],
        )
        glue.Connection(
            self,
            "aws-data-wrangler-mysql-glue-connection",
            description="Connect to Aurora (MySQL).",
            type=glue.ConnectionType.JDBC,
            connection_name="aws-data-wrangler-mysql",
            properties={
                "JDBC_CONNECTION_URL": f"jdbc:mysql://{aurora_mysql.cluster_endpoint.hostname}:{port}/{database}",
                "USERNAME": self.db_username,
                "PASSWORD": self.db_password,
            },
            subnet=self.vpc.private_subnets[0],
            security_groups=[self.db_security_group],
        )
        # Second connection to the same cluster, with SSL enforced and a
        # custom certificate bundle.
        glue.Connection(
            self,
            "aws-data-wrangler-mysql-glue-connection-ssl",
            type=glue.ConnectionType.JDBC,
            connection_name="aws-data-wrangler-mysql-ssl",
            properties={
                "JDBC_CONNECTION_URL": f"jdbc:mysql://{aurora_mysql.cluster_endpoint.hostname}:{port}/{database}",
                "USERNAME": self.db_username,
                "PASSWORD": self.db_password,
                "JDBC_ENFORCE_SSL": "true",
                "CUSTOM_JDBC_CERT": "s3://rds-downloads/rds-combined-ca-bundle.pem",
            },
            subnet=self.vpc.private_subnets[0],
            security_groups=[self.db_security_group],
        )
        ssm.Secret(
            self,
            "aws-data-wrangler-mysql-secret",
            secret_name="aws-data-wrangler/mysql",
            description="MySQL credentials",
            generate_secret_string=ssm.SecretStringGenerator(
                generate_string_key="dummy",
                secret_string_template=json.dumps(
                    {
                        "username": self.db_username,
                        "password": self.db_password,
                        "engine": "mysql",
                        "host": aurora_mysql.cluster_endpoint.hostname,
                        "port": port,
                        "dbClusterIdentifier": aurora_mysql.cluster_identifier,
                        "dbname": database,
                    }
                ),
            ),
        )
        cdk.CfnOutput(self, "MysqlAddress", value=aurora_mysql.cluster_endpoint.hostname)
        cdk.CfnOutput(self, "MysqlPort", value=str(port))
        cdk.CfnOutput(self, "MysqlDatabase", value=database)
        cdk.CfnOutput(self, "MysqlSchema", value=schema)

    def _setup_sqlserver(self) -> None:
        """Create the SQL Server instance, its Glue connection, secret and outputs."""
        port = 1433
        database = "test"
        schema = "dbo"
        sqlserver = rds.DatabaseInstance(
            self,
            "aws-data-wrangler-sqlserver-instance",
            instance_identifier="sqlserver-instance-wrangler",
            engine=rds.DatabaseInstanceEngine.sql_server_ex(version=rds.SqlServerEngineVersion.VER_15),
            instance_type=ec2.InstanceType.of(ec2.InstanceClass.BURSTABLE3, ec2.InstanceSize.SMALL),
            credentials=rds.Credentials.from_password(
                username=self.db_username,
                password=self.db_password_secret,
            ),
            port=port,
            vpc=self.vpc,
            subnet_group=self.rds_subnet_group,
            security_groups=[self.db_security_group],
            publicly_accessible=True,
            s3_import_role=self.rds_role,
            s3_export_role=self.rds_role,
        )
        glue.Connection(
            self,
            "aws-data-wrangler-sqlserver-glue-connection",
            description="Connect to SQL Server.",
            type=glue.ConnectionType.JDBC,
            connection_name="aws-data-wrangler-sqlserver",
            properties={
                "JDBC_CONNECTION_URL": f"jdbc:sqlserver://{sqlserver.instance_endpoint.hostname}:{port};databaseName={database}",  # noqa: E501
                "USERNAME": self.db_username,
                "PASSWORD": self.db_password,
            },
            subnet=self.vpc.private_subnets[0],
            security_groups=[self.db_security_group],
        )
        ssm.Secret(
            self,
            "aws-data-wrangler-sqlserver-secret",
            secret_name="aws-data-wrangler/sqlserver",
            description="SQL Server credentials",
            generate_secret_string=ssm.SecretStringGenerator(
                generate_string_key="dummy",
                secret_string_template=json.dumps(
                    {
                        "username": self.db_username,
                        "password": self.db_password,
                        "engine": "sqlserver",
                        "host": sqlserver.instance_endpoint.hostname,
                        "port": port,
                        "dbClusterIdentifier": sqlserver.instance_identifier,
                        "dbname": database,
                    }
                ),
            ),
        )
        cdk.CfnOutput(self, "SqlServerAddress", value=sqlserver.instance_endpoint.hostname)
        cdk.CfnOutput(self, "SqlServerPort", value=str(port))
        cdk.CfnOutput(self, "SqlServerDatabase", value=database)
        cdk.CfnOutput(self, "SqlServerSchema", value=schema)
def test_catalog_get_databases(glue_database):
    """Check that the catalog lists at least one database, including the test one."""
    names = []
    for entry in wr.catalog.get_databases():
        names.append(entry["Name"])
    assert names
    assert glue_database in names