forked from awslabs/service-workbench-on-aws
-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Revert "feat: Encrypt s3 buckets for EMR log bucket and CICD Artifact…
… bucket (awslabs#508)" This reverts commit d276d1c.
- Loading branch information
1 parent
a87ffcb
commit bdac8a4
Showing
6 changed files
with
652 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
314 changes: 314 additions & 0 deletions
314
addons/addon-base-raas/packages/base-raas-cfn-templates/src/templates/emr-cluster.cfn.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,314 @@ | ||
AWSTemplateFormatVersion: 2010-09-09 | ||
|
||
Description: Service-Workbench-on-AWS EMR-Hail-Jupyter | ||
|
||
Metadata: | ||
AWS::CloudFormation::Interface: | ||
ParameterGroups: | ||
- Label: | ||
default: EMR Options | ||
Parameters: | ||
- Namespace | ||
- KeyName | ||
- VPC | ||
- Subnet | ||
- CoreNodeCount | ||
- DiskSizeGB | ||
- MasterInstanceType | ||
- WorkerInstanceType | ||
- WorkerBidPrice | ||
- AccessFromCIDRBlock | ||
- AmiId | ||
- Label: | ||
default: Tags | ||
Parameters: | ||
- NameTag | ||
- OwnerTag | ||
- PurposeTag | ||
|
||
Parameters: | ||
Namespace: | ||
Type: String | ||
Description: An environment name that will be prefixed to resource names | ||
KeyName: | ||
Description: SSH key pair to use for EMR node login | ||
Type: AWS::EC2::KeyPair::KeyName | ||
VPC: | ||
Description: VPC for EMR nodes. | ||
Type: AWS::EC2::VPC::Id | ||
Subnet: | ||
Description: Subnet for EMR nodes, from the VPC selected above | ||
Type: AWS::EC2::Subnet::Id | ||
CoreNodeCount: | ||
Description: Number of core nodes to provision (1-80) | ||
Type: Number | ||
MinValue: '1' | ||
MaxValue: '80' | ||
Default: '5' | ||
DiskSizeGB: | ||
Description: EBS Volume size (GB) for each node | ||
Type: Number | ||
MinValue: '10' | ||
MaxValue: '1000' | ||
Default: '20' | ||
MasterInstanceType: | ||
Type: String | ||
Default: m5.xlarge | ||
Description: EMR node ec2 instance type. | ||
WorkerInstanceType: | ||
Type: String | ||
Default: m5.xlarge | ||
Description: EMR node ec2 instance type. | ||
Market: | ||
Type: String | ||
Default: ON_DEMAND | ||
Description: Which market to purchase workers on - ON_DEMAND or SPOT. | ||
WorkerBidPrice: | ||
Type: String | ||
Description: Bid price for the worker spot nodes. This is only applicable when Market = SPOT. Specify 0 for Market = ON_DEMAND. | ||
AccessFromCIDRBlock: | ||
Type: String | ||
MinLength: 9 | ||
Description: Restrict WebUI access to specified address or range | ||
AmiId: | ||
Type: String | ||
Description: Ami Id to use for the cluster | ||
EnvironmentInstanceFiles: | ||
Type: String | ||
Description: >- | ||
An S3 URI (starting with "s3://") that specifies the location of files to be copied to | ||
the environment instance, including any bootstrap scripts | ||
S3Mounts: | ||
Type: String | ||
Description: A JSON array of objects with name, bucket and prefix properties used to mount data | ||
IamPolicyDocument: | ||
Type: String | ||
Description: The IAM policy to be associated with the launched workstation | ||
EncryptionKeyArn: | ||
Type: String | ||
Description: The ARN of the KMS encryption Key used to encrypt data in the cluster | ||
|
||
Conditions: | ||
IamPolicyEmpty: !Equals [!Ref IamPolicyDocument, '{}'] | ||
IsOnDemandCondition: !Equals [!Ref Market, ON_DEMAND] | ||
|
||
Resources: | ||
# TODO: Use one bucket for EMR logs per account, so shift deployment to account on-boarding and pass here as param | ||
LogBucket: | ||
Type: AWS::S3::Bucket | ||
DeletionPolicy: Retain | ||
Properties: | ||
PublicAccessBlockConfiguration: # Block all public access configuration for the S3 bucket | ||
BlockPublicAcls: true | ||
BlockPublicPolicy: true | ||
IgnorePublicAcls: true | ||
RestrictPublicBuckets: true | ||
|
||
MasterSecurityGroup: | ||
Type: AWS::EC2::SecurityGroup | ||
Properties: | ||
GroupDescription: Jupyter | ||
VpcId: | ||
Ref: VPC | ||
SecurityGroupIngress: | ||
- IpProtocol: tcp | ||
FromPort: 8192 | ||
ToPort: 8192 | ||
CidrIp: | ||
Ref: AccessFromCIDRBlock | ||
|
||
InstanceProfile: | ||
Properties: | ||
Path: '/' | ||
Roles: | ||
- Ref: Ec2Role | ||
Type: AWS::IAM::InstanceProfile | ||
|
||
Ec2Role: | ||
Type: 'AWS::IAM::Role' | ||
Properties: | ||
RoleName: !Join ['-', [Ref: Namespace, 'ec2-role']] | ||
Path: '/' | ||
AssumeRolePolicyDocument: | ||
Version: '2012-10-17' | ||
Statement: | ||
- Effect: 'Allow' | ||
Principal: | ||
Service: | ||
- 'ec2.amazonaws.com' | ||
Action: | ||
- 'sts:AssumeRole' | ||
Policies: | ||
- !If | ||
- IamPolicyEmpty | ||
- !Ref 'AWS::NoValue' | ||
- PolicyName: !Join ['-', [Ref: Namespace, 's3-studydata-policy']] | ||
PolicyDocument: !Ref IamPolicyDocument | ||
- PolicyName: !Join ['-', [Ref: Namespace, 's3-bootstrap-script-policy']] | ||
PolicyDocument: | ||
Version: '2012-10-17' | ||
Statement: | ||
- Effect: 'Allow' | ||
Action: 's3:GetObject' | ||
Resource: | ||
- 'arn:aws:s3:::us-east-1.elasticmapreduce/bootstrap-actions/run-if' | ||
- !Sub | ||
- 'arn:aws:s3:::${S3Location}/*' | ||
# Remove "s3://" prefix from EnvironmentInstanceFiles | ||
- S3Location: !Select [1, !Split ['s3://', !Ref EnvironmentInstanceFiles]] | ||
- Effect: 'Allow' | ||
Action: 's3:ListBucket' | ||
Resource: !Sub | ||
- 'arn:aws:s3:::${S3Bucket}' | ||
- S3Bucket: !Select [2, !Split ['/', !Ref EnvironmentInstanceFiles]] | ||
Condition: | ||
StringLike: | ||
s3:prefix: !Sub | ||
- '${S3Prefix}/*' | ||
- S3Prefix: !Select [3, !Split ['/', !Ref EnvironmentInstanceFiles]] | ||
|
||
ServiceRole: | ||
Type: AWS::IAM::Role | ||
Properties: | ||
Path: '/' | ||
AssumeRolePolicyDocument: | ||
Statement: | ||
- Action: | ||
- sts:AssumeRole | ||
Effect: Allow | ||
Principal: | ||
Service: | ||
- elasticmapreduce.amazonaws.com | ||
Version: '2012-10-17' | ||
ManagedPolicyArns: | ||
- arn:aws:iam::aws:policy/service-role/AmazonElasticMapReduceRole | ||
|
||
EmrSecurityConfiguration: | ||
Type: AWS::EMR::SecurityConfiguration | ||
Properties: | ||
SecurityConfiguration: | ||
{ | ||
'EncryptionConfiguration': | ||
{ | ||
'AtRestEncryptionConfiguration': | ||
{ | ||
'LocalDiskEncryptionConfiguration': | ||
{ | ||
'EncryptionKeyProviderType': 'AwsKms', | ||
'AwsKmsKey': { 'Ref': 'EncryptionKeyArn' }, | ||
'EnableEbsEncryption': true, | ||
}, | ||
}, | ||
'EnableInTransitEncryption': false, | ||
'EnableAtRestEncryption': true, | ||
}, | ||
} | ||
|
||
# TODO: customise jupyter password from launch ui | ||
# TODO: Add security configuration to cluster | ||
# TODO: Can we make the jupyter use https? | ||
# TODO: Also change notebook owner to hadoop on launch | ||
EmrCluster: | ||
Type: AWS::EMR::Cluster | ||
Properties: | ||
Applications: | ||
- Name: Hadoop | ||
- Name: Hive | ||
- Name: Spark | ||
BootstrapActions: | ||
- Name: Run-Python-Jupyter | ||
ScriptBootstrapAction: | ||
Path: s3://us-east-1.elasticmapreduce/bootstrap-actions/run-if | ||
Args: | ||
- 'instance.isMaster=true' | ||
- '/opt/hail-on-AWS-spot-instances/src/jupyter_run.sh' | ||
- Name: Mount-S3-Resources | ||
ScriptBootstrapAction: | ||
Path: !Sub '${EnvironmentInstanceFiles}/get_bootstrap.sh' | ||
Args: | ||
- !Ref EnvironmentInstanceFiles | ||
- !Ref S3Mounts | ||
CustomAmiId: | ||
Ref: AmiId | ||
Configurations: | ||
- Classification: spark | ||
ConfigurationProperties: | ||
maximizeResourceAllocation: true | ||
- Classification: yarn-site | ||
ConfigurationProperties: | ||
yarn.nodemanager.vmem-check-enabled: false | ||
- Classification: spark-defaults | ||
ConfigurationProperties: | ||
spark.hadoop.io.compression.codecs: 'org.apache.hadoop.io.compress.DefaultCodec,is.hail.io.compress.BGzipCodec,org.apache.hadoop.io.compress.GzipCodec' | ||
spark.serializer: 'org.apache.spark.serializer.KryoSerializer' | ||
spark.hadoop.parquet.block.size: '1099511627776' | ||
spark.sql.files.maxPartitionBytes: '1099511627776' | ||
spark.sql.files.openCostInBytes: '1099511627776' | ||
Configurations: [] | ||
Instances: | ||
AdditionalMasterSecurityGroups: | ||
- Fn::GetAtt: | ||
- MasterSecurityGroup | ||
- GroupId | ||
Ec2KeyName: | ||
Ref: KeyName | ||
Ec2SubnetId: | ||
Ref: Subnet | ||
MasterInstanceGroup: | ||
InstanceCount: 1 | ||
InstanceType: | ||
Ref: MasterInstanceType | ||
CoreInstanceGroup: !If | ||
- IsOnDemandCondition | ||
- InstanceCount: | ||
Ref: CoreNodeCount | ||
InstanceType: | ||
Ref: WorkerInstanceType | ||
Market: | ||
Ref: Market | ||
EbsConfiguration: | ||
EbsOptimized: true | ||
EbsBlockDeviceConfigs: | ||
- VolumeSpecification: | ||
SizeInGB: | ||
Ref: DiskSizeGB | ||
VolumeType: gp2 | ||
- InstanceCount: | ||
Ref: CoreNodeCount | ||
InstanceType: | ||
Ref: WorkerInstanceType | ||
Market: | ||
Ref: Market | ||
BidPrice: | ||
Ref: WorkerBidPrice | ||
EbsConfiguration: | ||
EbsOptimized: true | ||
EbsBlockDeviceConfigs: | ||
- VolumeSpecification: | ||
SizeInGB: | ||
Ref: DiskSizeGB | ||
VolumeType: gp2 | ||
JobFlowRole: | ||
Ref: InstanceProfile | ||
Name: !Sub '${Namespace}-emr' | ||
Tags: # Add Name tag so EC2 instances are easily identifiable | ||
- Key: Name | ||
Value: !Sub '${Namespace}-emr' | ||
ServiceRole: | ||
Ref: ServiceRole | ||
ReleaseLabel: emr-5.27.0 | ||
# This has to be true because we assume a new user each time. | ||
VisibleToAllUsers: true | ||
SecurityConfiguration: !Ref EmrSecurityConfiguration | ||
LogUri: !Sub 's3://${LogBucket}' | ||
|
||
Outputs: | ||
JupyterUrl: | ||
Description: Open Jupyter on your new EMR cluster | ||
Value: !Sub 'http://${EmrCluster.MasterPublicDNS}:8192' | ||
LogBucket: | ||
Description: EMR Scratch data and Logs bucket | ||
Value: !Ref LogBucket | ||
WorkspaceInstanceRoleArn: | ||
Description: IAM role assumed by the EMR workspace instances | ||
Value: !GetAtt Ec2Role.Arn |
Oops, something went wrong.