From fac2ad1835036bc94304a47fbb351de58b79fa53 Mon Sep 17 00:00:00 2001 From: Shuo Diao Date: Tue, 19 Aug 2014 16:00:17 -0700 Subject: [PATCH 1/4] EMR: Add support for --emrfs in create-cluster command. EMR: Update AMI version used in create-cluster --emrfs example. --- awscli/customizations/emr/argumentschema.py | 34 ++++++++++++ awscli/customizations/emr/constants.py | 9 ++++ awscli/customizations/emr/createcluster.py | 53 ++++++++++++++++++- awscli/customizations/emr/helptext.py | 9 +++- .../examples/emr/create-cluster-examples.rst | 6 +++ .../examples/emr/create-cluster-synopsis.rst | 1 + .../unit/customizations/emr/input_emr_fs.json | 10 ++++ .../customizations/emr/test_create_cluster.py | 32 +++++++++++ 8 files changed, 150 insertions(+), 4 deletions(-) create mode 100644 tests/unit/customizations/emr/input_emr_fs.json diff --git a/awscli/customizations/emr/argumentschema.py b/awscli/customizations/emr/argumentschema.py index a0a869e3251e..8aba3f23e443 100644 --- a/awscli/customizations/emr/argumentschema.py +++ b/awscli/customizations/emr/argumentschema.py @@ -212,3 +212,37 @@ } } } + +EMR_FS_SCHEMA = { + "type": "object", + "properties": { + "Consistent": { + "type": "boolean", + "description": "Enable EMRFS consistent view." + }, + "SSE": { + "type": "boolean", + "description": "Enable Amazon S3 server-side encryption on files " + "written to S3 by EMRFS." + }, + "RetryCount": { + "type": "integer", + "description": + "The maximum number of times to retry upon S3 inconsistency." + }, + "RetryPeriod": { + "type": "integer", + "description": "The amount of time (in seconds) until the first " + "retry. Subsequent retries use an exponential " + "back-off." + }, + "Args": { + "type": "array", + "description": "A list of arguments to pass for additional " + "EMRFS configuration.", + "items": { + "type": "string" + } + } + } +} diff --git a/awscli/customizations/emr/constants.py b/awscli/customizations/emr/constants.py index 6f3c61fe0a10..320630251657 100644 --- a/awscli/customizations/emr/constants.py +++ b/awscli/customizations/emr/constants.py @@ -28,6 +28,15 @@ DEBUGGING_PATH = '/libs/state-pusher/0.1/fetch' DEBUGGING_NAME = 'Setup Hadoop Debugging' +CONFIG_HADOOP_PATH = '/bootstrap-actions/configure-hadoop' + +EMR_FS_BA_NAME = 'Enable Consistent View in EMR-FS' +EMR_FS_BA_ARG_KEY = '-e' +EMR_FS_CONSISTENT_KEY = 'fs.s3.consistent' +EMR_FS_SSE_KEY = 'fs.s3.enableServerSideEncryption' +EMR_FS_RETRY_COUNT_KEY = 'fs.s3.consistent.retryCount' +EMR_FS_RETRY_PERIOD_KEY = 'fs.s3.consistent.retryPeriodSeconds' + MAX_BOOTSTRAP_ACTION_NUMBER = 16 BOOTSTRAP_ACTION_NAME = 'Bootstrap action' diff --git a/awscli/customizations/emr/createcluster.py b/awscli/customizations/emr/createcluster.py index dea88fe0bdd6..58608ab12172 100644 --- a/awscli/customizations/emr/createcluster.py +++ b/awscli/customizations/emr/createcluster.py @@ -33,8 +33,8 @@ class CreateCluster(BasicCommand): DESCRIPTION = ( 'Creates and starts running an EMR cluster.\n' '\nQuick start:\n' - '\naws emr create-cluster --ami-version ' - '--instance-type [--instance-count ]\n') + '\naws emr create-cluster --ami-version --instance-type' + ' [--instance-count ]\n') ARG_TABLE = [ {'name': 'ami-version', 'help_text': helptext.AMI_VERSION, @@ -87,6 +87,9 @@ class CreateCluster(BasicCommand): 'help_text': helptext.APPLICATIONS, 'schema': argumentschema.APPLICATIONS_SCHEMA, 'default': defaultconfig.APPLICATIONS}, + {'name': 'emrfs', + 'help_text': helptext.EMR_FS, + 'schema': argumentschema.EMR_FS_SCHEMA}, {'name': 'steps', 'schema': argumentschema.STEPS_SCHEMA, 'help_text': helptext.STEPS}, @@ -211,6 +214,19 @@ def _run_main(self, parsed_args, parsed_globals): cluster=params, parsed_boostrap_actions=parsed_args.bootstrap_actions) + if parsed_args.emrfs is not None: + emr_fs_ba_args = self._build_emr_fs_args(parsed_args.emrfs) + emr_fs_ba_config = \ + emrutils.build_bootstrap_action( + path=emrutils.build_s3_link( + relative_path=constants.CONFIG_HADOOP_PATH, + region=parsed_globals.region), + name=constants.EMR_FS_BA_NAME, + args=emr_fs_ba_args) + self._update_cluster_dict( + cluster=params, key='BootstrapActions', + value=[emr_fs_ba_config]) + if parsed_args.steps is not None: steps_list = steputils.build_step_config_list( parsed_step_list=parsed_args.steps, @@ -359,3 +375,36 @@ def _get_missing_applications_for_steps(self, specified_apps, parsed_args): step_type not in specified_apps: missing_apps.add(step['Type'].title()) return missing_apps + + def _build_emr_fs_args(self, parsed_emr_fs): + args = [] + if parsed_emr_fs.get('Consistent') is not None: + args.append(constants.EMR_FS_BA_ARG_KEY) + args.append( + constants.EMR_FS_CONSISTENT_KEY + + '=' + str(parsed_emr_fs.get('Consistent')).lower()) + + if parsed_emr_fs.get('SSE') is not None: + args.append(constants.EMR_FS_BA_ARG_KEY) + args.append( + constants.EMR_FS_SSE_KEY + '=' + + str(parsed_emr_fs.get('SSE')).lower()) + + if parsed_emr_fs.get('RetryCount') is not None: + args.append(constants.EMR_FS_BA_ARG_KEY) + args.append( + constants.EMR_FS_RETRY_COUNT_KEY + '=' + + str(parsed_emr_fs.get('RetryCount'))) + + if parsed_emr_fs.get('RetryPeriod') is not None: + args.append(constants.EMR_FS_BA_ARG_KEY) + args.append( + constants.EMR_FS_RETRY_PERIOD_KEY + '=' + + str(parsed_emr_fs.get('RetryPeriod'))) + + if parsed_emr_fs.get('Args') is not None: + for arg in parsed_emr_fs.get('Args'): + args.append(constants.EMR_FS_BA_ARG_KEY) + args.append(arg) + + return args diff --git a/awscli/customizations/emr/helptext.py b/awscli/customizations/emr/helptext.py index 3bd166906107..d5d2dde5191b 100644 --- a/awscli/customizations/emr/helptext.py +++ b/awscli/customizations/emr/helptext.py @@ -176,6 +176,10 @@ '(e.g. Args=arg1,arg2,arg3) or a bracket-enclosed list of values ' ' and/or key-value pairs (e.g. Args=[arg1,arg2=arg3,arg4]).

') +EMR_FS = ( + '

Configures certain features in EMRFS like consistent ' + 'view and server-side encryption.

') + RESTORE_FROM_HBASE = ( '

Launches a new HBase cluster and populates it with' ' data from a previous backup of an HBase cluster. You must install HBase' @@ -185,14 +189,15 @@ STEPS = ( '

A list of steps to be executed by the cluster. A step can be' ' specified either using the shorthand syntax, JSON file or as a JSON' - ' string. Note: [Args] supplied with steps should either be a' + ' string. Note: [Args] supplied with steps should either be a' ' comma-separated list of values (e.g. Args=arg1,arg2,arg3) or' ' a bracket-enclosed list of values and/or key-value pairs' ' (e.g. Args=[arg1,arg2=arg3,arg4]).

') INSTALL_APPLICATIONS = ( '

The applications to be installed.' - ' Takes the following parameters: Name and Args.') + ' Takes the following parameters: ' + 'Name and Args.') LIST_CLUSTERS_CLUSTER_STATES = ( '

The cluster state filters to apply when listing clusters.

' diff --git a/awscli/examples/emr/create-cluster-examples.rst b/awscli/examples/emr/create-cluster-examples.rst index dd27da146cff..419eca2ab40d 100644 --- a/awscli/examples/emr/create-cluster-examples.rst +++ b/awscli/examples/emr/create-cluster-examples.rst @@ -158,3 +158,9 @@ Name, ActionOnFailure +**17. To enable consistent view and server-side encryption in EMRFS when creating an Amazon EMR cluster and changing RetryCount, RetryPeriod, and encryption algorithm from default values** + +- Command:: + + aws emr create-cluster --instance-type m3.xlarge --ami-version 3.2.1 --emrfs SSE=true,Consistent=true,RetryCount=5,RetryPeriod=30,Args=[fs.s3.serverSideEncryptionAlgorithm=AES256] + diff --git a/awscli/examples/emr/create-cluster-synopsis.rst b/awscli/examples/emr/create-cluster-synopsis.rst index adb22455248b..50a040e3a92c 100644 --- a/awscli/examples/emr/create-cluster-synopsis.rst +++ b/awscli/examples/emr/create-cluster-synopsis.rst @@ -15,6 +15,7 @@ [--enable-debugging | --no-enable-debugging] [--tags ] [--applications ] + [--emrfs ] [--bootstrap-actions ] [--steps ] [--restore-from-hbase-backup ] diff --git a/tests/unit/customizations/emr/input_emr_fs.json b/tests/unit/customizations/emr/input_emr_fs.json new file mode 100644 index 000000000000..b2844b91ab5b --- /dev/null +++ b/tests/unit/customizations/emr/input_emr_fs.json @@ -0,0 +1,10 @@ +{ + "Consistent": true, + "SSE": false, + "RetryCount": 10, + "RetryPeriod": 3, + "Args": [ + "fs.s3.serverSideEncryptionAlgorithm=AES256", + "fs.s3.sleepTimeSeconds=30" + ] +} diff --git a/tests/unit/customizations/emr/test_create_cluster.py b/tests/unit/customizations/emr/test_create_cluster.py index 663b3e3087b3..8b60e8a410c5 100644 --- a/tests/unit/customizations/emr/test_create_cluster.py +++ b/tests/unit/customizations/emr/test_create_cluster.py @@ -1156,6 +1156,38 @@ def test_constructed_result(self, call_patch): result_json = json.loads(result[0]) self.assertEquals(result_json, CONSTRUCTED_RESULT) + def test_emr_fs_config(self): + cmd = DEFAULT_CMD + \ + '--emrfs Consistent=true,SSE=false,RetryCount=10,' +\ + 'RetryPeriod=3,Args=[fs.s3.serverSideEncryptionAlgorithm=' +\ + 'AES256,fs.s3.sleepTimeSeconds=30]' + emf_fs_ba_config = \ + {'Name': 'Enable Consistent View in EMR-FS', + 'ScriptBootstrapAction': + {'Path': ('s3://us-east-1.elasticmapreduce/' + 'bootstrap-actions/configure-hadoop'), + 'Args': ['-e', + 'fs.s3.consistent=true', + '-e', + 'fs.s3.enableServerSideEncryption=false', + '-e', + 'fs.s3.consistent.retryCount=10', + '-e', + 'fs.s3.consistent.retryPeriodSeconds=3', + '-e', + 'fs.s3.serverSideEncryptionAlgorithm=AES256', + '-e', + 'fs.s3.sleepTimeSeconds=30'] + } + } + result = copy.deepcopy(DEFAULT_RESULT) + result['BootstrapActions'] = [emf_fs_ba_config] + self.assert_params_for_cmd(cmd, result) + + data_path = os.path.join( + os.path.dirname(__file__), 'input_emr_fs.json') + cmd = DEFAULT_CMD + '--emrfs file://' + data_path + self.assert_params_for_cmd(cmd, result) if __name__ == "__main__": unittest.main() From 1776feab4f209f5a1fec8bec643ea69b3db05604 Mon Sep 17 00:00:00 2001 From: Bhargava Kalathuru Date: Mon, 15 Sep 2014 11:34:07 -0700 Subject: [PATCH 2/4] EMR: Use region from profile for when global argument is not passed when building EMR applications --- awscli/customizations/emr/applicationutils.py | 19 +++++++++++-------- awscli/customizations/emr/createcluster.py | 1 + .../customizations/emr/installapplications.py | 4 ++-- .../customizations/emr/test_create_cluster.py | 9 +++++++++ .../emr/test_install_applications.py | 12 +++++++++++- 5 files changed, 34 insertions(+), 11 deletions(-) diff --git a/awscli/customizations/emr/applicationutils.py b/awscli/customizations/emr/applicationutils.py index f16f2ea377e2..9b12ce881efc 100644 --- a/awscli/customizations/emr/applicationutils.py +++ b/awscli/customizations/emr/applicationutils.py @@ -16,10 +16,13 @@ from awscli.customizations.emr import exceptions -def build_applications(parsed_applications, parsed_globals, ami_version=None): +def build_applications(session, + parsed_applications, parsed_globals, ami_version=None): app_list = [] step_list = [] ba_list = [] + region = parsed_globals.region if parsed_globals.region \ + else session.get_config_variable('region') for app_config in parsed_applications: app_name = app_config['Name'].lower() @@ -27,7 +30,7 @@ def build_applications(parsed_applications, parsed_globals, ami_version=None): if app_name == constants.HIVE: hive_version = constants.LATEST step_list.append( - _build_install_hive_step(region=parsed_globals.region)) + _build_install_hive_step(region=region)) args = app_config.get('Args') if args is not None: hive_site_path = _find_matching_arg( @@ -35,21 +38,21 @@ def build_applications(parsed_applications, parsed_globals, ami_version=None): if hive_site_path is not None: step_list.append( _build_install_hive_site_step( - region=parsed_globals.region, + region=region, hive_site_path=hive_site_path)) elif app_name == constants.PIG: pig_version = constants.LATEST step_list.append( _build_pig_install_step( - region=parsed_globals.region)) + region=region)) elif app_name == constants.GANGLIA: ba_list.append( _build_ganglia_install_bootstrap_action( - region=parsed_globals.region)) + region=region)) elif app_name == constants.HBASE: ba_list.append( _build_hbase_install_bootstrap_action( - region=parsed_globals.region)) + region=region)) if ami_version >= '3.0': step_list.append( _build_hbase_install_step( @@ -64,7 +67,7 @@ def build_applications(parsed_applications, parsed_globals, ami_version=None): elif app_name == constants.IMPALA: ba_list.append( _build_impala_install_bootstrap_action( - region=parsed_globals.region, + region=region, args=app_config.get('Args'))) else: app_list.append( @@ -128,7 +131,7 @@ def _build_install_hive_step(region, emrutils.build_s3_link(constants.HIVE_SCRIPT_PATH, region), constants.INSTALL_HIVE_ARG, constants.BASE_PATH_ARG, - emrutils.build_s3_link(constants.HIVE_BASE_PATH), + emrutils.build_s3_link(constants.HIVE_BASE_PATH, region), constants.HIVE_VERSIONS, constants.LATEST] step = emrutils.build_step( diff --git a/awscli/customizations/emr/createcluster.py b/awscli/customizations/emr/createcluster.py index 58608ab12172..94dd7bb5b431 100644 --- a/awscli/customizations/emr/createcluster.py +++ b/awscli/customizations/emr/createcluster.py @@ -186,6 +186,7 @@ def _run_main(self, parsed_args, parsed_globals): if parsed_args.applications is not None: app_list, ba_list, step_list = applicationutils.build_applications( + session=self._session, parsed_applications=parsed_args.applications, parsed_globals=parsed_globals, ami_version=params['AmiVersion']) diff --git a/awscli/customizations/emr/installapplications.py b/awscli/customizations/emr/installapplications.py index 5eaf53119a91..cb45f8e0fd37 100644 --- a/awscli/customizations/emr/installapplications.py +++ b/awscli/customizations/emr/installapplications.py @@ -40,7 +40,7 @@ def _run_main(self, parsed_args, parsed_globals): self._check_for_supported_apps(parsed_args.applications) parameters['Steps'] = applicationutils.build_applications( - parsed_args.applications, parsed_globals)[2] + self._session, parsed_args.applications, parsed_globals)[2] emrutils.call_and_display_response(self._session, 'AddJobFlowSteps', parameters, parsed_globals) @@ -61,4 +61,4 @@ def _check_for_supported_apps(self, parsed_applications): raise ValueError( "aws: error: Unknown application: " + app_config['Name'] + ". 'Name' should be one of the following: " + - ', '.join(constants.APPLICATIONS)) \ No newline at end of file + ', '.join(constants.APPLICATIONS)) diff --git a/tests/unit/customizations/emr/test_create_cluster.py b/tests/unit/customizations/emr/test_create_cluster.py index 8b60e8a410c5..461fee776143 100644 --- a/tests/unit/customizations/emr/test_create_cluster.py +++ b/tests/unit/customizations/emr/test_create_cluster.py @@ -800,6 +800,15 @@ def test_install_hive_with_defaults(self): result['Steps'] = [INSTALL_HIVE_STEP] self.assert_params_for_cmd(cmd, result) + def test_install_hive_with_profile_region(self): + self.driver.session.set_config_variable('region', 'cn-north-1') + cmd = DEFAULT_CMD + '--applications Name=Hive' + HIVE_STEP = json.dumps(INSTALL_HIVE_STEP).\ + replace('us-east-1', 'cn-north-1') + result = copy.deepcopy(DEFAULT_RESULT) + result['Steps'] = [json.loads(HIVE_STEP)] + self.assert_params_for_cmd(cmd, result) + def test_install_hive_site(self): cmdline = (DEFAULT_CMD + '--applications Name=Hive,' 'Args=[--hive-site=s3://test/hive-conf/hive-site.xml]') diff --git a/tests/unit/customizations/emr/test_install_applications.py b/tests/unit/customizations/emr/test_install_applications.py index 7e9b4da04d24..f66827320afe 100644 --- a/tests/unit/customizations/emr/test_install_applications.py +++ b/tests/unit/customizations/emr/test_install_applications.py @@ -14,6 +14,7 @@ from tests.unit.customizations.emr import EMRBaseAWSCommandParamsTest as \ BaseAWSCommandParamsTest import copy +import json INSTALL_HIVE_STEP = { @@ -76,12 +77,21 @@ def test_install_hive_site(self): 'Args=[--hive-site=s3://test/hive-conf/hive-site.xml,k1]') self.assert_params_for_cmd(cmdline, result) - def test_intall_hive_and_pig(self): + def test_install_hive_and_pig(self): cmdline = self.prefix + 'Name=Hive Name=Pig' result = {'JobFlowId': 'j-ABC123456', 'Steps': [INSTALL_HIVE_STEP, INSTALL_PIG_STEP]} self.assert_params_for_cmd(cmdline, result) + def test_install_pig_with_profile_region(self): + self.driver.session.set_config_variable('region', 'cn-north-1') + cmdline = self.prefix + 'Name=Pig' + PIG_STEP = json.dumps(INSTALL_PIG_STEP).\ + replace('us-east-1', 'cn-north-1') + result = {'JobFlowId': 'j-ABC123456', + 'Steps': [json.loads(PIG_STEP)]} + self.assert_params_for_cmd(cmdline, result) + def test_install_impala_error(self): cmdline = self.prefix + ' Name=Impala' From 5f7ad7f62deed66346cef7f197c97c185642511a Mon Sep 17 00:00:00 2001 From: kyleknap Date: Tue, 16 Sep 2014 17:09:52 -0700 Subject: [PATCH 3/4] Updated changelog for new feature. --- CHANGELOG.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 28ee6f3ae7f1..abc23a3f910f 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,12 @@ CHANGELOG ========= +1.4.4 +===== + +* feature:``aws emr create-cluster``: Add support for ``--emrfs``. + + 1.4.3 ===== From a6f40a626b6af73827f5f71f5a41fe11e5077591 Mon Sep 17 00:00:00 2001 From: kyleknap Date: Tue, 16 Sep 2014 17:11:46 -0700 Subject: [PATCH 4/4] Bumping version to 1.4.4 --- awscli/__init__.py | 2 +- doc/source/conf.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/awscli/__init__.py b/awscli/__init__.py index 5efb8801c3a7..fa375e1e7de2 100644 --- a/awscli/__init__.py +++ b/awscli/__init__.py @@ -17,7 +17,7 @@ """ import os -__version__ = '1.4.3' +__version__ = '1.4.4' # # Get our data path to be added to botocore's search path diff --git a/doc/source/conf.py b/doc/source/conf.py index 65082fd1c6ef..266341c53c60 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -52,7 +52,7 @@ # The short X.Y version. version = '1.4' # The full version, including alpha/beta/rc tags. -release = '1.4.3' +release = '1.4.4' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages.