From fac2ad1835036bc94304a47fbb351de58b79fa53 Mon Sep 17 00:00:00 2001
From: Shuo Diao
Date: Tue, 19 Aug 2014 16:00:17 -0700
Subject: [PATCH 1/4] EMR: Add support for --emrfs in create-cluster command.
EMR: Update AMI version used in create-cluster --emrfs example.
---
awscli/customizations/emr/argumentschema.py | 34 ++++++++++++
awscli/customizations/emr/constants.py | 9 ++++
awscli/customizations/emr/createcluster.py | 53 ++++++++++++++++++-
awscli/customizations/emr/helptext.py | 9 +++-
.../examples/emr/create-cluster-examples.rst | 6 +++
.../examples/emr/create-cluster-synopsis.rst | 1 +
.../unit/customizations/emr/input_emr_fs.json | 10 ++++
.../customizations/emr/test_create_cluster.py | 32 +++++++++++
8 files changed, 150 insertions(+), 4 deletions(-)
create mode 100644 tests/unit/customizations/emr/input_emr_fs.json
diff --git a/awscli/customizations/emr/argumentschema.py b/awscli/customizations/emr/argumentschema.py
index a0a869e3251e..8aba3f23e443 100644
--- a/awscli/customizations/emr/argumentschema.py
+++ b/awscli/customizations/emr/argumentschema.py
@@ -212,3 +212,37 @@
}
}
}
+
+EMR_FS_SCHEMA = {  # Schema of the value accepted by create-cluster --emrfs.
+ "type": "object",
+ "properties": {
+ "Consistent": {
+ "type": "boolean",
+ "description": "Enable EMRFS consistent view."
+ },
+ "SSE": {
+ "type": "boolean",
+ "description": "Enable Amazon S3 server-side encryption on files "
+ "written to S3 by EMRFS."
+ },
+ "RetryCount": {
+ "type": "integer",
+ "description":
+ "The maximum number of times to retry upon S3 inconsistency."
+ },
+ "RetryPeriod": {
+ "type": "integer",
+ "description": "The amount of time (in seconds) until the first "
+ "retry. Subsequent retries use an exponential "
+ "back-off."
+ },
+ "Args": {  # each entry is later emitted after its own '-e' flag
+ "type": "array",
+ "description": "A list of arguments to pass for additional "
+ "EMRFS configuration.",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+}
diff --git a/awscli/customizations/emr/constants.py b/awscli/customizations/emr/constants.py
index 6f3c61fe0a10..320630251657 100644
--- a/awscli/customizations/emr/constants.py
+++ b/awscli/customizations/emr/constants.py
@@ -28,6 +28,15 @@
DEBUGGING_PATH = '/libs/state-pusher/0.1/fetch'
DEBUGGING_NAME = 'Setup Hadoop Debugging'
+CONFIG_HADOOP_PATH = '/bootstrap-actions/configure-hadoop'
+
+EMR_FS_BA_NAME = 'Enable Consistent View in EMR-FS'
+EMR_FS_BA_ARG_KEY = '-e'
+EMR_FS_CONSISTENT_KEY = 'fs.s3.consistent'
+EMR_FS_SSE_KEY = 'fs.s3.enableServerSideEncryption'
+EMR_FS_RETRY_COUNT_KEY = 'fs.s3.consistent.retryCount'
+EMR_FS_RETRY_PERIOD_KEY = 'fs.s3.consistent.retryPeriodSeconds'
+
MAX_BOOTSTRAP_ACTION_NUMBER = 16
BOOTSTRAP_ACTION_NAME = 'Bootstrap action'
diff --git a/awscli/customizations/emr/createcluster.py b/awscli/customizations/emr/createcluster.py
index dea88fe0bdd6..58608ab12172 100644
--- a/awscli/customizations/emr/createcluster.py
+++ b/awscli/customizations/emr/createcluster.py
@@ -33,8 +33,8 @@ class CreateCluster(BasicCommand):
DESCRIPTION = (
'Creates and starts running an EMR cluster.\n'
'\nQuick start:\n'
- '\naws emr create-cluster --ami-version '
- '--instance-type [--instance-count ]\n')
+ '\naws emr create-cluster --ami-version --instance-type'
+ ' [--instance-count ]\n')
ARG_TABLE = [
{'name': 'ami-version',
'help_text': helptext.AMI_VERSION,
@@ -87,6 +87,9 @@ class CreateCluster(BasicCommand):
'help_text': helptext.APPLICATIONS,
'schema': argumentschema.APPLICATIONS_SCHEMA,
'default': defaultconfig.APPLICATIONS},
+ {'name': 'emrfs',
+ 'help_text': helptext.EMR_FS,
+ 'schema': argumentschema.EMR_FS_SCHEMA},
{'name': 'steps',
'schema': argumentschema.STEPS_SCHEMA,
'help_text': helptext.STEPS},
@@ -211,6 +214,19 @@ def _run_main(self, parsed_args, parsed_globals):
cluster=params,
parsed_boostrap_actions=parsed_args.bootstrap_actions)
+ if parsed_args.emrfs is not None:
+ emr_fs_ba_args = self._build_emr_fs_args(parsed_args.emrfs)
+ emr_fs_ba_config = \
+ emrutils.build_bootstrap_action(
+ path=emrutils.build_s3_link(
+ relative_path=constants.CONFIG_HADOOP_PATH,
+ region=parsed_globals.region),
+ name=constants.EMR_FS_BA_NAME,
+ args=emr_fs_ba_args)
+ self._update_cluster_dict(
+ cluster=params, key='BootstrapActions',
+ value=[emr_fs_ba_config])
+
if parsed_args.steps is not None:
steps_list = steputils.build_step_config_list(
parsed_step_list=parsed_args.steps,
@@ -359,3 +375,36 @@ def _get_missing_applications_for_steps(self, specified_apps, parsed_args):
step_type not in specified_apps:
missing_apps.add(step['Type'].title())
return missing_apps
+
+    def _build_emr_fs_args(self, parsed_emr_fs):
+        """Translate parsed --emrfs options into configure-hadoop args.
+
+        Every recognised option that is present (i.e. not None) is
+        emitted as a '-e', 'key=value' pair; the Consistent and SSE
+        values are lower-cased after str(). Entries of 'Args' are
+        passed through verbatim, each preceded by its own '-e' flag.
+        The pair order is fixed: Consistent, SSE, RetryCount,
+        RetryPeriod, then 'Args' in the order given.
+        """
+        flag = constants.EMR_FS_BA_ARG_KEY
+        ba_args = []
+
+        # (option name, configuration key, lower-case the value?)
+        option_table = (
+            ('Consistent', constants.EMR_FS_CONSISTENT_KEY, True),
+            ('SSE', constants.EMR_FS_SSE_KEY, True),
+            ('RetryCount', constants.EMR_FS_RETRY_COUNT_KEY, False),
+            ('RetryPeriod', constants.EMR_FS_RETRY_PERIOD_KEY, False),
+        )
+        for option, config_key, lower_case in option_table:
+            value = parsed_emr_fs.get(option)
+            if value is None:
+                continue
+            text = str(value).lower() if lower_case else str(value)
+            ba_args.extend([flag, config_key + '=' + text])
+
+        extra_args = parsed_emr_fs.get('Args')
+        if extra_args is not None:
+            for extra in extra_args:
+                ba_args.extend([flag, extra])
+        return ba_args
diff --git a/awscli/customizations/emr/helptext.py b/awscli/customizations/emr/helptext.py
index 3bd166906107..d5d2dde5191b 100644
--- a/awscli/customizations/emr/helptext.py
+++ b/awscli/customizations/emr/helptext.py
@@ -176,6 +176,10 @@
'(e.g. Args=arg1,arg2,arg3) or a bracket-enclosed list of values '
' and/or key-value pairs (e.g. Args=[arg1,arg2=arg3,arg4]).
')
+EMR_FS = (
+ 'Configures certain features in EMRFS like consistent '
+ 'view and server-side encryption.
')
+
RESTORE_FROM_HBASE = (
'Launches a new HBase cluster and populates it with'
' data from a previous backup of an HBase cluster. You must install HBase'
@@ -185,14 +189,15 @@
STEPS = (
'
A list of steps to be executed by the cluster. A step can be'
' specified either using the shorthand syntax, JSON file or as a JSON'
- ' string. Note: [Args] supplied with steps should either be a'
+ ' string. Note: [Args] supplied with steps should either be a'
' comma-separated list of values (e.g. Args=arg1,arg2,arg3) or'
' a bracket-enclosed list of values and/or key-value pairs'
' (e.g. Args=[arg1,arg2=arg3,arg4]).
')
INSTALL_APPLICATIONS = (
'The applications to be installed.'
- ' Takes the following parameters: Name
and Args
.')
+ ' Takes the following parameters: '
+ 'Name
and Args
.')
LIST_CLUSTERS_CLUSTER_STATES = (
'
The cluster state filters to apply when listing clusters.
'
diff --git a/awscli/examples/emr/create-cluster-examples.rst b/awscli/examples/emr/create-cluster-examples.rst
index dd27da146cff..419eca2ab40d 100644
--- a/awscli/examples/emr/create-cluster-examples.rst
+++ b/awscli/examples/emr/create-cluster-examples.rst
@@ -158,3 +158,9 @@
Name, ActionOnFailure
+**17. To enable consistent view and server-side encryption in EMRFS when creating an Amazon EMR cluster, and to change RetryCount, RetryPeriod, and the encryption algorithm from their default values**
+
+- Command::
+
+ aws emr create-cluster --instance-type m3.xlarge --ami-version 3.2.1 --emrfs SSE=true,Consistent=true,RetryCount=5,RetryPeriod=30,Args=[fs.s3.serverSideEncryptionAlgorithm=AES256]
+
diff --git a/awscli/examples/emr/create-cluster-synopsis.rst b/awscli/examples/emr/create-cluster-synopsis.rst
index adb22455248b..50a040e3a92c 100644
--- a/awscli/examples/emr/create-cluster-synopsis.rst
+++ b/awscli/examples/emr/create-cluster-synopsis.rst
@@ -15,6 +15,7 @@
[--enable-debugging | --no-enable-debugging]
[--tags ]
[--applications ]
+ [--emrfs ]
[--bootstrap-actions ]
[--steps ]
[--restore-from-hbase-backup ]
diff --git a/tests/unit/customizations/emr/input_emr_fs.json b/tests/unit/customizations/emr/input_emr_fs.json
new file mode 100644
index 000000000000..b2844b91ab5b
--- /dev/null
+++ b/tests/unit/customizations/emr/input_emr_fs.json
@@ -0,0 +1,10 @@
+{
+ "Consistent": true,
+ "SSE": false,
+ "RetryCount": 10,
+ "RetryPeriod": 3,
+ "Args": [
+ "fs.s3.serverSideEncryptionAlgorithm=AES256",
+ "fs.s3.sleepTimeSeconds=30"
+ ]
+}
diff --git a/tests/unit/customizations/emr/test_create_cluster.py b/tests/unit/customizations/emr/test_create_cluster.py
index 663b3e3087b3..8b60e8a410c5 100644
--- a/tests/unit/customizations/emr/test_create_cluster.py
+++ b/tests/unit/customizations/emr/test_create_cluster.py
@@ -1156,6 +1156,38 @@ def test_constructed_result(self, call_patch):
result_json = json.loads(result[0])
self.assertEquals(result_json, CONSTRUCTED_RESULT)
+    def test_emr_fs_config(self):
+        # --emrfs must add a single configure-hadoop bootstrap action,
+        # whether the options come from shorthand or from a JSON file.
+        expected_ba_args = [
+            '-e', 'fs.s3.consistent=true',
+            '-e', 'fs.s3.enableServerSideEncryption=false',
+            '-e', 'fs.s3.consistent.retryCount=10',
+            '-e', 'fs.s3.consistent.retryPeriodSeconds=3',
+            '-e', 'fs.s3.serverSideEncryptionAlgorithm=AES256',
+            '-e', 'fs.s3.sleepTimeSeconds=30',
+        ]
+        emr_fs_ba_config = {
+            'Name': 'Enable Consistent View in EMR-FS',
+            'ScriptBootstrapAction': {
+                'Path': ('s3://us-east-1.elasticmapreduce/'
+                         'bootstrap-actions/configure-hadoop'),
+                'Args': expected_ba_args,
+            },
+        }
+        expected = copy.deepcopy(DEFAULT_RESULT)
+        expected['BootstrapActions'] = [emr_fs_ba_config]
+
+        shorthand = (
+            'Consistent=true,SSE=false,RetryCount=10,RetryPeriod=3,'
+            'Args=[fs.s3.serverSideEncryptionAlgorithm=AES256,'
+            'fs.s3.sleepTimeSeconds=30]')
+        json_path = os.path.join(
+            os.path.dirname(__file__), 'input_emr_fs.json')
+        # Both input forms are checked against the same expectation.
+        for emrfs_value in (shorthand, 'file://' + json_path):
+            self.assert_params_for_cmd(
+                DEFAULT_CMD + '--emrfs ' + emrfs_value, expected)
if __name__ == "__main__":
unittest.main()
From 1776feab4f209f5a1fec8bec643ea69b3db05604 Mon Sep 17 00:00:00 2001
From: Bhargava Kalathuru
Date: Mon, 15 Sep 2014 11:34:07 -0700
Subject: [PATCH 2/4] EMR: Use region from profile when the global region
argument is not passed while building EMR applications
---
awscli/customizations/emr/applicationutils.py | 19 +++++++++++--------
awscli/customizations/emr/createcluster.py | 1 +
.../customizations/emr/installapplications.py | 4 ++--
.../customizations/emr/test_create_cluster.py | 9 +++++++++
.../emr/test_install_applications.py | 12 +++++++++++-
5 files changed, 34 insertions(+), 11 deletions(-)
diff --git a/awscli/customizations/emr/applicationutils.py b/awscli/customizations/emr/applicationutils.py
index f16f2ea377e2..9b12ce881efc 100644
--- a/awscli/customizations/emr/applicationutils.py
+++ b/awscli/customizations/emr/applicationutils.py
@@ -16,10 +16,13 @@
from awscli.customizations.emr import exceptions
-def build_applications(parsed_applications, parsed_globals, ami_version=None):
+def build_applications(session,
+ parsed_applications, parsed_globals, ami_version=None):
app_list = []
step_list = []
ba_list = []
+ region = parsed_globals.region if parsed_globals.region \
+ else session.get_config_variable('region')
for app_config in parsed_applications:
app_name = app_config['Name'].lower()
@@ -27,7 +30,7 @@ def build_applications(parsed_applications, parsed_globals, ami_version=None):
if app_name == constants.HIVE:
hive_version = constants.LATEST
step_list.append(
- _build_install_hive_step(region=parsed_globals.region))
+ _build_install_hive_step(region=region))
args = app_config.get('Args')
if args is not None:
hive_site_path = _find_matching_arg(
@@ -35,21 +38,21 @@ def build_applications(parsed_applications, parsed_globals, ami_version=None):
if hive_site_path is not None:
step_list.append(
_build_install_hive_site_step(
- region=parsed_globals.region,
+ region=region,
hive_site_path=hive_site_path))
elif app_name == constants.PIG:
pig_version = constants.LATEST
step_list.append(
_build_pig_install_step(
- region=parsed_globals.region))
+ region=region))
elif app_name == constants.GANGLIA:
ba_list.append(
_build_ganglia_install_bootstrap_action(
- region=parsed_globals.region))
+ region=region))
elif app_name == constants.HBASE:
ba_list.append(
_build_hbase_install_bootstrap_action(
- region=parsed_globals.region))
+ region=region))
if ami_version >= '3.0':
step_list.append(
_build_hbase_install_step(
@@ -64,7 +67,7 @@ def build_applications(parsed_applications, parsed_globals, ami_version=None):
elif app_name == constants.IMPALA:
ba_list.append(
_build_impala_install_bootstrap_action(
- region=parsed_globals.region,
+ region=region,
args=app_config.get('Args')))
else:
app_list.append(
@@ -128,7 +131,7 @@ def _build_install_hive_step(region,
emrutils.build_s3_link(constants.HIVE_SCRIPT_PATH, region),
constants.INSTALL_HIVE_ARG,
constants.BASE_PATH_ARG,
- emrutils.build_s3_link(constants.HIVE_BASE_PATH),
+ emrutils.build_s3_link(constants.HIVE_BASE_PATH, region),
constants.HIVE_VERSIONS,
constants.LATEST]
step = emrutils.build_step(
diff --git a/awscli/customizations/emr/createcluster.py b/awscli/customizations/emr/createcluster.py
index 58608ab12172..94dd7bb5b431 100644
--- a/awscli/customizations/emr/createcluster.py
+++ b/awscli/customizations/emr/createcluster.py
@@ -186,6 +186,7 @@ def _run_main(self, parsed_args, parsed_globals):
if parsed_args.applications is not None:
app_list, ba_list, step_list = applicationutils.build_applications(
+ session=self._session,
parsed_applications=parsed_args.applications,
parsed_globals=parsed_globals,
ami_version=params['AmiVersion'])
diff --git a/awscli/customizations/emr/installapplications.py b/awscli/customizations/emr/installapplications.py
index 5eaf53119a91..cb45f8e0fd37 100644
--- a/awscli/customizations/emr/installapplications.py
+++ b/awscli/customizations/emr/installapplications.py
@@ -40,7 +40,7 @@ def _run_main(self, parsed_args, parsed_globals):
self._check_for_supported_apps(parsed_args.applications)
parameters['Steps'] = applicationutils.build_applications(
- parsed_args.applications, parsed_globals)[2]
+ self._session, parsed_args.applications, parsed_globals)[2]
emrutils.call_and_display_response(self._session, 'AddJobFlowSteps',
parameters, parsed_globals)
@@ -61,4 +61,4 @@ def _check_for_supported_apps(self, parsed_applications):
raise ValueError(
"aws: error: Unknown application: " + app_config['Name'] +
". 'Name' should be one of the following: " +
- ', '.join(constants.APPLICATIONS))
\ No newline at end of file
+ ', '.join(constants.APPLICATIONS))
diff --git a/tests/unit/customizations/emr/test_create_cluster.py b/tests/unit/customizations/emr/test_create_cluster.py
index 8b60e8a410c5..461fee776143 100644
--- a/tests/unit/customizations/emr/test_create_cluster.py
+++ b/tests/unit/customizations/emr/test_create_cluster.py
@@ -800,6 +800,15 @@ def test_install_hive_with_defaults(self):
result['Steps'] = [INSTALL_HIVE_STEP]
self.assert_params_for_cmd(cmd, result)
+    def test_install_hive_with_profile_region(self):
+        # The session's configured region must replace us-east-1 in the step.
+        self.driver.session.set_config_variable('region', 'cn-north-1')
+        expected = copy.deepcopy(DEFAULT_RESULT)
+        expected['Steps'] = [json.loads(
+            json.dumps(INSTALL_HIVE_STEP).replace('us-east-1', 'cn-north-1'))]
+        self.assert_params_for_cmd(
+            DEFAULT_CMD + '--applications Name=Hive', expected)
+
def test_install_hive_site(self):
cmdline = (DEFAULT_CMD + '--applications Name=Hive,'
'Args=[--hive-site=s3://test/hive-conf/hive-site.xml]')
diff --git a/tests/unit/customizations/emr/test_install_applications.py b/tests/unit/customizations/emr/test_install_applications.py
index 7e9b4da04d24..f66827320afe 100644
--- a/tests/unit/customizations/emr/test_install_applications.py
+++ b/tests/unit/customizations/emr/test_install_applications.py
@@ -14,6 +14,7 @@
from tests.unit.customizations.emr import EMRBaseAWSCommandParamsTest as \
BaseAWSCommandParamsTest
import copy
+import json
INSTALL_HIVE_STEP = {
@@ -76,12 +77,21 @@ def test_install_hive_site(self):
'Args=[--hive-site=s3://test/hive-conf/hive-site.xml,k1]')
self.assert_params_for_cmd(cmdline, result)
- def test_intall_hive_and_pig(self):
+ def test_install_hive_and_pig(self):
cmdline = self.prefix + 'Name=Hive Name=Pig'
result = {'JobFlowId': 'j-ABC123456', 'Steps': [INSTALL_HIVE_STEP,
INSTALL_PIG_STEP]}
self.assert_params_for_cmd(cmdline, result)
+    def test_install_pig_with_profile_region(self):
+        # The session's configured region must replace us-east-1 in the step.
+        self.driver.session.set_config_variable('region', 'cn-north-1')
+        pig_step = json.loads(
+            json.dumps(INSTALL_PIG_STEP).replace('us-east-1', 'cn-north-1'))
+        expected = {'JobFlowId': 'j-ABC123456', 'Steps': [pig_step]}
+        cmdline = self.prefix + 'Name=Pig'
+        self.assert_params_for_cmd(cmdline, expected)
+
def test_install_impala_error(self):
cmdline = self.prefix + ' Name=Impala'
From 5f7ad7f62deed66346cef7f197c97c185642511a Mon Sep 17 00:00:00 2001
From: kyleknap
Date: Tue, 16 Sep 2014 17:09:52 -0700
Subject: [PATCH 3/4] Updated changelog for new feature.
---
CHANGELOG.rst | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 28ee6f3ae7f1..abc23a3f910f 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -2,6 +2,12 @@
CHANGELOG
=========
+1.4.4
+=====
+
+* feature:``aws emr create-cluster``: Add support for ``--emrfs``.
+
+
1.4.3
=====
From a6f40a626b6af73827f5f71f5a41fe11e5077591 Mon Sep 17 00:00:00 2001
From: kyleknap
Date: Tue, 16 Sep 2014 17:11:46 -0700
Subject: [PATCH 4/4] Bumping version to 1.4.4
---
awscli/__init__.py | 2 +-
doc/source/conf.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/awscli/__init__.py b/awscli/__init__.py
index 5efb8801c3a7..fa375e1e7de2 100644
--- a/awscli/__init__.py
+++ b/awscli/__init__.py
@@ -17,7 +17,7 @@
"""
import os
-__version__ = '1.4.3'
+__version__ = '1.4.4'
#
# Get our data path to be added to botocore's search path
diff --git a/doc/source/conf.py b/doc/source/conf.py
index 65082fd1c6ef..266341c53c60 100644
--- a/doc/source/conf.py
+++ b/doc/source/conf.py
@@ -52,7 +52,7 @@
# The short X.Y version.
version = '1.4'
# The full version, including alpha/beta/rc tags.
-release = '1.4.3'
+release = '1.4.4'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.