Skip to content

Commit

Permalink
Merge branch 'release-1.4.4' into develop
Browse files Browse the repository at this point in the history
* release-1.4.4:
  Bumping version to 1.4.4
  Updated changelog for new feature.
  EMR: Use region from profile for when global argument is not passed when building EMR applications
  EMR: Add support for --emrfs in create-cluster command.
  • Loading branch information
kyleknap committed Sep 17, 2014
2 parents 999ad81 + a6f40a6 commit 0f0000a
Show file tree
Hide file tree
Showing 14 changed files with 192 additions and 17 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
CHANGELOG
=========

1.4.4
=====

* feature:``aws emr create-cluster``: Add support for ``--emrfs``.


1.4.3
=====

Expand Down
2 changes: 1 addition & 1 deletion awscli/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"""
import os

__version__ = '1.4.3'
__version__ = '1.4.4'

#
# Get our data path to be added to botocore's search path
Expand Down
19 changes: 11 additions & 8 deletions awscli/customizations/emr/applicationutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,40 +16,43 @@
from awscli.customizations.emr import exceptions


def build_applications(parsed_applications, parsed_globals, ami_version=None):
def build_applications(session,
parsed_applications, parsed_globals, ami_version=None):
app_list = []
step_list = []
ba_list = []
region = parsed_globals.region if parsed_globals.region \
else session.get_config_variable('region')

for app_config in parsed_applications:
app_name = app_config['Name'].lower()

if app_name == constants.HIVE:
hive_version = constants.LATEST
step_list.append(
_build_install_hive_step(region=parsed_globals.region))
_build_install_hive_step(region=region))
args = app_config.get('Args')
if args is not None:
hive_site_path = _find_matching_arg(
key=constants.HIVE_SITE_KEY, args_list=args)
if hive_site_path is not None:
step_list.append(
_build_install_hive_site_step(
region=parsed_globals.region,
region=region,
hive_site_path=hive_site_path))
elif app_name == constants.PIG:
pig_version = constants.LATEST
step_list.append(
_build_pig_install_step(
region=parsed_globals.region))
region=region))
elif app_name == constants.GANGLIA:
ba_list.append(
_build_ganglia_install_bootstrap_action(
region=parsed_globals.region))
region=region))
elif app_name == constants.HBASE:
ba_list.append(
_build_hbase_install_bootstrap_action(
region=parsed_globals.region))
region=region))
if ami_version >= '3.0':
step_list.append(
_build_hbase_install_step(
Expand All @@ -64,7 +67,7 @@ def build_applications(parsed_applications, parsed_globals, ami_version=None):
elif app_name == constants.IMPALA:
ba_list.append(
_build_impala_install_bootstrap_action(
region=parsed_globals.region,
region=region,
args=app_config.get('Args')))
else:
app_list.append(
Expand Down Expand Up @@ -128,7 +131,7 @@ def _build_install_hive_step(region,
emrutils.build_s3_link(constants.HIVE_SCRIPT_PATH, region),
constants.INSTALL_HIVE_ARG,
constants.BASE_PATH_ARG,
emrutils.build_s3_link(constants.HIVE_BASE_PATH),
emrutils.build_s3_link(constants.HIVE_BASE_PATH, region),
constants.HIVE_VERSIONS,
constants.LATEST]
step = emrutils.build_step(
Expand Down
34 changes: 34 additions & 0 deletions awscli/customizations/emr/argumentschema.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,3 +212,37 @@
}
}
}

# Schema describing the structure accepted by the ``--emrfs`` option of
# ``aws emr create-cluster``. All properties are optional.
EMR_FS_SCHEMA = {
    "type": "object",
    "properties": {
        # Boolean switches.
        "Consistent": {
            "type": "boolean",
            "description": "Enable EMRFS consistent view."
        },
        "SSE": {
            "type": "boolean",
            "description": (
                "Enable Amazon S3 server-side encryption on files written "
                "to S3 by EMRFS.")
        },
        # Retry tuning.
        "RetryCount": {
            "type": "integer",
            "description": (
                "The maximum number of times to retry upon S3 "
                "inconsistency.")
        },
        "RetryPeriod": {
            "type": "integer",
            "description": (
                "The amount of time (in seconds) until the first retry. "
                "Subsequent retries use an exponential back-off.")
        },
        # Free-form additional settings, given as a list of strings.
        "Args": {
            "type": "array",
            "description": (
                "A list of arguments to pass for additional EMRFS "
                "configuration."),
            "items": {"type": "string"}
        }
    }
}
9 changes: 9 additions & 0 deletions awscli/customizations/emr/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,15 @@
DEBUGGING_PATH = '/libs/state-pusher/0.1/fetch'
DEBUGGING_NAME = 'Setup Hadoop Debugging'

# Relative path of the configure-hadoop bootstrap action script;
# create-cluster passes it to emrutils.build_s3_link() to build the
# script location used for the EMRFS bootstrap action.
CONFIG_HADOOP_PATH = '/bootstrap-actions/configure-hadoop'

# Name given to the bootstrap action generated from the --emrfs option.
EMR_FS_BA_NAME = 'Enable Consistent View in EMR-FS'
# Flag placed in front of every setting passed to that bootstrap action.
EMR_FS_BA_ARG_KEY = '-e'
# Setting names used for the Consistent, SSE, RetryCount and RetryPeriod
# fields of --emrfs; each is emitted as '<key>=<value>'.
EMR_FS_CONSISTENT_KEY = 'fs.s3.consistent'
EMR_FS_SSE_KEY = 'fs.s3.enableServerSideEncryption'
EMR_FS_RETRY_COUNT_KEY = 'fs.s3.consistent.retryCount'
EMR_FS_RETRY_PERIOD_KEY = 'fs.s3.consistent.retryPeriodSeconds'

MAX_BOOTSTRAP_ACTION_NUMBER = 16
BOOTSTRAP_ACTION_NAME = 'Bootstrap action'

Expand Down
54 changes: 52 additions & 2 deletions awscli/customizations/emr/createcluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ class CreateCluster(BasicCommand):
DESCRIPTION = (
'Creates and starts running an EMR cluster.\n'
'\nQuick start:\n'
'\naws emr create-cluster --ami-version <ami-version> '
'--instance-type <instance-type> [--instance-count <instance-count>]\n')
'\naws emr create-cluster --ami-version <ami-version> --instance-type'
' <instance-type> [--instance-count <instance-count>]\n')
ARG_TABLE = [
{'name': 'ami-version',
'help_text': helptext.AMI_VERSION,
Expand Down Expand Up @@ -87,6 +87,9 @@ class CreateCluster(BasicCommand):
'help_text': helptext.APPLICATIONS,
'schema': argumentschema.APPLICATIONS_SCHEMA,
'default': defaultconfig.APPLICATIONS},
{'name': 'emrfs',
'help_text': helptext.EMR_FS,
'schema': argumentschema.EMR_FS_SCHEMA},
{'name': 'steps',
'schema': argumentschema.STEPS_SCHEMA,
'help_text': helptext.STEPS},
Expand Down Expand Up @@ -183,6 +186,7 @@ def _run_main(self, parsed_args, parsed_globals):

if parsed_args.applications is not None:
app_list, ba_list, step_list = applicationutils.build_applications(
session=self._session,
parsed_applications=parsed_args.applications,
parsed_globals=parsed_globals,
ami_version=params['AmiVersion'])
Expand Down Expand Up @@ -211,6 +215,19 @@ def _run_main(self, parsed_args, parsed_globals):
cluster=params,
parsed_boostrap_actions=parsed_args.bootstrap_actions)

if parsed_args.emrfs is not None:
emr_fs_ba_args = self._build_emr_fs_args(parsed_args.emrfs)
emr_fs_ba_config = \
emrutils.build_bootstrap_action(
path=emrutils.build_s3_link(
relative_path=constants.CONFIG_HADOOP_PATH,
region=parsed_globals.region),
name=constants.EMR_FS_BA_NAME,
args=emr_fs_ba_args)
self._update_cluster_dict(
cluster=params, key='BootstrapActions',
value=[emr_fs_ba_config])

if parsed_args.steps is not None:
steps_list = steputils.build_step_config_list(
parsed_step_list=parsed_args.steps,
Expand Down Expand Up @@ -359,3 +376,36 @@ def _get_missing_applications_for_steps(self, specified_apps, parsed_args):
step_type not in specified_apps:
missing_apps.add(step['Type'].title())
return missing_apps

def _build_emr_fs_args(self, parsed_emr_fs):
args = []
if parsed_emr_fs.get('Consistent') is not None:
args.append(constants.EMR_FS_BA_ARG_KEY)
args.append(
constants.EMR_FS_CONSISTENT_KEY +
'=' + str(parsed_emr_fs.get('Consistent')).lower())

if parsed_emr_fs.get('SSE') is not None:
args.append(constants.EMR_FS_BA_ARG_KEY)
args.append(
constants.EMR_FS_SSE_KEY + '=' +
str(parsed_emr_fs.get('SSE')).lower())

if parsed_emr_fs.get('RetryCount') is not None:
args.append(constants.EMR_FS_BA_ARG_KEY)
args.append(
constants.EMR_FS_RETRY_COUNT_KEY + '=' +
str(parsed_emr_fs.get('RetryCount')))

if parsed_emr_fs.get('RetryPeriod') is not None:
args.append(constants.EMR_FS_BA_ARG_KEY)
args.append(
constants.EMR_FS_RETRY_PERIOD_KEY + '=' +
str(parsed_emr_fs.get('RetryPeriod')))

if parsed_emr_fs.get('Args') is not None:
for arg in parsed_emr_fs.get('Args'):
args.append(constants.EMR_FS_BA_ARG_KEY)
args.append(arg)

return args
9 changes: 7 additions & 2 deletions awscli/customizations/emr/helptext.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,10 @@
'(e.g. Args=arg1,arg2,arg3) or a bracket-enclosed list of values '
' and/or key-value pairs (e.g. Args=[arg1,arg2=arg3,arg4]).</p>')

# Help text shown for the --emrfs option of create-cluster.
EMR_FS = (
    '<p>Configures certain features in EMRFS '
    'like consistent view and server-side encryption.</p>'
)

RESTORE_FROM_HBASE = (
'<p>Launches a new HBase cluster and populates it with'
' data from a previous backup of an HBase cluster. You must install HBase'
Expand All @@ -185,14 +189,15 @@
STEPS = (
'<p>A list of steps to be executed by the cluster. A step can be'
' specified either using the shorthand syntax, JSON file or as a JSON'
' string. Note: [Args] supplied with steps should either be a'
' string. Note: [Args] supplied with steps should either be a'
' comma-separated list of values (e.g. Args=arg1,arg2,arg3) or'
' a bracket-enclosed list of values and/or key-value pairs'
' (e.g. Args=[arg1,arg2=arg3,arg4]).</p>')

INSTALL_APPLICATIONS = (
'<p>The applications to be installed.'
' Takes the following parameters: <code>Name</code> and <code>Args</code>.')
' Takes the following parameters: '
'<code>Name</code> and <code>Args</code>.')

LIST_CLUSTERS_CLUSTER_STATES = (
'<p>The cluster state filters to apply when listing clusters.</p>'
Expand Down
4 changes: 2 additions & 2 deletions awscli/customizations/emr/installapplications.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def _run_main(self, parsed_args, parsed_globals):

self._check_for_supported_apps(parsed_args.applications)
parameters['Steps'] = applicationutils.build_applications(
parsed_args.applications, parsed_globals)[2]
self._session, parsed_args.applications, parsed_globals)[2]

emrutils.call_and_display_response(self._session, 'AddJobFlowSteps',
parameters, parsed_globals)
Expand All @@ -61,4 +61,4 @@ def _check_for_supported_apps(self, parsed_applications):
raise ValueError(
"aws: error: Unknown application: " + app_config['Name'] +
". 'Name' should be one of the following: " +
', '.join(constants.APPLICATIONS))
', '.join(constants.APPLICATIONS))
6 changes: 6 additions & 0 deletions awscli/examples/emr/create-cluster-examples.rst
Original file line number Diff line number Diff line change
Expand Up @@ -158,3 +158,9 @@

Name, ActionOnFailure

**17. To enable consistent view and server-side encryption in EMRFS when creating an Amazon EMR cluster, and to change RetryCount, RetryPeriod, and the encryption algorithm from their default values**

- Command::

aws emr create-cluster --instance-type m3.xlarge --ami-version 3.2.1 --emrfs SSE=true,Consistent=true,RetryCount=5,RetryPeriod=30,Args=[fs.s3.serverSideEncryptionAlgorithm=AES256]

1 change: 1 addition & 0 deletions awscli/examples/emr/create-cluster-synopsis.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
[--enable-debugging | --no-enable-debugging]
[--tags <value>]
[--applications <value>]
[--emrfs <value>]
[--bootstrap-actions <value>]
[--steps <value>]
[--restore-from-hbase-backup <value>]
2 changes: 1 addition & 1 deletion doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
# The short X.Y version.
version = '1.4'
# The full version, including alpha/beta/rc tags.
release = '1.4.3'
release = '1.4.4'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand Down
10 changes: 10 additions & 0 deletions tests/unit/customizations/emr/input_emr_fs.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"Consistent": true,
"SSE": false,
"RetryCount": 10,
"RetryPeriod": 3,
"Args": [
"fs.s3.serverSideEncryptionAlgorithm=AES256",
"fs.s3.sleepTimeSeconds=30"
]
}
41 changes: 41 additions & 0 deletions tests/unit/customizations/emr/test_create_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -800,6 +800,15 @@ def test_install_hive_with_defaults(self):
result['Steps'] = [INSTALL_HIVE_STEP]
self.assert_params_for_cmd(cmd, result)

def test_install_hive_with_profile_region(self):
    """The Hive install step is built for the session's configured region."""
    profile_region = 'cn-north-1'
    self.driver.session.set_config_variable('region', profile_region)

    # Round-trip the canned step through JSON so that every occurrence of
    # the default region in it is swapped for the profile region.
    serialized_step = json.dumps(INSTALL_HIVE_STEP)
    regional_step = json.loads(
        serialized_step.replace('us-east-1', profile_region))

    expected = copy.deepcopy(DEFAULT_RESULT)
    expected['Steps'] = [regional_step]
    self.assert_params_for_cmd(
        DEFAULT_CMD + '--applications Name=Hive', expected)

def test_install_hive_site(self):
cmdline = (DEFAULT_CMD + '--applications Name=Hive,'
'Args=[--hive-site=s3://test/hive-conf/hive-site.xml]')
Expand Down Expand Up @@ -1156,6 +1165,38 @@ def test_constructed_result(self, call_patch):
result_json = json.loads(result[0])
self.assertEquals(result_json, CONSTRUCTED_RESULT)

def test_emr_fs_config(self):
    """``--emrfs`` produces a single configure-hadoop bootstrap action.

    The same expected request is checked twice: once with the option in
    shorthand syntax and once loaded from ``input_emr_fs.json``.
    """
    settings = [
        'fs.s3.consistent=true',
        'fs.s3.enableServerSideEncryption=false',
        'fs.s3.consistent.retryCount=10',
        'fs.s3.consistent.retryPeriodSeconds=3',
        'fs.s3.serverSideEncryptionAlgorithm=AES256',
        'fs.s3.sleepTimeSeconds=30',
    ]
    # Every setting is expected to be preceded by its own '-e' flag.
    expected_ba_args = []
    for setting in settings:
        expected_ba_args.extend(['-e', setting])

    emr_fs_ba_config = {
        'Name': 'Enable Consistent View in EMR-FS',
        'ScriptBootstrapAction': {
            'Path': 's3://us-east-1.elasticmapreduce/'
                    'bootstrap-actions/configure-hadoop',
            'Args': expected_ba_args,
        },
    }
    expected = copy.deepcopy(DEFAULT_RESULT)
    expected['BootstrapActions'] = [emr_fs_ba_config]

    shorthand_cmd = DEFAULT_CMD + (
        '--emrfs Consistent=true,SSE=false,RetryCount=10,RetryPeriod=3,'
        'Args=[fs.s3.serverSideEncryptionAlgorithm=AES256,'
        'fs.s3.sleepTimeSeconds=30]')
    self.assert_params_for_cmd(shorthand_cmd, expected)

    fixture_path = os.path.join(
        os.path.dirname(__file__), 'input_emr_fs.json')
    file_cmd = DEFAULT_CMD + '--emrfs file://' + fixture_path
    self.assert_params_for_cmd(file_cmd, expected)

if __name__ == "__main__":
unittest.main()
12 changes: 11 additions & 1 deletion tests/unit/customizations/emr/test_install_applications.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from tests.unit.customizations.emr import EMRBaseAWSCommandParamsTest as \
BaseAWSCommandParamsTest
import copy
import json


INSTALL_HIVE_STEP = {
Expand Down Expand Up @@ -76,12 +77,21 @@ def test_install_hive_site(self):
'Args=[--hive-site=s3://test/hive-conf/hive-site.xml,k1]')
self.assert_params_for_cmd(cmdline, result)

def test_intall_hive_and_pig(self):
def test_install_hive_and_pig(self):
cmdline = self.prefix + 'Name=Hive Name=Pig'
result = {'JobFlowId': 'j-ABC123456', 'Steps': [INSTALL_HIVE_STEP,
INSTALL_PIG_STEP]}
self.assert_params_for_cmd(cmdline, result)

def test_install_pig_with_profile_region(self):
    """The Pig install step is built for the session's configured region."""
    profile_region = 'cn-north-1'
    self.driver.session.set_config_variable('region', profile_region)

    # Round-trip the canned step through JSON so that every occurrence of
    # the default region in it is swapped for the profile region.
    serialized_step = json.dumps(INSTALL_PIG_STEP)
    regional_step = json.loads(
        serialized_step.replace('us-east-1', profile_region))

    expected = {'JobFlowId': 'j-ABC123456', 'Steps': [regional_step]}
    self.assert_params_for_cmd(self.prefix + 'Name=Pig', expected)

def test_install_impala_error(self):
cmdline = self.prefix + ' Name=Impala'

Expand Down

0 comments on commit 0f0000a

Please sign in to comment.