forked from Signiant/dynamodb-emr-exporter
-
Notifications
You must be signed in to change notification settings - Fork 0
/
restoreEMR.sh
executable file
·172 lines (143 loc) · 6.67 KB
/
restoreEMR.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#!/bin/bash

# Positional inputs, in the order shown by usage():
#   app_name emr_cluster_name boto_profile_name json_input_directory S3_location_for_logs
APPNAME="${1}"
CLUSTER_NAME="${2}"
PROFILE="${3}"
JSON_INPUT_DIR="${4}"
S3LOCATION="${5}"

# Tunables that are fixed here rather than passed in
RETRY_DELAY="10"              # seconds to sleep between cluster creation attempts
CLUSTER_REGION="us-west-2"    # region passed to every aws CLI call

# Working state shared by the phases below
INSTALL_DIR="/usr/local/dynamodb-emr"
NEXTPHASE="0"                 # set to 1 when the next phase is allowed to run
RETCODE="0"                   # exit status handed to `exit` at the end of the script
#######################################
# Send a message to syslog (tagged "restoreEMR[<pid>]") and print it to stdout.
# Arguments: $1 - the message text
# Outputs:   the message, followed by a newline, on stdout
#######################################
logMsg()
{
  local progname=restoreEMR
  local pid=$$

  # Quote both the tag and the message: unquoted, the tag's [..] is a glob
  # pattern and the message is word-split and glob-expanded before logging.
  logger -t "${progname}[${pid}]" -- "$1"

  # printf instead of echo so messages such as "-n" or "*" are printed verbatim
  printf '%s\n' "$1"
}
# Print the expected command-line arguments on stdout.
usage()
{
  printf '%s\n' "Usage: restoreEMR app_name emr_cluster_name boto_profile_name json_input_directory S3_location_for_logs"
}
#######################################
# Poll an EMR cluster every 10 seconds until it reaches a terminal state.
# Globals:   CLUSTER_REGION (read)
# Arguments: $1 - aws CLI profile name
#            $2 - EMR cluster id
#            $3 - EMR cluster name (only used in the log message)
# Returns:   0 when the cluster is TERMINATED and its state change message is
#              anything other than "Steps completed with errors"
#            1 when it is TERMINATED with that message, or TERMINATED_WITH_ERRORS
#######################################
pollCluster()
{
  # Locals keep the caller's PROFILE / CLUSTERID globals from being overwritten
  local profile=$1
  local cluster_id=$2
  local cluster_name=$3
  local poll_interval=10
  local description state reason

  logMsg "polling cluster NAME:${cluster_name} ID ${cluster_id} for status in profile ${profile}"

  while true; do
    # One describe call per iteration; state and reason come from the same
    # response. --output json is explicit because jq parses the result and the
    # profile's default output format may be text or table.
    description=$(aws emr describe-cluster --cluster-id "${cluster_id}" \
      --profile "${profile}" --region "${CLUSTER_REGION}" --output json)
    state=$(printf '%s\n' "${description}" \
      | jq -r '.["Cluster"]["Status"]["State"]')

    case "${state}" in
      TERMINATED)
        # A clean termination can still hide failed steps
        reason=$(printf '%s\n' "${description}" \
          | jq -r '.["Cluster"]["Status"]["StateChangeReason"]["Message"]')
        if [[ "${reason}" == "Steps completed with errors" ]]; then
          return 1
        fi
        return 0
        ;;
      TERMINATED_WITH_ERRORS)
        return 1
        ;;
    esac

    # Any other state (including an empty one after a failed describe call)
    # means "not finished yet": wait and ask again.
    sleep "${poll_interval}"
  done
}
# Exactly five positional arguments are required; anything else prints the
# usage line and stops with status 1.
case "$#" in
  5) ;;
  *)
    usage
    exit 1
    ;;
esac

logMsg "Starting up"
######
## PHASE 1 - See if there are any clusters already running with our name. If there are, exit
######
# Capture the listing first so that an API failure is reported as such instead
# of being mistaken for "no cluster running". --output json is explicit because
# jq parses the result and the profile's default output format may differ.
if ! ACTIVE_CLUSTERS=$(aws emr list-clusters --active --output json \
  --profile "${PROFILE}" --region "${CLUSTER_REGION}"); then
  # Cannot tell whether a cluster is running - do not risk creating a duplicate
  logMsg "Cluster ERROR: unable to list active clusters in profile ${PROFILE}"
  NEXTPHASE=0
  RETCODE=3
elif printf '%s\n' "${ACTIVE_CLUSTERS}" \
  | jq -r '(.Clusters // [])[] | .Name' \
  | grep -Fxq -- "${CLUSTER_NAME}"; then
  # Compare whole cluster names as fixed strings: a regex substring match over
  # the raw output also fires on clusters whose name merely contains ours.
  # We already have a cluster running - bail
  logMsg "Cluster ERROR: existing cluster ${CLUSTER_NAME} running"
  NEXTPHASE=0
  RETCODE=2
else
  logMsg "No existing EMR cluster with name ${CLUSTER_NAME} running. Creating"
  NEXTPHASE=1
fi
######
## PHASE 2 - Create the EMR cluster (with retries)
######
if [[ "${NEXTPHASE}" == 1 ]]; then
  RETRIES=5
  CURR_ATTEMPT=1
  CLUSTERUP=0

  while (( CURR_ATTEMPT <= RETRIES )); do
    CLUSTERUP=0

    # Invoke the aws CLI to create the cluster; with --output text the command
    # substitution captures the new cluster id (empty when creation failed).
    logMsg "Creating new EMR Cluster NAME:${CLUSTER_NAME} Attempt ${CURR_ATTEMPT} of ${RETRIES}"
    CLUSTERID=$(aws emr create-cluster --name "${CLUSTER_NAME}" \
      --ami-version 3.8.0 \
      --service-role "EMR_DefaultRole" \
      --tags "Name=${CLUSTER_NAME}" signiant:product=devops signiant:email=devops@signiant.com \
      --enable-debugging \
      --log-uri "${S3LOCATION}/emr-logs" \
      --applications "file://${JSON_INPUT_DIR}/applications.json" \
      --instance-groups "file://${JSON_INPUT_DIR}/instance-groups.json" \
      --ec2-attributes "file://${JSON_INPUT_DIR}/ec2-attributes.json" \
      --bootstrap-actions "file://${JSON_INPUT_DIR}/bootstrap-actions-import.json" \
      --steps "file://${JSON_INPUT_DIR}/importSteps.json" \
      --auto-terminate \
      --visible-to-all-users \
      --output text \
      --region "${CLUSTER_REGION}" \
      --profile "${PROFILE}")

    logMsg "CLUSTERID for ${CLUSTER_NAME} is $CLUSTERID"

    # Now use the waiter to make sure the cluster is launched successfully
    if [[ -n "${CLUSTERID}" ]]; then
      logMsg "Waiting for cluster NAME:${CLUSTER_NAME} ID:${CLUSTERID} to start...."

      if aws emr wait cluster-running --cluster-id "${CLUSTERID}" \
        --profile "${PROFILE}" --region "${CLUSTER_REGION}"; then
        logMsg "Cluster NAME:${CLUSTER_NAME} ID:${CLUSTERID} launched successfully"
        CLUSTERUP=1
        break
      fi

      # NOTE(review): the cluster from this failed attempt is not terminated
      # before retrying - confirm whether it can still come up on its own.
      logMsg "Cluster ERROR: launch failure NAME:${CLUSTER_NAME} ID:${CLUSTERID} Attempt ${CURR_ATTEMPT} of ${RETRIES} "
    else
      logMsg "Cluster ERROR: no cluster ID returned NAME:${CLUSTER_NAME}"
    fi

    CURR_ATTEMPT=$(( CURR_ATTEMPT + 1 ))

    # Only wait when another attempt will actually follow
    if (( CURR_ATTEMPT <= RETRIES )); then
      logMsg "Delaying ${RETRY_DELAY} seconds before attempting to create cluster..."
      sleep "${RETRY_DELAY}"
    fi
  done

  ####
  ## PHASE 3 - poll the cluster for status so we know when it's done
  ####
  if [[ "${CLUSTERUP}" == 1 ]]; then
    # We have a cluster provisioned...now we can poll its tasks and make sure it completes ok
    if pollCluster "${PROFILE}" "${CLUSTERID}" "${CLUSTER_NAME}"; then
      logMsg "Cluster SUCCESS NAME:${CLUSTER_NAME} ID:${CLUSTERID}"
      RETCODE=0
    else
      logMsg "Cluster ERROR:task failure NAME:${CLUSTER_NAME} ID:${CLUSTERID}"
      RETCODE=4
    fi
  else
    logMsg "Unable to provision a new cluster after ${RETRIES} attempts"
    RETCODE=6
  fi
fi

exit "${RETCODE}"