-
Notifications
You must be signed in to change notification settings - Fork 0
/
combine.sh
executable file
·86 lines (80 loc) · 3.65 KB
/
combine.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/bin/bash
# File: combine.sh #
# Author: Maggie Schweihs #
# Class: DS730, Fall 2017 #
# Purpose: After running a MapReduce job in the cloud (AWS), we are left with multiple part-##### files. #
# This script is designed to run after the AWS S3 bucket is synced to an empty folder on a Linux machine #
# (Ubuntu Server 16.04 LTS) #
# For Info on AWS S3 Sync utility: http://docs.aws.amazon.com/cli/latest/reference/s3/sync.html #
# #
# Caveat: Only the part-##### files from one job should be in the folder. Files other than part-##### are ok.#
# Create a new folder for each job! #
# #
# Usage: Specify all options -d <directory> -o <output_dir> -f <filename> #
# where -d is the directory containing the part-##### files, #
# -o is the directory to contain the output file, #
# and -f is the filename of the output file. #
##############################################################################################################
#######################################
# Print the command-line help text and terminate the script.
# Arguments: $1 - optional exit status (defaults to 0, i.e. a plain help request)
# Outputs:   help text on stdout
# Returns:   never returns; exits the shell that called it
#######################################
usage() {
  # -h is a flag without an argument, so it is shown as an optional switch.
  printf '%s\n' \
    '' \
    'Usage : Script -d <directory> -o <output_dir> -f <filename> [-h]' \
    '' \
    'Please enter the directory in which the files are located, the output directory ' \
    'and the filename of the output file.'
  exit "${1:-0}"
}
# Main flow: parse options, validate them, concatenate the part* files into
# one output file, then sort that file in place.
#
# -d, -o and -f are required and each takes an argument; -h only prints help.

# Print an error message on stderr, show the help text, and exit non-zero.
# usage terminates whichever shell calls it, so it is run in a subshell here
# to keep control of the final exit status.
die_with_usage() {
  printf '%s\n' "$1" >&2
  (usage) >&2
  exit 1
}

# Start from empty values so variables inherited from the environment cannot
# stand in for options that were not given.
DIRECTORY=
OUTDIR=
FILENAME=

# The leading ':' selects getopts' silent mode: a missing argument is reported
# as ':' and an unknown option as '?', with the offending letter in OPTARG.
while getopts ":d:o:f:h" opt; do
  case "$opt" in
    d) DIRECTORY=$OPTARG ;;  # directory holding the part* files
    o) OUTDIR=$OPTARG ;;     # directory that receives the combined file
    f) FILENAME=$OPTARG ;;   # name of the combined file
    h)
      usage
      exit 0
      ;;
    :) die_with_usage "Option -$OPTARG requires an argument." ;;
    *) die_with_usage "Unknown option: -$OPTARG" ;;
  esac
done

# OPTIND is still 1 when getopts consumed nothing at all.
if (( OPTIND == 1 )); then
  die_with_usage "No options were passed"
fi

# Every required option must be present and non-empty; otherwise the paths
# built below would silently point at the filesystem root.
[[ -n "$DIRECTORY" ]] || die_with_usage "Please enter the directory containing part* files"
[[ -n "$OUTDIR" ]] || die_with_usage "Please enter the output directory."
[[ -n "$FILENAME" ]] || die_with_usage "Please enter a filename."

if [[ ! -d "$DIRECTORY" ]]; then
  printf 'Not a directory: %s\n' "$DIRECTORY" >&2
  exit 1
fi
if [[ ! -d "$OUTDIR" ]]; then
  printf 'Not a directory: %s\n' "$OUTDIR" >&2
  exit 1
fi

# Collect the input files into an array; nullglob makes an unmatched pattern
# expand to nothing instead of the literal string "part*".
shopt -s nullglob
parts=("$DIRECTORY"/part*)
shopt -u nullglob
if (( ${#parts[@]} == 0 )); then
  printf 'No part* files found in %s\n' "$DIRECTORY" >&2
  exit 1
fi

output="$OUTDIR/$FILENAME"

# Concatenate all parts into the output file.
if ! cat -- "${parts[@]}" > "$output"; then
  printf 'Could not write %s\n' "$output" >&2
  exit 1
fi

# General-numeric sort, written back over the same file via -o.
if ! sort -g -o "$output" -- "$output"; then
  printf 'Could not sort %s\n' "$output" >&2
  exit 1
fi

# Report the result: success only when the output exists and is non-empty.
if [[ -s "$output" ]]; then
  echo "File created: $output"
else
  echo "Something went wrong!" >&2
  exit 1
fi
exit 0