-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_analysis.R
92 lines (70 loc) · 3.14 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# =====================================
# Read in the feature & activity names.
# =====================================
# Read in the column names (aka, feature names).
features <- read.table("features.txt")
# Feature names are in the 2nd column of the data we just read.
features <- features$V2
# Read in the lookup data for the activities. This lookup table
# will be used to replace the activity codes in the y_ files
# with descriptive names.
labels <- read.table("activity_labels.txt")
# Label the column names in the activity lookup table.
names(labels) <- c("activity_code", "activity")
# =====================================
# Read in the training data set.
# =====================================
# The 7352 observations of 561 variables.
X_train <- read.table("train/X_train.txt")
# The activity codes for our 7352 observations.
y_train <- read.table("train/y_train.txt")
# The subject numbers (IDs) for our 7352 observations.
subject_train <- read.table("train/subject_train.txt")
# =====================================
# Read in the test data set.
# =====================================
# The 2947 observations of our 561 variables.
X_test <- read.table("test/X_test.txt")
# The activity codes for our 2947 observations.
y_test <- read.table("test/y_test.txt")
# The subject numbers (IDs) for our 2947 observations.
subject_test <- read.table("test/subject_test.txt")
# =======================================
# Combine the testing and training data.
# =======================================
# Training rows come first, then test rows, in all three frames so that
# row i of each frame still describes the same observation.
X_data <- rbind(X_train, X_test)
y_data <- rbind(y_train, y_test)
subject_data <- rbind(subject_train, subject_test)
# =======================================
# Label the columns in our dataframes.
# =======================================
# Fail early if the feature list does not line up with the measurement
# columns: assigning too few names would silently leave NA column names.
stopifnot(length(features) == ncol(X_data))
# Label the combined observations with the feature names.
colnames(X_data) <- features
# Label the subject and activity_code columns.
colnames(subject_data) <- "subject"
colnames(y_data) <- "activity_code"
# =======================================
# Use the activity lookup table to create a vector
# of activity names from the activity code column.
# =======================================
# Row of the lookup table that corresponds to each observation's code.
activity_row <- match(y_data$activity_code, labels$activity_code)
# A code that is absent from the lookup table would become an NA activity,
# and the formula interface of aggregate() used below drops rows containing
# NA (na.action = na.omit) without any warning. Stop instead of losing data.
if (anyNA(activity_row)) {
  stop(
    "activity codes missing from activity_labels.txt: ",
    paste(unique(y_data$activity_code[is.na(activity_row)]), collapse = ", "),
    call. = FALSE
  )
}
activity <- as.character(labels[activity_row, "activity"])
# =======================================
# Select from our observations only the mean
# and std variables.
# =======================================
# The escaped parentheses restrict the match to names containing the
# literal text "mean()" or "std()". drop = FALSE keeps a data frame
# (with its column name) even if only a single column matches.
selected_data <- X_data[
  ,
  grepl("mean\\(\\)|std\\(\\)", colnames(X_data)),
  drop = FALSE
]
# =======================================
# Combine the subject IDs, the activity names, and the
# mean & std observations together. This is our first
# "tidy" data set, used to create the uploaded .txt file.
# =======================================
data <- cbind(subject_data, activity, selected_data)
# =======================================
# For each subject-activity pair, compute the mean of
# each observation. This is our second tidy data set.
# =======================================
# The "." on the left-hand side stands for every column of `data`
# other than the two grouping columns.
means_by_subject_and_activity <- aggregate(
  . ~ subject + activity,
  data = data,
  FUN = mean
)
# =======================================
# Output the final data set.
# =======================================
print(means_by_subject_and_activity)