-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHW3_mushrooms.Rmd
66 lines (56 loc) · 2.24 KB
/
HW3_mushrooms.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
---
title: "HW3 - Mushrooms"
output: html_document
---
#### Walt Wells - 07.23-07.30.2016
### Prepare Environment and Load Data
```{r warning=FALSE, message=FALSE}
# Set Environment
# Install any missing package, then attach it. require() alone is not enough:
# when it returns FALSE the package gets installed but is never attached, so
# getURL() / fread() / revalue() / datatable() would be missing on a first run.
# library() also fails loudly if the install did not succeed.
for (pkg in c("RCurl", "data.table", "DT", "plyr")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

# Import Data
# Only download when `dat` is not already in the session, so re-running the
# chunk interactively does not hit the network again.
if (!exists("dat")) {
  URL <- "https://raw.githubusercontent.com/wwells/CUNYBridge_R/master/agaricus-lepiota.data"
  # getURL() returns the file contents as a single string, which fread()
  # then parses; header = FALSE yields default column names V1, V2, ...
  x <- getURL(URL)
  dat <- fread(x, header = FALSE)
}

# Change to DF instead of DT
dat <- data.frame(dat)

# Preview
head(dat)
```
### Subset
```{r}
# Keep six columns by position: class, cap-shape, cap-surface, cap-color
# (columns 1-4) plus population and habitat (columns 22 and 23).
## This selection is arbitrary for the purposes of this project. Normally we
## would explore the data further before settling on a subset.
keep_cols <- c(1:4, 22, 23)
mdat <- dat[, keep_cols]

# Show the first rows of the subset
head(mdat)
```
### Rename Columns and Data
``` {r}
# Lookup tables: single-letter code in the raw data -> readable label
class_labels <- c("e"="edible", "p"="poisonous")
cshape_labels <- c("b"="bell", "c"="conical", "x"="convex",
                   "f"="flat", "k"="knobbed", "s"="sunken")
csurface_labels <- c("f"="fibrous", "g"="grooves", "y"="scaly",
                     "s"="smooth")
ccolor_labels <- c("n"="brown", "b"="buff", "c"="cinnamon",
                   "g"="gray", "r"="green", "p"= "pink",
                   "u"="purple", "e"="red", "w"="white",
                   "y"="yellow")
population_labels <- c("a"="abundant", "c"="clustered",
                       "n"="numerous", "s"="scattered",
                       "v"="several", "y"="solitary")
habitat_labels <- c("g"="grasses", "l"="leaves", "m"="meadows",
                    "p"="paths", "u"="urban", "w"="waste",
                    "d"="woods")

# Decode every raw column into a new, descriptively named column.
# The class column is additionally stored as a factor.
mdat$class <- factor(revalue(mdat$V1, class_labels))
mdat$cshape <- revalue(mdat$V2, cshape_labels)
mdat$csurface <- revalue(mdat$V3, csurface_labels)
mdat$ccolor <- revalue(mdat$V4, ccolor_labels)
mdat$population <- revalue(mdat$V22, population_labels)
mdat$habitat <- revalue(mdat$V23, habitat_labels)

# Drop the raw V* columns and keep only the decoded ones (the six columns
# appended above), in the order they were created
decoded_cols <- c("class", "cshape", "csurface", "ccolor",
                  "population", "habitat")
mdat <- mdat[, decoded_cols]

# Browse the curated data in an interactive table, five rows per page
datatable(mdat, options = list(pageLength = 5))
```