.Rhistory

# Weekly SST index file in fixed-width format.
# Download it once and parse the local copy, rather than re-reading the URL
# on every attempt.
fileUrl <- "https://d396qusza40orc.cloudfront.net/getdata%2Fwksst8110.for"
download.file(fileUrl, destfile = "sst.csv", method = "curl")

# skip = 4 drops the four leading lines of the file; the nine widths cut each
# remaining line into columns that read.fwf names V1..V9.
filename <- read.fwf(
  file = "sst.csv",
  skip = 4,
  widths = c(12, 7, 4, 9, 4, 9, 4, 9, 4)
)

# Quick look at what was parsed.
head(filename)
tail(filename)
names(filename)
nrow(filename)

# Column totals. Only numeric columns are summed, because sum() stops with an
# error on a character or factor column.
numeric_cols <- vapply(filename, is.numeric, logical(1))
colSums(filename[numeric_cols])

# The totals that were looked at most often during the session.
sum(filename$V4)
round(sum(filename$V2))
# Survey extract (ss06hid.csv): download, load, and inspect the AGS and HHL
# columns.
fileUrl <- "https://d396qusza40orc.cloudfront.net/getdata%2Fdata%2Fss06hid.csv"
download.file(fileUrl, destfile = "one.csv", method = "curl")
file_got <- read.csv("one.csv")

names(file_got)
head(file_got)
summary(file_got)

# AGS on its own: how many values are missing, and how the rest distribute.
agriculturevalue <- file_got$AGS
sum(is.na(agriculturevalue))
table(agriculturevalue, useNA = "ifany")

# AGS cross-tabulated against HHL.
table(file_got$AGS, file_got$HHL)

# Rows where HHL is non-zero and AGS exceeds 10.
# which() discards positions where the condition is NA; indexing with the raw
# logical vector would return an all-NA row for each of them.
# NOTE(review): the condition is copied from the last attempt in the session;
# confirm it is the intended filter.
file_got[which(file_got$HHL & file_got$AGS > 10), ]
# Reshaping practice: melt mtcars to long form, cast it back to per-cylinder
# means, then summarise InsectSprays by spray.
library(reshape2)

head(mtcars)

# Keep the car name as a regular column so it survives the melt.
mtcars$carname <- rownames(mtcars)

# Long form: one row per (car, measured variable) for mpg and hp.
carMelt <- melt(
  mtcars,
  id.vars = c("carname", "gear", "cyl"),
  measure.vars = c("mpg", "hp")
)
head(carMelt, n = 3)
tail(carMelt, n = 3)

# Mean of each measured variable per cylinder count. The aggregation function
# is passed explicitly; without one dcast falls back to counting rows.
cylData <- dcast(carMelt, cyl ~ variable, fun.aggregate = mean)
cylData

# Total insect count per spray, then the raw counts split by spray.
head(InsectSprays)
tapply(InsectSprays$count, InsectSprays$spray, sum)
spIns <- split(InsectSprays$count, InsectSprays$spray)
spIns
# swirl practice session (dplyr grouping, then tidyr reshaping).
# These commands are meant to be typed at the interactive swirl prompt:
# swirl() takes over the console, and submit() hands the lesson's script back
# to swirl for checking. The mistyped and retried commands from the original
# session have been removed; only the working form of each step is kept.
library(swirl)
library(dplyr)
library(tidyr)
swirl()

# ---- dplyr: group and summarise ----
# `mydf` is not created anywhere in this file; presumably the swirl lesson
# loads it. TODO confirm.
cran <- tbl_df(mydf)
rm(mydf)
cran

by_package <- group_by(cran, package)
by_package
summarize(by_package, mean(size))

# `pack_sum` is likewise not created in this file; presumably it comes from a
# script passed to submit(). TODO confirm.
pack_sum

# Packages in the top 1% by count. The cutoff 679 was typed in by hand right
# after the quantile call, so it is presumably that quantile's value.
quantile(pack_sum$count, probs = 0.99)
top_counts <- filter(pack_sum, count > 679)
head(top_counts, 20)
arrange(top_counts, desc(count))

# Same idea for the `unique` column, with 465 as the hand-typed cutoff.
quantile(pack_sum$unique, probs = 0.99)
top_unique <- filter(pack_sum, unique > 465)
arrange(top_unique, desc(unique))

# ---- tidyr: gather and separate ----
# `students`, `students2` and `students3` are not created in this file either.
students
gather(students, sex, count, -grade)

students2
# key = sex_class, value = count, and every column except grade is gathered.
res <- gather(students2, sex_class, count, -grade)
res
separate(res, sex_class, c("sex", "class"))

students3
# Base-graphics practice on simulated data: histograms, an annotated
# scatterplot with a fitted line, and multi-panel layouts.
x <- rnorm(100)
y <- rnorm(100)
z <- rpois(100, 2)

# Remember the graphics settings so they can be put back at the end.
old_par <- par(no.readonly = TRUE)

hist(x)
hist(y)

# Tight margins for the first, unlabelled scatterplot.
par(mar = c(2, 2, 2, 2))
plot(x, y, pch = 20)
title("Scatterplot")
text(-2, -2, "label")
legend("topleft", legend = "Data", pch = 20)

# y is on the vertical axis, so the line drawn by abline() must come from a
# regression of y on x.
fit <- lm(y ~ x)
abline(fit, lwd = 3, col = "blue")

# Labelled version of the same plot; the margins are widened so the axis
# titles are not clipped.
par(mar = c(4, 4, 2, 2))
plot(x, y, xlab = "weight", ylab = "Height", main = "Scatterplot", pch = 20)
legend("topright", legend = "Data", pch = 20)
abline(fit, lwd = 3, col = "red")

# Two panels side by side.
par(mfrow = c(1, 2))
plot(x, y, pch = 20)
plot(x, z, pch = 20)

# Two-by-two grid; the fourth cell stays empty.
par(mfrow = c(2, 2), mar = c(4, 4, 2, 2))
plot(x, y)
plot(x, z)
plot(y, z)

# Restore the settings saved above.
par(old_par)

summary(cars)
# Step-count analysis of activity.csv (columns used: steps, date, interval).
# The original session re-ran this script many times while fixing typos; this
# is the single corrected version.
library(knitr)
library(ggplot2)
library(dplyr)

# Resolve the input files against the project directory instead of calling
# setwd(), so the session's working directory is left untouched.
project_dir <- "/Users/himabindu/repdata_peerassg1"
unzip(file.path(project_dir, "activity.zip"), exdir = project_dir)
filename <- read.csv(
  file.path(project_dir, "activity.csv"),
  header = TRUE,
  sep = ","
)

# ---- Total steps per day ----
# The formula interface of aggregate() omits rows with missing steps by default.
stepsperday <- aggregate(steps ~ date, data = filename, FUN = sum)

# The earlier `labs(x = steps)` referred to an object that does not exist;
# the x axis is already labelled from the `steps` aesthetic, so it is dropped.
ggplot(stepsperday, aes(steps)) +
  geom_histogram(fill = NA, color = "blue") +
  labs(title = "Total number of steps per day")

mean(stepsperday$steps)
median(stepsperday$steps)

# ---- Average daily activity pattern ----
# Mean steps for each 5-minute interval, averaged across days.
stepsperinterval <- aggregate(
  steps ~ interval,
  data = filename,
  FUN = mean,
  na.rm = TRUE
)
names(stepsperinterval)[2] <- "averagesteps"

plot(
  stepsperinterval$interval,
  stepsperinterval$averagesteps,
  type = "l",
  xlab = "5 min interval",
  ylab = "Average number of steps",
  main = "Average Daily Activity in a 5 min interval"
)

# Interval with the highest average step count.
maxinterval <- stepsperinterval[which.max(stepsperinterval$averagesteps), ]
maxinterval

# ---- Imputing missing values ----
# Number of NA cells in the raw data.
sum(is.na(filename))

# Attach each row's interval average, then restore date/interval order
# (merge() reorders rows by the join key).
data <- merge(filename, stepsperinterval, by = "interval")
data <- data[order(data$date, data$interval), ]
rownames(data) <- NULL

# Replace every missing step count with the average for its interval, in one
# vectorised assignment; misscount records how many values were filled in.
missing_steps <- is.na(data$steps)
misscount <- sum(missing_steps)
data$steps[missing_steps] <- data$averagesteps[missing_steps]

# Keep only the columns of the original data set.
newdata <- select(data, steps, date, interval)
head(newdata)

# ---- Total steps per day after imputation ----
new_steps_per_day <- aggregate(steps ~ date, data = newdata, FUN = sum)
# Earlier spellings of the same result, kept so anything that still refers to
# them keeps working.
newStepsPerDay <- newstepsperday <- new_steps_per_day

mean(new_steps_per_day$steps)
median(new_steps_per_day$steps)

ggplot(new_steps_per_day, aes(steps)) +
  geom_histogram(fill = NA, color = "red") +
  labs(title = "Total number of steps per day")