-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathUSGSdatapull.R
187 lines (126 loc) · 6.24 KB
/
USGSdatapull.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#playing around with USGS data in R
library(dataRetrieval)
#step 1 - identify site of interest by its site ID
#north santiam river at Niagra (14181500)
siteNumbers<-c("14181500","11501000","11486990","11502500","14211542")
siteInfo<-readNWISsite(siteNumbers)
#figure out what data is available for the site
whatdat<-whatNWISdata(siteNumber=siteNumbers, service="dv")
#get the daily data for a site - must specify parameter, do last 10 years
#parameters of interest
#00010 Temperature, water (deg C)
#00060 Discharge, cfs -aka daily mean stream flow
#statistical codes of interest
#00001 = maximum
#00002 = minimum
#00003 = mean
#00008 = median
dailydis<-readNWISdv(siteNumber="14181400", parameterCd="00060", startDate = (Sys.Date()-3650), statCd=c('00001','00002','00003',"00008"))
dailytemp<-readNWISdv(siteNumber=siteNumbers, parameterCd="00010", startDate=(Sys.Date()-3650), statCd=c('00001','00002','00003',"00008"))
#let's look at what data we can get for velocity and stream depth -
#lots of different parameter codes that might fit the bill, so let's look at data and see what we have
velocparam<-c("00055","70232", '72149', '72168',"72169","72190","72254","72255","72294","72321",
"72322","72323","81380","81904"
)
velocinfo<-readNWISpCode(velocparam)
depthparam<-c("00064","72178","72199","82903", "85310","85311")
depthinfo<-readNWISpCode(depthparam)
#see if we can get all the NWIS sites in Oregon -
#when I refine search to just surface water (no ocean) I get about 6,910 sites
nwisor<-whatNWISsites(stateCd="OR",siteType=c("ES","LK","SP","ST"))
#what data is there for velocity - very little (only 50 stations in all)
orveloc<-whatNWISdata(stateCd="OR",parameterCd=velocparam)
#what data is there for depth - more data, but not great (490 stations in all - some of it rather old)
ordepth<-whatNWISdata(stateCd="OR",parameterCd=depthparam)
#compared with discharge? - have discharge data at 1,708 stations
ordis<-whatNWISdata(stateCd="OR",parameterCd="00060")
#get some data - can't through NWIS - must go through WQP...
dailyveloc<-readWQPqw(siteNumber="USGS-11502500", parameterCd=velocparam)
dailydepth<-readWQPqw(siteNumber='USGS-14206690', parameterCd=c("00064","72178","72199","82903", "85310","85311"))
###Lets do some calculations######
#copying in the information from Vanessa and Ryan Michie so that we can calculate important MZ flow statistics such as 7Q10, 1Q10 and 30Q5
#...will likely need to develop a different code to take care of the harmonic mean flow
library(RCurl)
library(lubridate)
library(zoo)
library(dplyr)
library(devtools)
devtools::install_github('DEQrmichie/dflowR', host = 'https://api.github.com',
dependencies= TRUE, force = TRUE, upgrade='never')
library(dflowR)
#--- USGS web download example ------------------------------------------------------------------------------------
#if you want a station with a patchier dataset, try USGS gauge 12118500
# download
q.df <- readNWISdv(siteNumbers = "14174000",
parameterCd = "00060",
startDate = "1970-10-01",
endDate = "2022-01-30",
statCd = "00003")
# Just get columns 3 and 4 (date and flow)
q <- q.df[,c(3,4)]
colnames(q) <-c("date", "flow")
q$date <- as.POSIXct(q$date, format="%Y-%m-%d")
dflow(x=q, m=30, r=5, yearstart=NA, yearend=NA, wystart="10-01", wyend="09-30")
thirty<-dflow(x=q, m=30, r=5, yearstart=NA, yearend=NA, wystart="10-01", wyend="09-30")
#-------try out harmonic mean
#rossman terminology NDAYS, NZEROES, DSUM, DR
# interpretation =
#NDAYS is the total number of days of records,
#NZEROES is the number of days with 0 flow
#DSUM is the sum of the reciprocal of each value (e.g 1/x(1)+1/x(2)+1/x(3)+1/x(t)....)
#DR= (NDAYS-NZEROES)/NDAYS
#HMEAN - (NDAYS-NZEROES)/DSUM*DR
#rossman notes that the final estimate is the weighted average of the harmonic mean of non-zero flows and zero (which is where the NZEROES and DR come in)
#psych package has harmonic mean calculator
library(psych)
#test the function with a simple example
x<-c(1,5,4,6)
harmonic.mean(x,zero=FALSE)
#2.474227 - correct
#test with a zero in the mix, if "zero=FALSE" in the function it should treutnr the harmonic mean of the non-zero elements
z<-c(1,5,4,6,0)
harmonic.mean(z,zero=FALSE)
#2.474227 - same answer as before, excellent
#so we can just take this function and multiply it by DR and we should get the exact same thing that DFLOW would return
dr<-(length(z)-length(which(z==0)))/length(z)
dharm<-harmonic.mean(z,zero=FALSE)*dr
#1.979
#this appears to be working. Now to make it into a function
dharmonic<-function(x) {
return(harmonic.mean(x,zero=FALSE)*(length(x)-length(which(x==0)))/length(x))
}
dharmonic(z)
#great! This is working with our small examples, time to try actual flow data - take data from the original dflow example above
#need to remove date for this function to work
r<-q[,2]
dharmonic(r)
#function worked, though won't be able to verify against DFLOW until I speak more with Steve and Erich
#flow plot
library(ggplot2)
F1 <- ggplot(q, aes(x = date, y = flow)) +
geom_line() +
scale_x_datetime(date_breaks="5 years", date_labels="%Y") +
theme_bw() +
theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(),
legend.position = "none") +
labs(y = "Mean Daily Discharge (cfs)", x = "") +
scale_color_brewer(palette = "Set1")
F1
#monthly mean boxplots
q$Month<-format(q$date,"%b")
q$Month<-factor(q$Month, levels=month.abb)
Box1 <- ggplot(q, aes(x = Month, y = flow)) +
geom_boxplot() +
stat_summary(fun = mean, geom ="point", shape = 20, size=3, color ="red", fill ="red") +
theme_bw() +
theme(panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank(),
legend.position = "none") +
labs(y = "Discharge (cfs)", x = "") +
ggtitle("Boxplots of mean monthly flow") +
scale_color_brewer(palette = "Set1")
Box1
#figure out any missing dates
q$asDate<-as.Date(q$date)
DateRange<-seq(min(q$asDate),max(q$asDate),by=1)
Missing<-DateRange[!DateRange %in% q$asDate]
df<-data.frame(Missing)