Skip to content

Commit

Permalink
improve DB input
Browse files Browse the repository at this point in the history
  • Loading branch information
silberzwiebel committed May 18, 2018
1 parent 17fbb56 commit e821ff2
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 20 deletions.
3 changes: 2 additions & 1 deletion src/000_run_pipeline.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ source("src/00_install_R_packages.R", echo = TRUE)
source("src/01_download_data.R", echo = TRUE)
source("src/02_cars_to_db.R", echo = TRUE)
source("src/02_bikes_to_db.R", echo = TRUE)
source("src/03_temporal_features.R", echo = TRUE)
# TODO: do we still need this table? Probably convert it to a weather table.
# source("src/03_temporal_features.R", echo = TRUE)
40 changes: 25 additions & 15 deletions src/02_bikes_to_db.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

# load libraries ####
# use 00_install_R_packages.R for installing missing packages
sapply(c("dplyr", "DBI", "RSQLite", "tidyr", "lubridate"),
sapply(c("dplyr", "DBI", "RSQLite", "tidyr", "chron", "lubridate"),
require, character.only = TRUE)

file <- "data/raw/Fahrradzaehlstellen-Stundenwerte.csv"
Expand All @@ -22,28 +22,38 @@ bikes <-
# wide to long format
gather(location, count, -date, -hour, -weather, -temperature, -windspeed) %>%
mutate(date = as.character(dmy(date))) %>%
mutate(hour = as.integer(substring(hour, 1, 2))) %>%
mutate(vehicle = "bike") #%>%
mutate(year = as.integer(year(date))) %>%
mutate(month = as.integer(month(date))) %>%
mutate(day = as.integer(day(date))) %>%
mutate(weekday = wday(date, label = T, abbr = T)) %>%
mutate(weekend = is.weekend(date)) %>%
mutate(hour = as.integer(substring(hour, 1, 2))) %>%
mutate(vehicle = "bike")

# write 'bikes' to SQLite database
# showWarnings = FALSE keeps dir.create() quiet (e.g. when the directory
# already exists on a re-run)
dir.create("data/database", showWarnings = FALSE)
con <- dbConnect(SQLite(), dbname = "data/database/traffic_data.sqlite")
# overwrite = TRUE replaces any existing 'bikes' table instead of appending
dbWriteTable(con, "bikes", bikes, row.names = FALSE, overwrite = TRUE)

# index the (date, hour) columns and the split-out (year, month, day, hour)
# columns of the 'bikes' table
dbExecute(con, "CREATE INDEX timestamp_bikes on bikes (date, hour)")
dbExecute(con, "CREATE INDEX year_month_day_bikes on bikes (year, month, day, hour)")

# TODO: make the weather data its own table
# add the same weather to cars table
cars <- dbGetQuery(conn = con, "SELECT location, count, date, hour, vehicle FROM cars")

weather_from_bikes <-
bikes %>%
select(date, hour, weather, windspeed, temperature) %>%
filter(weather != "")

cars <-
cars %>%
inner_join(., weather_from_bikes, by = c("date", "hour"))

dbWriteTable(con, "cars", cars, row.names = F, overwrite = T)
# cars <- dbGetQuery(conn = con, "SELECT location, count, date, hour, vehicle FROM cars")
#
# weather_from_bikes <-
# bikes %>%
# select(date, hour, weather, windspeed, temperature) %>%
# filter(weather != "")
#
# cars <-
# cars %>%
# inner_join(., weather_from_bikes, by = c("date", "hour"))
#
# dbWriteTable(con, "cars", cars, row.names = F, overwrite = T)

# for better performance, DB is read-only in shiny-app
# NOTE(review): two PRAGMA statements are sent in a single dbExecute() call;
# RSQLite appears to execute only the first statement of a multi-statement
# string (warning about the ignored remainder) -- confirm, and consider one
# dbExecute() call per PRAGMA.
# NOTE(review): these PRAGMAs look like per-connection settings, so issuing
# them right before dbDisconnect() may not affect the connection the shiny-app
# opens later -- verify against the SQLite PRAGMA documentation.
dbExecute(con, "PRAGMA synchronous=OFF; PRAGMA journal_mode=OFF;")

# close the database connection
dbDisconnect(con)
13 changes: 9 additions & 4 deletions src/02_cars_to_db.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

# load libraries ####
# use 00_install_R_packages.R for installing missing packages
sapply(c("dplyr", "assertthat", "lubridate", "tidyr", "DBI", "RSQLite"),
sapply(c("dplyr", "assertthat", "lubridate", "chron", "tidyr", "DBI", "RSQLite"),
require, character.only = TRUE)

process_df <- function(df) {
Expand Down Expand Up @@ -38,9 +38,8 @@ process_df <- function(df) {
# filter to add only relevant locations to the database
# as of now: Roxel and all locations where also bicycles are counted
relevant_locations <-
c("24020", "24100", "24140", "24010", "24120", "24130", "24030", # Roxel
# locations where (closeby) also bicycles are counted, in the same order as http://www.stadt-muenster.de/verkehrsplanung/verkehr-in-zahlen/radverkehrszaehlungen.html
"01080", # Neutor
c(# locations where (closeby) also bicycles are counted, in the same order as http://www.stadt-muenster.de/verkehrsplanung/verkehr-in-zahlen/radverkehrszaehlungen.html
"01080", # Neutor
"04050", # Wolbecker Straße / Servatiiplatz
"03052", # Hüfferstraße
"07030", # Hammer Straße
Expand All @@ -61,6 +60,11 @@ process_df <- function(df) {
df <-
df %>%
gather(hour, count, -location, -date) %>%
mutate(year = as.integer(year(date))) %>%
mutate(month = as.integer(month(date))) %>%
mutate(day = as.integer(day(date))) %>%
mutate(weekday = wday(date, label = T, abbr = T)) %>%
mutate(weekend = is.weekend(date)) %>%
# 'hour' to integer format
mutate(hour = substring(hour, 2)) %>%
mutate(hour = as.integer(hour)) %>%
Expand Down Expand Up @@ -92,5 +96,6 @@ for (raw_file in raw_files) {
}

# create indexes on the 'cars' table: one on (date, hour), one on the
# split-out (year, month, day, hour) columns, and one on location
dbExecute(con, "CREATE INDEX timestamp_cars on cars (date, hour)")
dbExecute(con, "CREATE INDEX year_month_day_cars on cars (year, month, day, hour)")
dbExecute(con, "CREATE INDEX location_cars on cars (location)")
# close the database connection
dbDisconnect(con)

0 comments on commit e821ff2

Please sign in to comment.