-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathflight_df.Rmd
62 lines (51 loc) · 1.94 KB
/
flight_df.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
---
title: "Create Flight Dataframe"
---
# Overview
In this notebook, I include the code to create a finalized flight dataframe, from the FAA provided excel sheets.
# Import Packages and Files
```{r}
library(readxl)
library(plyr)
library(lubridate)
library(dplyr)
```
```{r}
#importing 2019 data
flightdf1 <- read.csv("./data/flight/Olin2019dataset.csv", header=TRUE, fileEncoding="UTF-8-BOM")
#importing Jan 2020 data
flightdf2 <- read_excel("./data/flight/2020-Jan-Runway use start.xlsx")
#importing Feb - Apr 2020 data
lst <- lapply(1:3, function(i) read_excel("./data/flight/OlinFebruaryApril2020dataset.xlsx", sheet = i))
flightdf3 <- ldply(lst, data.frame)
```
# Format and Merge
## Formatting Datetime of Each
```{r}
#formatting datetime of 2019
flightdf1$Date <- with(flightdf1, mdy(Date) + hms(Time), tz = "America/New_York") #combining and setting timezone
flightdf1$Time <- NULL #deleting the time column
#formatting datetime of Jan 2020
flightdf2$Time <- format(flightdf2$Time,"%H:%M:%S")
flightdf2$Date <- with(flightdf2, ymd(Date) + hms(Time), tz = "America/New_York") #combining and setting timezone
flightdf2$Time <- NULL #deleting the time column
#formatting datetime of Feb- Apr 2020
flightdf3$Time <- format(flightdf3$Time,"%H:%M:%S")
flightdf3$Date <- with(flightdf3, ymd(Date) + hms(Time), tz = "America/New_York") #combining and setting timezone
flightdf3$Time <- NULL
```
## Merging
```{r}
#reformatting column names of flightdf2 in order to match the other column names
flightdf2<- flightdf2 %>% plyr::rename(c("AC Type" = "AC.Type", "Flight ID" = "Flight.ID","Reg No." = "Reg.No.","Dest/Orig" = "Dest.Orig" ))
#first merge
flightdf <- rbind(flightdf1, flightdf2)
#second merge
flightdf <- rbind(flightdf, flightdf3)
# Clean NAs
flightdf <- flightdf[!is.na(flightdf$RW),]
flightdf <- flightdf[!is.na(flightdf$Date),]
flightdf <- flightdf[!is.na(flightdf$Opr),]
#write to file
write.csv(flightdf, "finalflightdf.csv", row.names=F)
```