-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path02.spatialDataCleaning&Preparation.R
98 lines (79 loc) · 3.09 KB
/
02.spatialDataCleaning&Preparation.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# Maximising memory
# Sets the `java.parameters` option to "-Xmx6g", i.e. a 6 GB maximum heap for
# a Java VM started from this R session.
# NOTE(review): none of the packages loaded in this script is visibly
# Java-backed; presumably this is meant for an rJava-based package used in a
# later step -- confirm. It is placed before the library() calls because the
# option has to be set before the JVM is initialised.
options(java.parameters = "-Xmx6g")
# Loading required libraries
library(dplyr)
library(ggplot2)
library(countrycode)
library(CoordinateCleaner)
library(tidyverse)
library(rworldmap)
##############################################
# Cleaning GBIF data
#
# Reads the raw GBIF export, drops exact duplicate rows and records without
# coordinates, flags suspicious coordinates with CoordinateCleaner, and
# writes the records that pass every selected test to
# "data/cleanedRecords_GBIF.csv". `gbif_data_cl` stays in the workspace for
# the combining step further down.

# Reading GBIF data
gbif_data <- read_csv("data/gbif_data.csv")

# Remove duplicate records, then records with a missing coordinate:
# clean_coordinates() validates the coordinates before running any other
# test and stops with an error on rows it cannot validate.
gbif_data1 <- gbif_data %>%
  dplyr::distinct() %>%
  dplyr::filter(!is.na(decimalLongitude), !is.na(decimalLatitude))

# Convert country code from ISO2c to ISO3c (codes that cannot be matched
# become NA and countrycode() warns about them)
gbif_data1$countryCode <- countrycode(
  gbif_data1$countryCode,
  origin = "iso2c",
  destination = "iso3c"
)

# Flag problems
# The tibble returned by read_csv() is converted to a plain data.frame first.
gbif_data1 <- data.frame(gbif_data1)
flags <- clean_coordinates(
  x = gbif_data1,
  lon = "decimalLongitude",
  lat = "decimalLatitude",
  countries = "countryCode",
  species = "species",
  # An explicit `tests` vector replaces the default test set rather than
  # adding to it, so only the tests named here are requested.
  tests = c("centroids", "gbif", "zeros", "countries")
)
# summary(flags)
# plot(flags, lon = "decimalLongitude", lat = "decimalLatitude")

# Exclude problematic records: keep the rows whose `.summary` flag is TRUE
gbif_data_cl <- gbif_data1[flags$.summary, ]

# Writing output
write_csv(gbif_data_cl, "data/cleanedRecords_GBIF.csv")

# Clearing memory (only the intermediates; `gbif_data_cl` is used later)
rm(gbif_data, gbif_data1, flags)
##############################################
# Cleaning Facebook records
#
# Reads the Facebook-sourced records, relabels the 16 columns, keeps the
# species name and coordinates, drops rows where any of those three values is
# missing or empty, and writes the result to "data/cleanedRecords_FB.csv".

# Load the sheet and relabel its columns; names are assigned by position
fb_data <- read_csv("data/facebookDataUp.csv")
names(fb_data) <- c(
  "class", "order", "family", "species", "commonName", "iucn", "lifeStage",
  "date", "month", "year", "location", "decimalLatitude", "decimalLongitude",
  "photographer", "comment", "status"
)

# Keep the occurrence columns and only the rows with all three values present
fb_data <- fb_data %>%
  dplyr::select(species, decimalLongitude, decimalLatitude) %>%
  dplyr::filter(
    !is.na(species), species != "",
    !is.na(decimalLongitude), decimalLongitude != "",
    !is.na(decimalLatitude), decimalLatitude != ""
  )

# Save the cleaned Facebook records
write_csv(fb_data, "data/cleanedRecords_FB.csv")
##############################################
# Combining dataframes
##############################################
# Merges the cleaned GBIF and Facebook records into one occurrence table,
# keeps only species with more than three records, and writes the result to
# "data/cleanedRecords_combined.csv".

# Reduce both sources to the same three occurrence columns
gbif <- dplyr::select(
  gbif_data_cl,
  species, decimalLongitude, decimalLatitude
)
fb <- dplyr::select(
  fb_data,
  species, decimalLongitude, decimalLatitude
)

# Stack the Facebook rows on top of the GBIF rows, then drop every species
# that has three records or fewer
combined_data <- rbind(fb, gbif) %>%
  group_by(species) %>%
  filter(n() > 3) %>%
  ungroup()

# Save the combined occurrence table
write_csv(combined_data, "data/cleanedRecords_combined.csv")
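# ##############################################
# Disabled alternative: combine the GBIF records with a previously cleaned
# file (data/cleanedRecords_prev.csv) instead of the Facebook records.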
# gbif <- gbif_data_cl %>%
# dplyr::select(species, decimalLongitude, decimalLatitude)
# clean <- read.csv("data/cleanedRecords_prev.csv", header = T)
# combined_data <- rbind(clean, gbif)
#
# # Remove species with low occurrence records
# combined_data <- combined_data %>%
# group_by(species) %>%
# filter(n() > 3) %>%
# ungroup()
#
# write_csv(combined_data, "data/cleanedRecords.csv")