TMin_Mean_Range_trends.Rmd

---
title: "Assessing Climate Change Effects in VA Reservoirs"
subtitle: "Part 2: Deriving Temperature Trends by Month and Station (TMin, TMean, TRange)"
author: "Andrew Cameron"
output: html_document
---

```{r setup, include=FALSE}
# Chunk defaults for the whole document: show the code in the knitted output,
# but hide warnings and messages produced while running it.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
# Attach the tidyverse packages used by the chunks below.
library(tidyverse)
```

## Data Preprocessing 

```{r `read in data`}
# Sheet 2 of the workbook: the list of trend stations.
trendSites.df <- openxlsx::read.xlsx("data_original.xlsx", sheet = 2)

# Sheet 3 of the workbook: the full data set. `Date` arrives as an Excel
# serial number (e.g. 44230), so it is scaled from days to seconds
# (86400 seconds in a day) and converted to POSIXct in the same step.
allData.df <- openxlsx::read.xlsx("data_original.xlsx", sheet = 3) %>%
  mutate(Date = as.POSIXct(Date * 86400, origin = "1899-12-30", tz = "UTC"))
    
```

```{r 'remove anomalies'}
# Blank out one record the author flagged as anomalous:
# a surface temperature (Tmax) of 2.8 C in August
# 13774 	6ACNR000.00 	8/28/1990 	0.3 	NA 	NA 	NA 	NA

# Find the record by an exact match on its X1 identifier and use that position
# directly, instead of printing a substring match and then hard-coding a row
# number that silently goes stale if the sheet changes.
# NOTE(review): the original hard-coded row 13770 -- confirm that is the row
# this lookup returns for the current workbook.
anomaly_row <- which(allData.df$X1 == 13774)
if (length(anomaly_row) != 1L) {
  stop("Expected exactly one row with X1 == 13774, found ",
       length(anomaly_row), call. = FALSE)
}

# Columns 5 to 8 of the flagged row are set to missing.
allData.df[anomaly_row, 5:8] <- NA

```

```{r `subset and filter data`}
# Station IDs listed on the trend-site sheet.
station_IDS <- trendSites.df$FDT_STA_ID

# Stations left out of the analysis: only surface water measurements ever taken.
excluded_sites <- c(
  "2-JKS053.48", "2-XDD000.40", "4AROA192.55", "4AROA196.05",
  "5ASRN000.66", "6ACNR000.00", "6APNR008.15"
)

# subset 34 trend sites (41 - 7 excluded), keeping calendar months 5 to 10
trendsites <- allData.df %>%
  mutate(
    MonthNum = lubridate::month(Date),
    Year = lubridate::year(Date)
  ) %>%
  filter(
    FDT_STA_ID %in% station_IDS,
    !FDT_STA_ID %in% excluded_sites,
    MonthNum %in% 5:10
  )

# apply filters (based on 'Filter' column in "Station Specific Trend Sites.xlsx")

# Stations with their own fixed minimum MaxDepth.x. Keeping the thresholds in
# one lookup replaces five copy-pasted filter pipelines.
special_depth_min <- c(
  "4ABWR017.42" = 5,
  "4AROA038.49" = 6,
  "4AROA192.94" = 3,
  "9-NEW098.32" = 6,
  "9-PKC004.16" = 3
)
specialFiltersNeeded <- names(special_depth_min)

# One filtered data frame per special station, in the order listed above.
special_profiles <- lapply(specialFiltersNeeded, function(site_id) {
  min_depth <- special_depth_min[[site_id]]
  trendsites %>%
    filter(FDT_STA_ID == .env$site_id, MaxDepth.x >= .env$min_depth)
})

# Every other station: keep rows whose MaxDepth.x is at least 85% of MeanDepth.
sites_less85max <- trendsites %>%
  filter(
    !FDT_STA_ID %in% specialFiltersNeeded,
    MaxDepth.x >= .85 * MeanDepth
  )

# Bring back into single df (general stations first, then the special stations)
subset.df <- do.call(rbind, c(list(sites_less85max), special_profiles))


# "add additional filter excluding trend results for profiles where NObs<10 (I count there are ~7 of these)"
# The filter is not applied to `subset.df` itself: doing so would leave some
# stations without a full set of months 5:10 and complicate the trend code
# substantially. Instead the offending station-month keys are collected here
# and removed after the trends are derived but before plotting.

# Row count per station-month, with a "<station>_<month>" key.
countPerGroup <- subset.df %>%
  group_by(FDT_STA_ID, MonthNum) %>%
  tally(name = "Nobs") %>%
  mutate(key = paste(FDT_STA_ID, MonthNum, sep = "_"))

# Keys of the station-months with fewer than 10 rows.
insufficient_Nobs <- countPerGroup$key[countPerGroup$Nobs < 10]

```


## Minimum temperature trend

```{r `derive TMin Anomaly`}
# Per station-month summary of TMin. Nobs is the number of rows in the group
# (rows with a missing TMin are counted too).
monthly_Tmin <- subset.df %>%
  group_by(FDT_STA_ID, MonthNum) %>%
  summarize(Mean = mean(TMin, na.rm = TRUE),
            Min = min(TMin, na.rm = TRUE),
            Max = max(TMin, na.rm = TRUE),
            Nobs = n(),
            .groups = "drop") %>%
  mutate(Month = month.name[MonthNum])

# write_csv(monthly_Tmin, "TMin_stats_monthlyByStation.csv")

cols_to_keep <- colnames(subset.df)

# Attach each row's station-month mean and express TMin as a departure from it.
# all_of() marks `cols_to_keep` as an external character vector; passing it
# bare to select() is deprecated and ambiguous if a column shares that name.
df.Tmin <- subset.df %>%
  left_join(monthly_Tmin, by = c("FDT_STA_ID", "MonthNum")) %>%
  select(all_of(cols_to_keep), Mean) %>%
  mutate(TAnomaly = TMin - Mean)

# apply filter to exclude month-stations with < 10 Nobs from visualization
monthly_Tmin <- monthly_Tmin %>%
  mutate(key = paste(FDT_STA_ID, MonthNum, sep = "_")) %>%
  filter(!key %in% insufficient_Nobs)

```

```{r `fit regressions for each month-station`}
# fit a linear regression for anomaly values vs. year by month-station using the mblm function (median based linear model) as this is thought to be less sensitive to outliers.

# install.packages("mblm")
library(mblm)

# Usage
# mblm(formula, dataframe, repeated = TRUE)
#   formula    A formula of type y ~ x (only linear models are accepted)
#   dataframe  Optional dataframe
#   repeated   If set to true, model is computed using repeated medians.
#              If false, single median estimators are calculated

# Pull the statistics kept downstream out of a fitted mblm model.
# mblm does not return a standard error. Instead, summary.mblm can be used to
# extract the MAD (Median Absolute Deviation), a robust measure of variability
# that is less sensitive to outliers than the standard deviation commonly used
# in traditional statistical analyses.
#   model  a fitted mblm object
#   term   name of the predictor row in the coefficient table
# Returns a list with slope, MAD, pvalue and intercept.
mblm_stats <- function(model, term) {
  coefs <- summary.mblm(model)$coefficients
  list(
    slope = coefs[2, 1],
    MAD = coefs[term, "MAD"],
    pvalue = coefs[term, 4],
    intercept = coefs[1, 1]
  )
}

# TRUE when `data` holds at least two distinct values of `x_col`. No slope can
# be estimated from fewer, so such subsets are skipped rather than fitted.
has_trend_data <- function(data, x_col) {
  length(unique(data[[x_col]])) >= 2
}

# remove NA values -- mblm() does not handle NA as lm() does
df.Tmin <- df.Tmin %>%
  filter(!is.na(TAnomaly), !is.na(TMin))

## ------TMin Anomaly------ and ------TMin, non-normalized------
# 6/4 The non-normalized TMin fit is needed for the data viz (August only trend
# line plots). Both models use the same station-month subset, so they are
# fitted in one pass. Results are stored under "<station>_<month>" keys.
modelSummaries_TMinAnomaly <- list()
modelSummaries_TMin <- list()

for (station_id in unique(df.Tmin$FDT_STA_ID)) {
  for (month_num in 5:10) {

    month_station <- df.Tmin %>%
      filter(FDT_STA_ID == station_id & MonthNum == month_num)

    if (!has_trend_data(month_station, "Year")) {
      next
    }

    key <- paste(station_id, month_num, sep = "_")

    # `dataframe =` is mblm's actual argument name (see Usage above); the
    # shorter `data =` only reached it through partial matching.
    modelSummaries_TMinAnomaly[[key]] <- mblm_stats(
      mblm(TAnomaly ~ Year, dataframe = month_station),
      "Year"
    )
    modelSummaries_TMin[[key]] <- mblm_stats(
      mblm(TMin ~ Year, dataframe = month_station),
      "Year"
    )
  }
}

## --------TMin, with 1975 as y intercept-------------
#  please send a csv file of the Tmin (bottom) results shown in this figure [using TMin and not TMinAnomaly], which includes the y intercept of the trend line (at year = 1975), along with the slope and p value.  I'd like to see whether the lakes with colder bottoms are warming at the same rate as the lakes with warmer bottoms.

# August rows only, with the year shifted so that the intercept is the fitted
# value at 1975.
august_tmin <- df.Tmin %>%
  mutate(Year1975 = Year - 1975) %>%
  filter(MonthNum == 8)

modelSummaries_TMin1975 <- list()

for (station_id in unique(august_tmin$FDT_STA_ID)) {
  station_data <- august_tmin %>%
    filter(FDT_STA_ID == station_id)

  if (!has_trend_data(station_data, "Year1975")) {
    next
  }

  # keyed by station ID only (single month)
  modelSummaries_TMin1975[[paste(station_id)]] <- mblm_stats(
    mblm(TMin ~ Year1975, dataframe = station_data),
    "Year1975"
  )
}

```


```{r `add reg stats to summary df`}
# Copy the stored regression statistics onto a summary data frame.
#   df         summary data frame (one row per station-month or per station)
#   summaries  named list of lists, each with slope, MAD, pvalue, intercept
#   key_col    name of the column in `df` holding the names used in `summaries`
# Returns `df` with model_slope, model_MAD, model_pval and model_intercept
# filled in. A row with no stored model gets NA instead of stopping the run,
# and a zero-row `df` is handled (a `1:nrow(df)` loop is not safe there).
add_model_stats <- function(df, summaries, key_col) {
  keys <- as.character(df[[key_col]])
  stat_for <- function(stat) {
    vapply(keys, function(k) {
      value <- summaries[[k]][[stat]]
      if (is.null(value)) NA_real_ else as.numeric(value)
    }, numeric(1), USE.NAMES = FALSE)
  }
  df$model_slope <- stat_for("slope")
  df$model_MAD <- stat_for("MAD")
  df$model_pval <- stat_for("pvalue")
  df$model_intercept <- stat_for("intercept")
  df
}

# create station-month key
monthly_TMinAnomaly <- monthly_Tmin %>%
  mutate(key = paste(FDT_STA_ID, MonthNum, sep = "_")) %>%
  add_model_stats(modelSummaries_TMinAnomaly, "key")

#---------------------------------------------
# Same as above, but for TMin non-normalized

monthly_TMin <- monthly_Tmin %>%
  mutate(key = paste(FDT_STA_ID, MonthNum, sep = "_")) %>%
  add_model_stats(modelSummaries_TMin, "key")

#--------------------------------------------
# reg stats for August TMin non-normalized with 1975 as y intercept
# (these models are stored under the station ID alone)
monthly_TMin1975 <- monthly_Tmin %>%
  filter(MonthNum == 8) %>%
  add_model_stats(modelSummaries_TMin1975, "FDT_STA_ID")

#write_csv(monthly_TMinAnomaly, "output_data/TMinAnomaly_mblmModelStatistics.csv")
write_csv(monthly_TMin, "output_data/TMin_mblmModelStatistics.csv")
write_csv(monthly_TMin1975, "output_data/TMin_y1975.csv")
```


## Mean Temperature Trend

```{r `derive TMean Anomaly`}
# Per station-month summary of TMean. Nobs is the number of rows in the group
# (rows with a missing TMean are counted too).
monthly_TMean <- subset.df %>%
  group_by(FDT_STA_ID, MonthNum) %>%
  summarize(Mean = mean(TMean, na.rm = TRUE),
            Min = min(TMean, na.rm = TRUE),
            Max = max(TMean, na.rm = TRUE),
            Nobs = n(),
            .groups = "drop") %>%
  mutate(Month = month.name[MonthNum])

# write_csv(monthly_TMean, "TMean_stats_monthlyByStation.csv")

cols_to_keep <- colnames(subset.df)

# Attach each row's station-month mean and express TMean as a departure from it.
# all_of() marks `cols_to_keep` as an external character vector; passing it
# bare to select() is deprecated and ambiguous if a column shares that name.
df.TMean <- subset.df %>%
  left_join(monthly_TMean, by = c("FDT_STA_ID", "MonthNum")) %>%
  select(all_of(cols_to_keep), Mean) %>%
  mutate(TAnomaly = TMean - Mean)

# apply filter to exclude month-stations with < 10 Nobs from visualization
monthly_TMean <- monthly_TMean %>%
  mutate(key = paste(FDT_STA_ID, MonthNum, sep = "_")) %>%
  filter(!key %in% insufficient_Nobs)

```

```{r `fit regressions for each month-station2`}
# fit a linear regression for anomaly values vs. year by month-station using the mblm function (median based linear model) as this is thought to be less sensitive to outliers.

# install.packages("mblm")
library(mblm)

# Usage
# mblm(formula, dataframe, repeated = TRUE)
#   formula    A formula of type y ~ x (only linear models are accepted)
#   dataframe  Optional dataframe
#   repeated   If set to true, model is computed using repeated medians.
#              If false, single median estimators are calculated

# Pull the statistics kept downstream out of a fitted mblm model.
# mblm does not return a standard error. Instead, summary.mblm can be used to
# extract the MAD (Median Absolute Deviation), a robust measure of variability
# that is less sensitive to outliers than the standard deviation commonly used
# in traditional statistical analyses.
#   model  a fitted mblm object
#   term   name of the predictor row in the coefficient table
# Returns a list with slope, MAD, pvalue and intercept.
mblm_stats <- function(model, term) {
  coefs <- summary.mblm(model)$coefficients
  list(
    slope = coefs[2, 1],
    MAD = coefs[term, "MAD"],
    pvalue = coefs[term, 4],
    intercept = coefs[1, 1]
  )
}

# TRUE when `data` holds at least two distinct values of `x_col`. No slope can
# be estimated from fewer, so such subsets are skipped rather than fitted.
has_trend_data <- function(data, x_col) {
  length(unique(data[[x_col]])) >= 2
}

# remove NA values -- mblm() does not handle NA as lm() does
df.TMean <- df.TMean %>%
  filter(!is.na(TAnomaly), !is.na(TMean))

# Lists to store regression results, keyed "<station>_<month>".
# 6/4 The non-normalized TMean fit is needed for the data viz (August only
# trend line plots).
# Both models are fitted on df.TMean. The original non-normalized loop iterated
# over df.Tmin (copy-paste slip), which is filtered on TMin rather than TMean:
# it could drop rows with a valid TMean and keep rows where TMean is missing.
modelSummaries_TMeanAnomaly <- list()
modelSummaries_TMean <- list()

for (station_id in unique(df.TMean$FDT_STA_ID)) {
  for (month_num in 5:10) {

    month_station <- df.TMean %>%
      filter(FDT_STA_ID == station_id & MonthNum == month_num)

    if (!has_trend_data(month_station, "Year")) {
      next
    }

    key <- paste(station_id, month_num, sep = "_")

    # `dataframe =` is mblm's actual argument name (see Usage above); the
    # shorter `data =` only reached it through partial matching.
    modelSummaries_TMeanAnomaly[[key]] <- mblm_stats(
      mblm(TAnomaly ~ Year, dataframe = month_station),
      "Year"
    )
    modelSummaries_TMean[[key]] <- mblm_stats(
      mblm(TMean ~ Year, dataframe = month_station),
      "Year"
    )
  }
}

## --------TMean, with 1975 as y intercept-------------

# August rows only, with the year shifted so that the intercept is the fitted
# value at 1975.
august_tmean <- df.TMean %>%
  mutate(Year1975 = Year - 1975) %>%
  filter(MonthNum == 8)

modelSummaries_TMean1975 <- list()

for (station_id in unique(august_tmean$FDT_STA_ID)) {
  station_data <- august_tmean %>%
    filter(FDT_STA_ID == station_id)

  if (!has_trend_data(station_data, "Year1975")) {
    next
  }

  # keyed by station ID only (single month)
  modelSummaries_TMean1975[[paste(station_id)]] <- mblm_stats(
    mblm(TMean ~ Year1975, dataframe = station_data),
    "Year1975"
  )
}

```


```{r `add reg stats to summary df2`}
# Copy the stored regression statistics onto a summary data frame.
#   df         summary data frame (one row per station-month or per station)
#   summaries  named list of lists, each with slope, MAD, pvalue, intercept
#   key_col    name of the column in `df` holding the names used in `summaries`
# Returns `df` with model_slope, model_MAD, model_pval and model_intercept
# filled in. A row with no stored model gets NA instead of stopping the run,
# and a zero-row `df` is handled (a `1:nrow(df)` loop is not safe there).
add_model_stats <- function(df, summaries, key_col) {
  keys <- as.character(df[[key_col]])
  stat_for <- function(stat) {
    vapply(keys, function(k) {
      value <- summaries[[k]][[stat]]
      if (is.null(value)) NA_real_ else as.numeric(value)
    }, numeric(1), USE.NAMES = FALSE)
  }
  df$model_slope <- stat_for("slope")
  df$model_MAD <- stat_for("MAD")
  df$model_pval <- stat_for("pvalue")
  df$model_intercept <- stat_for("intercept")
  df
}

# create station-month key
monthly_TMeanAnomaly <- monthly_TMean %>%
  mutate(key = paste(FDT_STA_ID, MonthNum, sep = "_")) %>%
  add_model_stats(modelSummaries_TMeanAnomaly, "key")

# Same as above, but for TMean non-normalized
monthly_TMean <- monthly_TMean %>%
  mutate(key = paste(FDT_STA_ID, MonthNum, sep = "_")) %>%
  add_model_stats(modelSummaries_TMean, "key")

#--------------------------------------------
# reg stats for August TMean non-normalized with 1975 as y intercept
# (these models are stored under the station ID alone; the model_* columns
# inherited from monthly_TMean are overwritten with the 1975 results)
monthly_TMean1975 <- monthly_TMean %>%
  filter(MonthNum == 8) %>%
  add_model_stats(modelSummaries_TMean1975, "FDT_STA_ID")

write_csv(monthly_TMeanAnomaly, "output_data/TMeanAnomaly_mblmModelStatistics.csv")
#write_csv(monthly_TMean1975, "output_data/TMean_y1975.csv")
```


## Temperature Range Trend

```{r `derive TRange mean by month`}
# Per station-month summary of TRange. Nobs is the number of rows in the group
# (rows with a missing TRange are counted too).
monthly_TRange <- subset.df %>%
  group_by(FDT_STA_ID, MonthNum) %>%
  summarize(Mean = mean(TRange, na.rm = TRUE),
            Min = min(TRange, na.rm = TRUE),
            Max = max(TRange, na.rm = TRUE),
            Nobs = n(),
            .groups = "drop") %>%
  mutate(Month = month.name[MonthNum])

cols_to_keep <- colnames(subset.df)

# Attach each row's station-month mean TRange (no anomaly is derived here).
# all_of() marks `cols_to_keep` as an external character vector; passing it
# bare to select() is deprecated and ambiguous if a column shares that name.
df.TRange <- subset.df %>%
  left_join(monthly_TRange, by = c("FDT_STA_ID", "MonthNum")) %>%
  select(all_of(cols_to_keep), Mean)

# apply filter to exclude month-stations with < 10 Nobs from visualization
monthly_TRange <- monthly_TRange %>%
  mutate(key = paste(FDT_STA_ID, MonthNum, sep = "_")) %>%
  filter(!key %in% insufficient_Nobs)

```

```{r `fit regressions for each month-station3`}
# fit a linear regression for TRange vs. year by month-station using the mblm function (median based linear model) as this is thought to be less sensitive to outliers.

# install.packages("mblm")
library(mblm)

# Usage
# mblm(formula, dataframe, repeated = TRUE)
#   formula    A formula of type y ~ x (only linear models are accepted)
#   dataframe  Optional dataframe
#   repeated   If set to true, model is computed using repeated medians.
#              If false, single median estimators are calculated

# Pull the statistics kept downstream out of a fitted mblm model.
# mblm does not return a standard error. Instead, summary.mblm can be used to
# extract the MAD (Median Absolute Deviation), a robust measure of variability
# that is less sensitive to outliers than the standard deviation commonly used
# in traditional statistical analyses.
#   model  a fitted mblm object
#   term   name of the predictor row in the coefficient table
# Returns a list with slope, MAD, pvalue and intercept.
mblm_stats <- function(model, term) {
  coefs <- summary.mblm(model)$coefficients
  list(
    slope = coefs[2, 1],
    MAD = coefs[term, "MAD"],
    pvalue = coefs[term, 4],
    intercept = coefs[1, 1]
  )
}

# TRUE when `data` holds at least two distinct values of `x_col`. No slope can
# be estimated from fewer, so such subsets are skipped rather than fitted.
has_trend_data <- function(data, x_col) {
  length(unique(data[[x_col]])) >= 2
}

# remove NA values -- mblm() does not handle NA as lm() does
df.TRange <- df.TRange %>%
  filter(!is.na(TRange))

# create list to store regression results, keyed "<station>_<month>"
modelSummaries <- list()

for (station_id in unique(df.TRange$FDT_STA_ID)) {
  for (month_num in 5:10) {

    month_station <- df.TRange %>%
      filter(FDT_STA_ID == station_id & MonthNum == month_num)

    if (!has_trend_data(month_station, "Year")) {
      next
    }

    # `dataframe =` is mblm's actual argument name (see Usage above); the
    # shorter `data =` only reached it through partial matching.
    modelSummaries[[paste(station_id, month_num, sep = "_")]] <- mblm_stats(
      mblm(TRange ~ Year, dataframe = month_station),
      "Year"
    )
  }
}

## --------TRange, with 1975 as y intercept-------------

# August rows only, with the year shifted so that the intercept is the fitted
# value at 1975.
august_trange <- df.TRange %>%
  mutate(Year1975 = Year - 1975) %>%
  filter(MonthNum == 8)

modelSummaries_TRange1975 <- list()

for (station_id in unique(august_trange$FDT_STA_ID)) {
  station_data <- august_trange %>%
    filter(FDT_STA_ID == station_id)

  if (!has_trend_data(station_data, "Year1975")) {
    next
  }

  # keyed by station ID only (single month)
  modelSummaries_TRange1975[[paste(station_id)]] <- mblm_stats(
    mblm(TRange ~ Year1975, dataframe = station_data),
    "Year1975"
  )
}

```


```{r `add reg stats to summary df3`}
# Copy the stored regression statistics onto a summary data frame.
#   df         summary data frame (one row per station-month or per station)
#   summaries  named list of lists, each with slope, MAD, pvalue, intercept
#   key_col    name of the column in `df` holding the names used in `summaries`
# Returns `df` with model_slope, model_MAD, model_pval and model_intercept
# filled in. A row with no stored model gets NA instead of stopping the run,
# and a zero-row `df` is handled (a `1:nrow(df)` loop is not safe there).
add_model_stats <- function(df, summaries, key_col) {
  keys <- as.character(df[[key_col]])
  stat_for <- function(stat) {
    vapply(keys, function(k) {
      value <- summaries[[k]][[stat]]
      if (is.null(value)) NA_real_ else as.numeric(value)
    }, numeric(1), USE.NAMES = FALSE)
  }
  df$model_slope <- stat_for("slope")
  df$model_MAD <- stat_for("MAD")
  df$model_pval <- stat_for("pvalue")
  df$model_intercept <- stat_for("intercept")
  df
}

# create station-month key
monthly_TRange <- monthly_TRange %>%
  mutate(key = paste(FDT_STA_ID, MonthNum, sep = "_")) %>%
  add_model_stats(modelSummaries, "key")

#--------------------------------------------
# reg stats for August TRange with 1975 as y intercept
# (these models are stored under the station ID alone; the model_* columns
# inherited from monthly_TRange are overwritten with the 1975 results)
monthly_TRange1975 <- monthly_TRange %>%
  filter(MonthNum == 8) %>%
  add_model_stats(modelSummaries_TRange1975, "FDT_STA_ID")

write_csv(monthly_TRange, "output_data/TRange_mblmModelStatistics.csv")
```

### Single DF with all 1975 y-intercept model results

```{r}
# Stack the three August / 1975-intercept result tables into one data frame,
# tagging each row with the temperature variable it describes, and drop the
# descriptive Mean/Min/Max columns before writing.
# The tables are bound directly rather than through single-letter temporaries;
# the original temporary named `c` shadowed base::c().
d <- bind_rows(
  monthly_TMean1975 %>% mutate(variable = "TMean"),
  monthly_TMin1975 %>% mutate(variable = "TMin"),
  monthly_TRange1975 %>% mutate(variable = "TRange")
) %>%
  select(-Mean, -Min, -Max)

readr::write_csv(d, "output_data/1975_y_intercept_TempVars_model_results.csv")


```


## Data viz

"a box-whisker plot showing the distribution of values by month across all stations."

```{r `custom func`}
# Prepare the plotting data for one temperature variable.
#   df  data frame with FDT_STA_ID, Month, model_slope and model_pval columns
# Returns `df` plus one "All Months" row per station (its mean slope across
# months), with `Month` as an ordered-level factor and a `trend` factor that
# labels each row "significant" (p <= 0.05), "not significant" or "N/A"
# (missing p-value, which includes the "All Months" rows).
makePlotData <- function(df) {
  month_levels <- c(month.name, "All Months")
  trend_levels <- c("significant", "not significant", "N/A")

  # mean warming per station, shaped as extra rows for the plot
  station_means <- df %>%
    group_by(FDT_STA_ID) %>%
    summarize(mean_slope = mean(model_slope, na.rm = TRUE),
              .groups = "drop") %>%
    mutate(Month = "All Months",
           trend = "N/A")

  bind_rows(df, station_means) %>%
    mutate(
      # the "All Months" rows carry their value in mean_slope
      model_slope = if_else(Month == "All Months", mean_slope, model_slope),
      Month = factor(Month, levels = month_levels),
      trend = case_when(
        is.na(model_pval) ~ "N/A",
        model_pval <= 0.05 ~ "significant",
        TRUE ~ "not significant"
      ),
      trend = factor(trend, levels = trend_levels)
    )
}

```

### TMin

```{r}
# NOTE(review): the original call passed `Tmin_final`, which is not defined
# anywhere in this document. `monthly_TMinAnomaly` (built in the TMin
# "add reg stats" chunk) has the FDT_STA_ID, Month, model_slope and model_pval
# columns that makePlotData() reads -- confirm it is the intended input.
TMin_plot.df <- makePlotData(monthly_TMinAnomaly)

# Box plot of the fitted slopes, one box per month plus "All Months".
TMin_boxplot <- ggplot(TMin_plot.df, aes(x = Month, y = model_slope, fill = Month)) +
  geom_boxplot(alpha = .85) +
  labs(title = "Variation in Warming Rates by Month (TMin)",
       x = "Month",
       y = "T Trend (C/y)",
       fill = "Month") +
  theme_minimal() +
  theme(axis.title.x = element_text(margin = margin(t = 10)),
        legend.position = "none")

TMin_boxplot

# Save this specific plot rather than whatever happened to be drawn last.
ggsave("output_data/TMin_variationByMonth.jpg", plot = TMin_boxplot)

```

### TMean

```{r}
# NOTE(review): the original call passed `TMean_final`, which is not defined
# anywhere in this document. `monthly_TMeanAnomaly` (built in the TMean
# "add reg stats" chunk) has the FDT_STA_ID, Month, model_slope and model_pval
# columns that makePlotData() reads -- confirm it is the intended input.
TMean_plot.df <- makePlotData(monthly_TMeanAnomaly)

# Box plot of the fitted slopes, one box per month plus "All Months".
TMean_boxplot <- ggplot(TMean_plot.df, aes(x = Month, y = model_slope, fill = Month)) +
  geom_boxplot(alpha = .85) +
  labs(title = "Variation in Warming Rates by Month (TMean)",
       x = "Month",
       y = "T Trend (C/y)",
       fill = "Month") +
  theme_minimal() +
  theme(axis.title.x = element_text(margin = margin(t = 10)),
        legend.position = "none")

TMean_boxplot

# Save this specific plot rather than whatever happened to be drawn last.
ggsave("output_data/TMean_variationByMonth.jpg", plot = TMean_boxplot)

```

### TRange

```{r}
# NOTE(review): the original call passed `TRange_final`, which is not defined
# anywhere in this document. `monthly_TRange` (built in the TRange
# "add reg stats" chunk) has the FDT_STA_ID, Month, model_slope and model_pval
# columns that makePlotData() reads -- confirm it is the intended input.
TRange_plot.df <- makePlotData(monthly_TRange)

# Box plot of the fitted slopes, one box per month plus "All Months".
TRange_boxplot <- ggplot(TRange_plot.df, aes(x = Month, y = model_slope, fill = Month)) +
  geom_boxplot(alpha = .85) +
  labs(title = "Variation in Warming Rates by Month (TRange)",
       x = "Month",
       y = "T Trend (C/y)",
       fill = "Month") +
  theme_minimal() +
  theme(axis.title.x = element_text(margin = margin(t = 10)),
        legend.position = "none")

TRange_boxplot

# Save this specific plot rather than whatever happened to be drawn last.
ggsave("output_data/TRange_variationByMonth.jpg", plot = TRange_boxplot)

```