knitr::opts_chunk$set(echo = TRUE)
# Install any missing packages
install_packages <- function(packages) {
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages) > 0) {
install.packages(new_packages, dependencies = TRUE)
}
}
packages <- c("ggplot2", "tidyverse", "mblm")
install_packages(packages)
library(ggplot2)
library(tidyverse)
library(mblm)
trendSites.df <- openxlsx::read.xlsx("input_data/data_original.xlsx", sheet = 2)
allData.df <- openxlsx::read.xlsx("input_data/data_original.xlsx", sheet = 3)
# Convert `date` column from Excel encoded date to a more legible date format. Otherwise date shows as numeric value, e.g. '44230'.
allData.df$Date <- allData.df$Date * 86400 # 86400 = seconds in a day.
allData.df$Date <- as.POSIXct(allData.df$Date, origin = "1899-12-30", tz = "UTC")
# a surface temperature (Tmax) of 2.8 C in August
# 13774 6ACNR000.00 8/28/1990 0.3 NA NA NA NA
which(grepl("13774", allData.df$X1))
## [1] 13770
allData.df[13770, (5:8)] <- NA
station_IDS <- trendSites.df$FDT_STA_ID
excluded_sites <- c("2-JKS053.48",
"2-XDD000.40",
"4AROA192.55",
"4AROA196.05",
"5ASRN000.66",
"6ACNR000.00",
"6APNR008.15") # only surface water measurements ever taken
# subset 34 trend sites (41 - 7 excluded)
trendsites <- allData.df %>%
mutate(MonthNum = lubridate::month(Date),
Year = lubridate::year(Date)) %>%
filter(FDT_STA_ID %in% station_IDS) %>%
filter(!FDT_STA_ID %in% excluded_sites) %>%
filter(MonthNum %in% 5:10)
# apply filters (based on 'Filter' column in "Station Specific Trend Sites.xlsx")
specialFiltersNeeded <- c("4ABWR017.42", "4AROA038.49", "4AROA192.94", "9-NEW098.32", "9-PKC004.16")
special1 <- trendsites %>%
filter(FDT_STA_ID == "4ABWR017.42") %>%
filter(MaxDepth.x >= 5)
special2 <- trendsites %>%
filter(FDT_STA_ID == "4AROA038.49") %>%
filter(MaxDepth.x >= 6)
special3 <- trendsites %>%
filter(FDT_STA_ID == "4AROA192.94") %>%
filter(MaxDepth.x >= 3)
special4 <- trendsites %>%
filter(FDT_STA_ID == "9-NEW098.32") %>%
filter(MaxDepth.x >= 6)
special5 <- trendsites %>%
filter(FDT_STA_ID == "9-PKC004.16") %>%
filter(MaxDepth.x >= 3)
sites_less85max <- trendsites %>%
filter(!FDT_STA_ID %in% specialFiltersNeeded) %>%
filter(MaxDepth.x >= .85*MeanDepth)
# Bring back into single df
subset.df <- rbind(sites_less85max, special1, special2, special3, special4, special5)
# "add additional filter excluding trend results for profiles where NObs<10 (I count there are ~7 of these)"
# applying this filter to the df for deriving trends (`subset.df`) complicates the code substantially because it results in there not being a corresponding MonthNum 5:10 for each station ID. Filter applied after the trends are derived but before plotting.
countPerGroup <- subset.df %>%
group_by(FDT_STA_ID, MonthNum) %>%
summarise(Nobs = n()) %>%
mutate(key = paste(FDT_STA_ID, MonthNum, sep="_"))
## `summarise()` has grouped output by 'FDT_STA_ID'. You can override using the
## `.groups` argument.
insufficient_Nobs <- countPerGroup %>%
filter(Nobs < 10)
insufficient_Nobs <- insufficient_Nobs$key
monthlyDOMin <- subset.df %>%
group_by(FDT_STA_ID, MonthNum) %>%
summarize(Mean = mean(DOMin, na.rm = TRUE),
Min = min(DOMin, na.rm = TRUE),
Max = max(DOMin, na.rm = TRUE),
Nobs = n(),
.groups = 'drop') %>%
mutate(Month = month.name[MonthNum])
# write_csv(monthlyDOMin, "output_data/DOMin_stats_monthlyByStation.csv")
cols_to_keep <- colnames(subset.df)
df.DOMin <- subset.df %>%
left_join(monthlyDOMin, by = c("FDT_STA_ID", "MonthNum")) %>%
select(all_of(cols_to_keep), Mean)
# fit a linear regression for DOMean vs. year by month-station using the mblm function (median based linear model) as this is thought to be less sensitive to outliers.
#Usage
#mblm(formula, dataframe, repeated = TRUE)
#Arguments
#formula A formula of type y ~ x (only linear models are accepted)
#dataframe Optional dataframe
#repeated If set to true, model is computed using repeated medians. If false, a single median estimators are calculated
# remove NA values -- mblm() does not handle NA as lm() does
df.DOMin <- df.DOMin %>%
filter(!is.na(DOMin))
# create list to store regression results
modelSummaries <- list()
for (station_id in unique(df.DOMin$FDT_STA_ID)) {
for (month_num in 5:10) {
month_station <- df.DOMin %>%
filter(FDT_STA_ID == station_id & MonthNum == month_num)
model <- mblm(DOMin ~ Year, data = month_station)
mod.sum <- summary.mblm(model)
# store results
modelSummaries[[paste(station_id, month_num, sep = "_")]] <- list(
slope = mod.sum$coefficients[2,1],
MAD = mod.sum$coefficients["Year", "MAD"] ,
pvalue = mod.sum$coefficients["Year", 4],
intercept = mod.sum$coefficients[1,1]
)
}
}
## --------DOMin, with 1975 as y intercept-------------
q <- df.DOMin %>%
mutate(Year1975 = Year - 1975) %>%
filter(MonthNum == 8)
modelSummaries_DOMin1975 <- list()
for (station_id in unique(q$FDT_STA_ID)) {
data = subset(q, FDT_STA_ID == station_id)
model <- mblm(DOMin ~ Year1975, data = data)
mod.sum <- summary.mblm(model)
# store results
modelSummaries_DOMin1975[[paste(station_id)]] <- list(
slope = mod.sum$coefficients[2,1],
MAD = mod.sum$coefficients["Year1975", "MAD"] ,
pvalue = mod.sum$coefficients["Year1975", 4],
intercept = mod.sum$coefficients[1,1]
)
}
# mblm does not return a standard error. Instead, summary.mblm can be used to extract the MAD or Median Absolute Deviation." It's a robust measure of variability that is less sensitive to outliers than the standard deviation, which is commonly used in traditional statistical analyses.
# create station-month key
monthlyDOMin <- monthlyDOMin %>%
mutate( key = paste(FDT_STA_ID, MonthNum, sep="_"),
model_slope = NA,
model_MAD = NA,
model_pval = NA,
model_intercept = NA)
for (i in 1:nrow(monthlyDOMin)) {
key = monthlyDOMin$key[i]
monthlyDOMin$model_slope[i] <- modelSummaries[[key]]$slope
monthlyDOMin$model_MAD[i] <- modelSummaries[[key]]$MAD
monthlyDOMin$model_pval[i] <- modelSummaries[[key]]$pvalue
monthlyDOMin$model_intercept[i] <- modelSummaries[[key]]$intercept
}
DOMin_final <- monthlyDOMin %>%
filter(!key %in% insufficient_Nobs)
#--------------------------------------------
# reg stats for August DOMin non-normalized with 1975 as y intercept
monthly_DOMin1975 <- monthlyDOMin %>%
mutate( model_slope = NA,
model_MAD = NA,
model_pval = NA,
model_intercept = NA) %>%
filter(MonthNum == 8)
for (i in 1:nrow(monthly_DOMin1975)) {
key = monthly_DOMin1975$FDT_STA_ID[i]
monthly_DOMin1975$model_slope[i] <- modelSummaries_DOMin1975[[key]]$slope
monthly_DOMin1975$model_MAD[i] <- modelSummaries_DOMin1975[[key]]$MAD
monthly_DOMin1975$model_pval[i] <- modelSummaries_DOMin1975[[key]]$pvalue
monthly_DOMin1975$model_intercept[i] <- modelSummaries_DOMin1975[[key]]$intercept
}
DOMin1975_final <- monthly_DOMin1975 %>%
filter(!key %in% insufficient_Nobs)
#------------------------------------------------
write_csv(DOMin_final, "output_data/DOMin_mblmModelStatistics.csv")
# do any month-stations return a 0 slope with sig pval? In this case, no
wonky_results <- DOMin_final %>%
filter(model_slope == 0 & model_pval <= 0.05)
paste(nrow(wonky_results), "station-months returned a slope of 0 with a significant p-value in DOMin trend analysis.", sep=" ")
## [1] "0 station-months returned a slope of 0 with a significant p-value in DOMin trend analysis."
monthlyDOMean <- subset.df %>%
group_by(FDT_STA_ID, MonthNum) %>%
summarize(Mean = mean(DOMean, na.rm = TRUE),
Min = min(DOMean, na.rm = TRUE),
Max = max(DOMean, na.rm = TRUE),
Nobs = n(),
.groups = 'drop') %>%
mutate(Month = month.name[MonthNum])
# write_csv(monthlyDOMean, "output_data/DOMean_stats_monthlyByStation.csv")
cols_to_keep <- colnames(subset.df)
df.DOMean <- subset.df %>%
left_join(monthlyDOMean, by = c("FDT_STA_ID", "MonthNum")) %>%
select(all_of(cols_to_keep), Mean)
# fit a linear regression for DOMean vs. year by month-station using the mblm function (median based linear model) as this is thought to be less sensitive to outliers.
#Usage
#mblm(formula, dataframe, repeated = TRUE)
#Arguments
#formula A formula of type y ~ x (only linear models are accepted)
#dataframe Optional dataframe
#repeated If set to true, model is computed using repeated medians. If false, a single median estimators are calculated
# remove NA values -- mblm() does not handle NA as lm() does
df.DOMean <- df.DOMean %>%
filter(!is.na(DOMean))
# create list to store regression results
modelSummaries <- list()
for (station_id in unique(df.DOMean$FDT_STA_ID)) {
for (month_num in 5:10) {
month_station <- df.DOMean %>%
filter(FDT_STA_ID == station_id & MonthNum == month_num)
model <- mblm(DOMean ~ Year, data = month_station)
mod.sum <- summary.mblm(model)
# store results
modelSummaries[[paste(station_id, month_num, sep = "_")]] <- list(
slope = mod.sum$coefficients[2,1],
MAD = mod.sum$coefficients["Year", "MAD"] ,
pvalue = mod.sum$coefficients["Year", 4],
intercept = mod.sum$coefficients[1,1]
)
}
}
## --------DOMin, with 1975 as y intercept-------------
q <- df.DOMean %>%
mutate(Year1975 = Year - 1975) %>%
filter(MonthNum == 8)
modelSummaries_DOMean1975 <- list()
for (station_id in unique(q$FDT_STA_ID)) {
data = subset(q, FDT_STA_ID == station_id)
model <- mblm(DOMean ~ Year1975, data = data)
mod.sum <- summary.mblm(model)
# store results
modelSummaries_DOMean1975[[paste(station_id)]] <- list(
slope = mod.sum$coefficients[2,1],
MAD = mod.sum$coefficients["Year1975", "MAD"] ,
pvalue = mod.sum$coefficients["Year1975", 4],
intercept = mod.sum$coefficients[1,1]
)
}
# mblm does not return a standard error. Instead, summary.mblm can be used to extract the MAD or Median Absolute Deviation." It's a robust measure of variability that is less sensitive to outliers than the standard deviation, which is commonly used in traditional statistical analyses.
# create station-month key
monthlyDOMean <- monthlyDOMean %>%
mutate( key = paste(FDT_STA_ID, MonthNum, sep="_"),
model_slope = NA,
model_MAD = NA,
model_pval = NA,
model_intercept = NA)
for (i in 1:nrow(monthlyDOMean)) {
key = monthlyDOMean$key[i]
monthlyDOMean$model_slope[i] <- modelSummaries[[key]]$slope
monthlyDOMean$model_MAD[i] <- modelSummaries[[key]]$MAD
monthlyDOMean$model_pval[i] <- modelSummaries[[key]]$pvalue
monthlyDOMean$model_intercept[i] <- modelSummaries[[key]]$intercept
}
DOMean_final <- monthlyDOMean %>%
filter(!key %in% insufficient_Nobs)
#--------------------------------------------
# reg stats for August DOMean non-normalized with 1975 as y intercept
monthly_DOMean1975 <- monthlyDOMean %>%
mutate( model_slope = NA,
model_MAD = NA,
model_pval = NA,
model_intercept = NA) %>%
filter(MonthNum == 8)
for (i in 1:nrow(monthly_DOMean1975)) {
key = monthly_DOMean1975$FDT_STA_ID[i]
monthly_DOMean1975$model_slope[i] <- modelSummaries_DOMean1975[[key]]$slope
monthly_DOMean1975$model_MAD[i] <- modelSummaries_DOMean1975[[key]]$MAD
monthly_DOMean1975$model_pval[i] <- modelSummaries_DOMean1975[[key]]$pvalue
monthly_DOMean1975$model_intercept[i] <- modelSummaries_DOMean1975[[key]]$intercept
}
DOMean1975_final <- monthly_DOMean1975 %>%
filter(!key %in% insufficient_Nobs)
#------------------------------------------------
write_csv(DOMean_final, "output_data/DOMean_mblmModelStatistics.csv")
# do any month-stations return a 0 slope with sig pval? In this case, no
wonky_results <- DOMean_final %>%
filter(model_slope == 0 & model_pval <= 0.05)
paste(nrow(wonky_results), "station-months returned a slope of 0 with a significant p-value in DOMean trend analysis.", sep=" ")
## [1] "0 station-months returned a slope of 0 with a significant p-value in DOMean trend analysis."
monthlyDORange <- subset.df %>%
group_by(FDT_STA_ID, MonthNum) %>%
summarize(Mean = mean(DORange, na.rm = TRUE),
Min = min(DORange, na.rm = TRUE),
Max = max(DORange, na.rm = TRUE),
Nobs = n(),
.groups = 'drop') %>%
mutate(Month = month.name[MonthNum])
# write_csv(monthlyDORange, "output_data/DORange_stats_monthlyByStation.csv")
cols_to_keep <- colnames(subset.df)
df.DORange <- subset.df %>%
left_join(monthlyDORange, by = c("FDT_STA_ID", "MonthNum")) %>%
select(all_of(cols_to_keep), Mean)
# fit a linear regression for DORange vs. year by month-station using the mblm function (median based linear model) as this is thought to be less sensitive to outliers.
#Usage
#mblm(formula, dataframe, repeated = TRUE)
#Arguments
#formula A formula of type y ~ x (only linear models are accepted)
#dataframe Optional dataframe
#repeated If set to true, model is computed using repeated medians. If false, a single median estimators are calculated
# remove NA values -- mblm() does not handle NA as lm() does
df.DORange <- df.DORange %>%
filter(!is.na(DORange))
# create list to store regression results
modelSummaries <- list()
for (station_id in unique(df.DORange$FDT_STA_ID)) {
for (month_num in 5:10) {
month_station <- df.DORange %>%
filter(FDT_STA_ID == station_id & MonthNum == month_num)
model <- mblm(DORange ~ Year, data = month_station)
mod.sum <- summary.mblm(model)
# store results
modelSummaries[[paste(station_id, month_num, sep = "_")]] <- list(
slope = mod.sum$coefficients[2,1],
MAD = mod.sum$coefficients["Year", "MAD"] ,
pvalue = mod.sum$coefficients["Year", 4],
intercept = mod.sum$coefficients[1,1]
)
}
}
## --------DORange, with 1975 as y intercept-------------
q <- df.DORange %>%
mutate(Year1975 = Year - 1975) %>%
filter(MonthNum == 8)
modelSummaries_DORange1975 <- list()
for (station_id in unique(q$FDT_STA_ID)) {
data = subset(q, FDT_STA_ID == station_id)
model <- mblm(DORange ~ Year1975, data = data)
mod.sum <- summary.mblm(model)
# store results
modelSummaries_DORange1975[[paste(station_id)]] <- list(
slope = mod.sum$coefficients[2,1],
MAD = mod.sum$coefficients["Year1975", "MAD"] ,
pvalue = mod.sum$coefficients["Year1975", 4],
intercept = mod.sum$coefficients[1,1]
)
}
# mblm does not return a standard error. Instead, summary.mblm can be used to extract the MAD or Median Absolute Deviation." It's a robust measure of variability that is less sensitive to outliers than the standard deviation, which is commonly used in traditional statistical analyses.
# create station-month key
monthlyDORange <- monthlyDORange %>%
mutate( key = paste(FDT_STA_ID, MonthNum, sep="_"),
model_slope = NA,
model_MAD = NA,
model_pval = NA,
model_intercept = NA)
for (i in 1:nrow(monthlyDORange)) {
key = monthlyDORange$key[i]
monthlyDORange$model_slope[i] <- modelSummaries[[key]]$slope
monthlyDORange$model_MAD[i] <- modelSummaries[[key]]$MAD
monthlyDORange$model_pval[i] <- modelSummaries[[key]]$pvalue
monthlyDORange$model_intercept[i] <- modelSummaries[[key]]$intercept
}
DORange_final <- monthlyDORange %>%
filter(!key %in% insufficient_Nobs)
#--------------------------------------------
# reg stats for August DORange non-normalized with 1975 as y intercept
monthly_DORange1975 <- monthlyDORange %>%
mutate( model_slope = NA,
model_MAD = NA,
model_pval = NA,
model_intercept = NA) %>%
filter(MonthNum == 8)
for (i in 1:nrow(monthly_DORange1975)) {
key = monthly_DORange1975$FDT_STA_ID[i]
monthly_DORange1975$model_slope[i] <- modelSummaries_DORange1975[[key]]$slope
monthly_DORange1975$model_MAD[i] <- modelSummaries_DORange1975[[key]]$MAD
monthly_DORange1975$model_pval[i] <- modelSummaries_DORange1975[[key]]$pvalue
monthly_DORange1975$model_intercept[i] <- modelSummaries_DORange1975[[key]]$intercept
}
DORange1975_final <- monthly_DORange1975 %>%
filter(!key %in% insufficient_Nobs)
#------------------------------------------------
write_csv(DORange_final, "output_data/DORange_mblmModelStatistics.csv")
# do any month-stations return a 0 slope with sig pval? In this case, no
wonky_results <- DORange_final %>%
filter(model_slope == 0 & model_pval <= 0.05)
paste(nrow(wonky_results), "station-months returned a slope of 0 with a significant p-value in DORange trend analysis.", sep=" ")
## [1] "0 station-months returned a slope of 0 with a significant p-value in DORange trend analysis."
# This does not require filtering; surface only measurements are fine (?)
# Subset data more inclusively than in above calcs
DOMax.df <- allData.df %>%
mutate(MonthNum = lubridate::month(Date),
Year = lubridate::year(Date)) %>%
filter(FDT_STA_ID %in% station_IDS) %>%
filter(!FDT_STA_ID %in% c("6APNR008.15", "6ACNR000.00")) %>%
filter(MonthNum %in% 5:10)
countPerGroup_domax <- DOMax.df %>%
group_by(FDT_STA_ID, MonthNum) %>%
summarise(Nobs = n()) %>%
mutate(key = paste(FDT_STA_ID, MonthNum, sep="_"))
insufficient_Nobs_domax <- countPerGroup_domax %>%
filter(Nobs < 10)
insufficient_Nobs_domax <- insufficient_Nobs_domax$key
#--------------------------
monthlyDOMax <- DOMax.df %>%
group_by(FDT_STA_ID, MonthNum) %>%
summarize(Mean = mean(DOMax, na.rm = TRUE),
Min = min(DOMax, na.rm = TRUE),
Max = max(DOMax, na.rm = TRUE),
Nobs = n(),
.groups = 'drop') %>%
mutate(Month = month.name[MonthNum])
# write_csv(monthlyDOMin, "output_data/DOMin_stats_monthlyByStation.csv")
cols_to_keep <- colnames(DOMax.df)
df.DOMax <- DOMax.df %>%
left_join(monthlyDOMax, by = c("FDT_STA_ID", "MonthNum")) %>%
select(all_of(cols_to_keep), Mean)
# fit a linear regression for DOMean vs. year by month-station using the mblm function (median based linear model) as this is thought to be less sensitive to outliers.
#Usage
#mblm(formula, dataframe, repeated = TRUE)
#Arguments
#formula A formula of type y ~ x (only linear models are accepted)
#dataframe Optional dataframe
#repeated If set to true, model is computed using repeated medians. If false, a single median estimators are calculated
# remove NA values -- mblm() does not handle NA as lm() does
df.DOMax <- df.DOMax %>%
filter(!is.na(DOMax))
# create list to store regression results
modelSummaries <- list()
for (station_id in unique(df.DOMax$FDT_STA_ID)) {
for (month_num in 5:10) {
month_station <- df.DOMax %>%
filter(FDT_STA_ID == station_id & MonthNum == month_num)
model <- mblm(DOMax ~ Year, data = month_station)
mod.sum <- summary.mblm(model)
# store results
modelSummaries[[paste(station_id, month_num, sep = "_")]] <- list(
slope = mod.sum$coefficients[2,1],
MAD = mod.sum$coefficients["Year", "MAD"] ,
pvalue = mod.sum$coefficients["Year", 4],
intercept = mod.sum$coefficients[1,1]
)
}
}
## --------DOMax, with 1975 as y intercept-------------
q <- df.DOMax %>%
mutate(Year1975 = Year - 1975) %>%
filter(MonthNum == 8)
modelSummaries_DOMax1975 <- list()
for (station_id in unique(q$FDT_STA_ID)) {
data = subset(q, FDT_STA_ID == station_id)
model <- mblm(DOMax ~ Year1975, data = data)
mod.sum <- summary.mblm(model)
# store results
modelSummaries_DOMax1975[[paste(station_id)]] <- list(
slope = mod.sum$coefficients[2,1],
MAD = mod.sum$coefficients["Year1975", "MAD"] ,
pvalue = mod.sum$coefficients["Year1975", 4],
intercept = mod.sum$coefficients[1,1]
)
}
# mblm does not return a standard error. Instead, summary.mblm can be used to extract the MAD or Median Absolute Deviation." It's a robust measure of variability that is less sensitive to outliers than the standard deviation, which is commonly used in traditional statistical analyses.
# create station-month key
monthlyDOMax <- monthlyDOMax %>%
mutate( key = paste(FDT_STA_ID, MonthNum, sep="_"),
model_slope = NA,
model_MAD = NA,
model_pval = NA,
model_intercept = NA)
for (i in 1:nrow(monthlyDOMax)) {
key = monthlyDOMax$key[i]
monthlyDOMax$model_slope[i] <- modelSummaries[[key]]$slope
monthlyDOMax$model_MAD[i] <- modelSummaries[[key]]$MAD
monthlyDOMax$model_pval[i] <- modelSummaries[[key]]$pvalue
monthlyDOMax$model_intercept[i] <- modelSummaries[[key]]$intercept
}
DOMax_final <- monthlyDOMax %>%
filter(!key %in% insufficient_Nobs)
#--------------------------------------------
# reg stats for August DOMax non-normalized with 1975 as y intercept
monthly_DOMax1975 <- monthlyDOMax %>%
mutate( model_slope = NA,
model_MAD = NA,
model_pval = NA,
model_intercept = NA) %>%
filter(MonthNum == 8)
for (i in 1:nrow(monthly_DOMax1975)) {
key = monthly_DOMax1975$FDT_STA_ID[i]
monthly_DOMax1975$model_slope[i] <- modelSummaries_DOMax1975[[key]]$slope
monthly_DOMax1975$model_MAD[i] <- modelSummaries_DOMax1975[[key]]$MAD
monthly_DOMax1975$model_pval[i] <- modelSummaries_DOMax1975[[key]]$pvalue
monthly_DOMax1975$model_intercept[i] <- modelSummaries_DOMax1975[[key]]$intercept
}
DOMax1975_final <- monthly_DOMax1975 %>%
filter(!key %in% insufficient_Nobs)
#------------------------------------------------
write_csv(DOMax_final, "output_data/DOMax_mblmModelStatistics.csv")
# do any month-stations return a 0 slope with sig pval? In this case, no
wonky_results <- DOMax_final %>%
filter(model_slope == 0 & model_pval <= 0.05)
paste(nrow(wonky_results), "station-months returned a slope of 0 with a significant p-value in DOMax trend analysis.", sep=" ")
## [1] "0 station-months returned a slope of 0 with a significant p-value in DOMax trend analysis."
a <- DOMin1975_final %>%
mutate(variable = "DOMin")
b <- DOMean1975_final %>%
mutate(variable = "DOMean")
c <- DORange1975_final %>%
mutate(variable = "DORange")
d <- DOMax1975_final %>%
mutate(variable = "DOMax")
e <- rbind(a, b, c, d)
readr::write_csv(e, "output_data/1975_y_intercept_DOVars_model_results.csv")
“a box-whisker plot showing the distribution of values by month across all stations.”
# custom function to prepare plotting data for each variable
makePlotData <- function(df) {
# derive mean warming and incorporate back into df for plotting
df.allMonths <- df %>%
group_by(FDT_STA_ID) %>%
summarize(mean_slope = mean(model_slope, na.rm = TRUE),
.groups = "drop") %>%
mutate(Month = "All Months",
trend= "N/A")
df.plot <- bind_rows(df, df.allMonths)
# populate model slope column with mean slope where Month == "All Months"
df.plot <- df.plot %>%
mutate(model_slope = if_else(Month == "All Months", mean_slope, model_slope))
# set month as factor
df.plot <- df.plot %>%
mutate(Month = factor(Month, levels = c("January", "February", "March",
"April", "May", "June", "July",
"August", "September", "October",
"November", "December", "All Months")),
trend = ifelse(is.na(df.plot$model_pval), "N/A",
ifelse(df.plot$model_pval <= 0.05, "significant", "not significant")))
df.plot$trend <- factor(df.plot$trend, levels = c("significant", "not significant", "N/A"))
return(df.plot)
}
DOMin_plot.df <- makePlotData(DOMin_final)
ggplot(DOMin_plot.df, aes(x = Month, y = model_slope, fill = Month)) +
geom_boxplot(alpha = .85) +
labs(title = "Variation in Warming Rates by Month (DOMin)",
x = "Month",
y = "T Trend (C/y)",
fill = "Month") +
theme_minimal() +
theme(axis.title.x = element_text(margin = margin(t = 10)),
legend.position = "none")
ggsave("output_data/DOMin_variationByMonth.jpg")
## Saving 7 x 5 in image
DOMean_plot.df <- makePlotData(DOMean_final)
ggplot(DOMean_plot.df, aes(x = Month, y = model_slope, fill = Month)) +
geom_boxplot(alpha = .85) +
labs(title = "Variation in Warming Rates by Month (DOMean)",
x = "Month",
y = "T Trend (C/y)",
fill = "Month") +
theme_minimal() +
theme(axis.title.x = element_text(margin = margin(t = 10)),
legend.position = "none")
ggsave("output_data/DOMean_variationByMonth.jpg")
## Saving 7 x 5 in image
DORange_plot.df <- makePlotData(DORange_final)
ggplot(DORange_plot.df, aes(x = Month, y = model_slope, fill = Month)) +
geom_boxplot(alpha = .85) +
labs(title = "Variation in Warming Rates by Month (DORange)",
x = "Month",
y = "T Trend (C/y)",
fill = "Month") +
theme_minimal() +
theme(axis.title.x = element_text(margin = margin(t = 10)),
legend.position = "none")
ggsave("output_data/DORange_variationByMonth.jpg")
## Saving 7 x 5 in image