Commit 51d54d9f by Jan Wijffels

add citizenair_compare

parent f86d6bee
#' @title Compare citizen data to official data
#' @description Compare citizen data to official data
#' @param x a data.frame with columns time/group/value
#' @param reference a character string of length 1 indicating in the \code{group} column of \code{x} what is the reference value.
#' Defaults to 'official'
#' @param sensor a character string of length 1 indicating in the \code{group} column of \code{x} what is the comparison value.
#' Defaults to 'citizen'
#' @return an object of class \code{citizenair_comparison_ircel} which is a list with elements
#' \enumerate{
#' \item{data: the data containing columns time, REF, SENSOR, OLSQ, lower_CI, upper_CI, lower_PI, upper_PI and e}
#' \item{model: an object of class lm represendint the model SENSOR ~ REF}
#' \item{overview: a data.frame with columns statistic, time, REF, SENSOR, OLSQ, lower_CI, upper_CI, lower_PI, upper_PI and e with the
#' minimum/Q25/median/mean/Q75/max for each of these columns as well as the number of valid data points and the number of NA values}
#' }
#' @export
#' @examples
#' x <- data.frame(
#' time = rep(seq.Date(from = Sys.Date()-364, to = Sys.Date(), by = "day"), 2),
#' group = sample(c("official", "citizen"), size = 365*2, replace = TRUE),
#' value = rnorm(365*2))
#' comparison <- citizenair_compare(x, reference = "official", sensor = "citizen")
#' plot(comparison)
citizenair_compare <- function(x, reference = "official", sensor = "citizen"){
# r cmd check happiness
.SD <- OLSQ <- REF <- SENSOR <- lower_CI <- lower_PI <- upper_CI <- upper_PI <- NULL
stopifnot(is.data.frame(x))
stopifnot(all(c("time", "group", "value") %in% colnames(x)))
group_values <- unique(x$group)
group_values <- setdiff(group_values, NA)
x <- x[x$group %in% c(reference, sensor), ]
x$group <- txt_recode(x$group, from = c(reference, sensor), to = c("REF", "SENSOR"))
x <- data.table::setDT(x)
mydata <- data.table::dcast.data.table(data = x, formula = time ~ group,
fun.aggregate = mean, na.rm=TRUE,
value.var = "value")
mydata <- data.table::setDF(mydata)
linear_model <- lm(SENSOR ~ REF, data = mydata)
PI <- predict(linear_model, interval = "predict", level = 0.95, newdata = mydata, na.action = na.pass)
PI <- as.data.frame(PI)
colnames(PI) <- c("OLSQ", "lower PI", "upper PI")
CI <- predict(linear_model, interval = "confidence", level = 0.95, newdata = mydata, na.action = na.pass)
CI <- as.data.frame(CI)
colnames(CI) <- c("OLSQ", "lower CI", "upper CI")
mydata[["OLSQ"]] <- CI[["OLSQ"]]
mydata[["lower_CI"]] <- CI[["lower CI"]]
mydata[["upper_CI"]] <- CI[["upper CI"]]
mydata[["lower_PI"]] <- PI[["lower PI"]]
mydata[["upper_PI"]] <- PI[["upper PI"]]
mydata$OLSQ <- CI$OLSQ
mydata$OLSQ <- CI$OLSQ
mydata$OLSQ <- CI$OLSQ
mydata$OLSQ <- CI$OLSQ
mydata$OLSQ <- CI$OLSQ
mydata$e <- mydata$OLSQ - mydata$SENSOR
mydata <- mydata[order(mydata$time, decreasing = FALSE), ]
overview <- data.table::as.data.table(mydata)
overview <- overview[, lapply(.SD, FUN=function(x){
summarystats <- summary(x)
if(inherits(x, c("Date", "POSIXct", "POSIXlt"))){
summarystats <- as.character(summarystats)
}
list("min" = summarystats[["Min."]],
"25th Perc" = summarystats[["1st Qu."]],
"median" = summarystats[["Median"]],
"mean" = summarystats[["Mean"]],
"75th Perc" = summarystats[["3rd Qu."]],
"max" = summarystats[["Max."]],
"\u0023 Valid" = sum(!is.na(x)),
"\u0023 NA" = sum(is.na(x))
)
}), .SDcols = colnames(mydata)]
overview <- data.table::setDF(overview)
overview[["statistic"]] <- c("min", "25th Perc", "median", "mean", "75th Perc", "max", "\u0023 Valid", "\u0023 NA's")
overview <- overview[, c("statistic", setdiff(colnames(overview), "statistic"))]
result <- list(data = mydata,
overview = overview,
model = linear_model)
class(result) <- "citizenair_comparison_ircel"
result
}
#' @title Plot functionality for an object of class \code{citizenair_comparison_ircel} as returned by \code{citizenair_compare}
#' @description Plot functionality for an object of class \code{citizenair_comparison_ircel} as returned by \code{\link{citizenair_compare}}
#' @param x an object of class \code{citizenair_comparison_ircel} as returned by \code{\link{citizenair_compare}}
#' @param ... further arguments, currently not used yet
#' @export
#' @examples
#' library(ggplot2)
#' x <- data.frame(
#' time = rep(seq.Date(from = Sys.Date()-364, to = Sys.Date(), by = "day"), 2),
#' group = sample(c("official", "citizen"), size = 365*2, replace = TRUE),
#' value = rnorm(365*2))
#' comparison <- citizenair_compare(x, reference = "official", sensor = "citizen")
#' plot(comparison)
plot.citizenair_comparison_ircel <- function(x, ...){
OLSQ <- REF <- SENSOR <- lower_CI <- lower_PI <- upper_CI <- upper_PI <- NULL
linear_model <- x$model
mydata <- x$data
plot_PI_CI <- ggplot2::ggplot(data = mydata, ggplot2::aes(x = REF, y = SENSOR)) +
ggplot2::geom_ribbon(ggplot2::aes(ymin = lower_CI, ymax=upper_CI, fill = "CI"), alpha = 0.6) +
ggplot2::geom_ribbon(ggplot2::aes(ymin = lower_PI, ymax=upper_PI, fill = "PI"), alpha = 0.4) +
ggplot2::geom_point() +
ggplot2::geom_line(ggplot2::aes(y = OLSQ,col = "OLSQ")) +
ggplot2::geom_line(ggplot2::aes(y = REF,col = "y=x")) +
ggplot2::labs(title = paste("Scatterplot with regression estimates: intercept =",
format(summary(linear_model)$coefficients[1,1], digits = 3),
", slope =",format(summary(linear_model)$coefficients[2,1], digits = 3),
", R\u00B2 =",format(summary(linear_model)$r.squared, digits = 3)),
x = "Reference value",
y = "Sensor") +
ggplot2::scale_fill_manual(name="",values=c("CI"="blue",
"PI"= "grey")) +
ggplot2::scale_colour_manual(name="", values = c("y=x"="red",
"OLSQ"="black"))
plot_PI_CI
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment