Commit 645492db by Jan Wijffels

Add the R package

parent ab43a150
......@@ -15,11 +15,12 @@ Aan het werk
```r
## Install packages
install.packages(c("flexdashboard", "shiny", "shinyWidgets", "rmarkdown"))
install.packages(c("flexdashboard", "shiny", "shinyWidgets", "rmarkdown", "DT"))
install.packages(c("data.table", "httr", "readxl", "magrittr"))
install.packages(c("leaflet", "sp", "rgeos", "magrittr"))
devtools::install_github("bnosac/BelgiumMaps.Statbel")
devtools::install_github("52North/sensorweb4R")
devtools::install_git("https://www.datatailor.be/vmm/citizenair", subdir = "citizenair")
## Run the app
rmarkdown::run("apps/citizenair.Rmd")
......
......@@ -11,3 +11,9 @@ Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
BuildType: Package
PackageUseDevtools: Yes
PackagePath: citizenair
PackageInstallArgs: --no-multiarch --with-keep.source
PackageRoxygenize: rd,collate,namespace
Package: citizenair
Type: Package
Title: Citizen Air Analysis Utilities
Version: 0.1
Maintainer: Jan Wijffels <jwijffels@bnosac.be>
Authors@R: c(
person('Jan', 'Wijffels', role = c('aut', 'cre', 'cph'), email = 'jwijffels@bnosac.be'),
person('BNOSAC', role = 'cph'))
Description: Utilities for the Citizen Air application.
License: CC-BY-SA-4.0
RoxygenNote: 6.0.1
Imports: readxl, cellranger, data.table, fasttime
Suggests: sp
# Generated by roxygen2: do not edit by hand
S3method(as.data.frame,citizenair)
export(read_citizenair)
importFrom(cellranger,cell_cols)
importFrom(cellranger,cell_limits)
importFrom(cellranger,cell_rows)
importFrom(data.table,as.data.table)
importFrom(data.table,melt.data.table)
importFrom(data.table,rbindlist)
importFrom(data.table,setnames)
importFrom(fasttime,fastPOSIXct)
importFrom(readxl,excel_sheets)
importFrom(readxl,read_excel)
importFrom(utils,capture.output)
importFrom(utils,head)
importFrom(utils,tail)
#' @importFrom readxl read_excel excel_sheets
#' @importFrom cellranger cell_rows cell_cols cell_limits
#' @importFrom utils head tail capture.output
#' @importFrom data.table rbindlist melt.data.table setnames as.data.table
#' @importFrom fasttime fastPOSIXct
NULL
#' @title Read in a CitizenAir excel file
#' @description Import every sheet of a CitizenAir excel file. For each sheet the
#' location of the relevant cells is detected first, after which the meta
#' information, the measurement units and the measurements themselves are read.
#' @param file the path to the excel file
#' @param name the name of the file, defaults to the basename of the file
#' @export
#' @return
#' an object of class \code{citizenair} which is a list with elements
#' \enumerate{
#' \item{filename: the name of the file}
#' \item{file: the full path to the excel file}
#' \item{time: the timestamp when the import was started}
#' \item{content: a list with one element per sheet, named by the sheet name, where each element contains the elements meta and data described below}
#' }
#' Content element meta is a list containing
#' \enumerate{
#' \item{sheet: the sheet name which was read in}
#' \item{device: the name of the device}
#' \item{lat: the latitude of the device address}
#' \item{lon: the longitude of the device address}
#' \item{sheet_id: an identifier which is the sequence number of the sheet}
#' \item{phenomena: a list containing phenomena measured in the data. Each list element is a character vector with the measurement unit.}
#' }
#' Content element data is a data.frame with the measurements containing
#' \enumerate{
#' \item{datum: the date - in POSIXct format returned by readxl::read_excel when setting that field to date}
#' \item{tijd: the time - in POSIXct format returned by readxl::read_excel when setting that field to date}
#' \item{several columns, one for each phenomena indicated in the \code{meta} element}
#' }
#' @examples
#' filename <- system.file(package = "citizenair", "data-raw", "test_dummy.xls")
#' x <- read_citizenair(filename)
#' meta <- as.data.frame(x, type = "meta")
#' measurements <- as.data.frame(x, type = "measurements")
read_citizenair <- function(file = system.file(package = "citizenair", "data-raw", "test_dummy.xls"),
                            name = basename(file)){
  file <- path.expand(file)
  ## File-level information; the timestamp is taken before any sheet is read
  out <- list(filename = name, file = file, time = Sys.time())
  sheet_names <- readxl::excel_sheets(file)
  content <- list()
  for(sheet_id in seq_along(sheet_names)){
    sheet_name <- sheet_names[sheet_id]
    ## Locate the cells of interest once and share that layout with all readers
    layout <- read_sheet_info(file, sheet_name)
    meta <- read_sheet_meta(file, sheet_name, layout)
    meta$sheet_id <- sheet_id
    meta$phenomena <- read_sheet_units(file, sheet_name, layout)
    entry <- list()
    entry$meta <- meta
    entry$data <- read_sheet_measurements(file, sheet_name, layout)
    content[[sheet_name]] <- entry
  }
  out$content <- content
  class(out) <- "citizenair"
  out
}
#' @title Convert excel data to a data.frame
#' @description Convert excel data to a data.frame. The information of all sheets
#' is stacked into one table, either the meta information (one row per sheet)
#' or the measurements in long format (one row per timepoint and phenomena).
#' @param x an object of class \code{citizenair} as returned by \code{\link{read_citizenair}}
#' @param type either 'meta' to get the meta information as a data.frame or 'measurements' to get the measurements data.frame
#' @param row.names not used
#' @param optional not used
#' @param ... not used
#' @export
#' @return a data.frame is returned (more specifically a data.table as the sheets are combined with \code{data.table::rbindlist})
#' \enumerate{
#' \item{in case type is meta: contains fields sheet_id, sheet, device, lat, lon, phenomena}
#' \item{in case type is measurements: contains fields sheet_id, timepoint, date, phenomena, value}
#' }
#' @examples
#' filename <- system.file(package = "citizenair", "data-raw", "test_dummy.xls")
#' x <- read_citizenair(filename)
#' meta <- as.data.frame(x, type = "meta")
#' measurements <- as.data.frame(x, type = "measurements")
as.data.frame.citizenair <- function(x, row.names, optional, ..., type = c("meta", "measurements")){
  type <- match.arg(type)
  if(type == "meta"){
    meta_fields <- c("sheet_id", "sheet", "device", "lat", "lon", "phenomena")
    x <- lapply(x$content, FUN=function(x){
      x <- x$meta
      ## collapse the names of the measured phenomena into 1 comma-separated string
      x$phenomena <- paste(names(x$phenomena), collapse = ", ")
      ## x is the meta list itself at this point: keep the documented fields in the
      ## documented order (the former x$meta[, ...] subsetted NULL and had no effect)
      x[intersect(meta_fields, names(x))]
    })
    x <- rbindlist(x)
  }else{
    x <- lapply(x$content, FUN=function(x){
      meta <- x$meta
      x <- x$data
      ## every column except datum/tijd is a measured phenomena
      measurements <- setdiff(colnames(x), c("datum", "tijd"))
      ## truncate datum to the day and put it back as a POSIXct in UTC
      x$datum <- as.Date(x$datum, tz = "UTC")
      x$datum <- as.POSIXct(x$datum, tz = "UTC")
      if(!all(is.na(x$tijd))){
        ## where a time is available, combine the day of datum with the time of day of tijd
        idx <- which(!is.na(x$tijd))
        x$datum[idx] <- fasttime::fastPOSIXct(sprintf("%s %s", format(x$datum[idx], "%Y-%m-%d"), format(x$tijd[idx], "%H:%M:%S")), tz = "UTC")
      }
      x <- as.data.table(x)
      ## wide to long: 1 row per timepoint/phenomena, missing values are dropped
      x <- data.table::melt.data.table(data = x, id.vars = "datum", measure.vars = measurements, na.rm = TRUE, value.name = "value", variable.name = "phenomena")
      x <- data.table::setnames(x, old = "datum", new = "timepoint")
      x$date <- as.Date(x$timepoint)
      x$sheet_id <- meta$sheet_id
      x <- data.table::setcolorder(x, c("sheet_id", "timepoint", "date", "phenomena", "value"))
      x
    })
    x <- rbindlist(x)
  }
  x
}
## Detect where the relevant cells of a sheet are located.
## Looks in the top of the sheet (up to row 20) for the keywords
## meettoestel, meetpunt and the header row with datum/variabele/tijd.
## Returns an object of class citizenair_excel_locations, a list with elements
##  - meettoestel: row and column of the cell next to the word meettoestel
##  - latlon: row and column of the cell next to the word meetpunt
##  - data_headers: the row with the column headers
##  - data_start: the row where the data starts (2 rows below the headers)
##  - measurements_columns: first and last column of the data block
##  - measurements_fields: the header names of these columns
##  - data_n: the number of cells read from the datum column below the units row
read_sheet_info <- function(file, sheet){
  ## which cells mention a keyword, ignoring case
  mentions <- function(cells, word){
    grepl(pattern = word, cells, ignore.case = TRUE)
  }
  ## position of the first row which mentions the keyword in any of its cells
  first_row_with <- function(grid, word){
    which.max(apply(grid, MARGIN=1, FUN=function(cells) any(mentions(cells, word))))
  }
  ## read the top of the sheet as text, anything sent to the message stream is captured
  captured <- capture.output(
    top <- readxl::read_excel(path = file, sheet = sheet, col_types = "text", range = cellranger::cell_rows(c(NA, 20)), col_names = FALSE),
    type = "message")
  top <- as.matrix(top)
  ## the device name is in the cell right of the word meettoestel
  device_row <- first_row_with(top, "meettoestel")
  device_col <- which.max(mentions(top[device_row, ], "meettoestel")) + 1
  ## the coordinates are in the cell right of the word meetpunt
  gps_row <- first_row_with(top, "meetpunt")
  gps_col <- which.max(mentions(top[gps_row, ], "meetpunt")) + 1
  ## the header row is the first row which has datum, variabele and tijd on it,
  ## the units should be one row below that and the data should start 2 rows below that
  header_words <- c("datum", "variabele", "tijd")
  is_header <- apply(top, MARGIN=1, FUN=function(cells){
    all(vapply(header_words, FUN=function(word) any(mentions(cells, word)), FUN.VALUE = logical(1)))
  })
  header_row <- which.max(is_header)
  ## count the rows by reading the datum column, which should always be filled in
  date_col <- grep(pattern = "datum", top[header_row, ], ignore.case = TRUE)
  date_cells <- readxl::read_excel(path = file, sheet = sheet, col_types = "text", range = cellranger::cell_cols(date_col), col_names="key")$key
  date_cells <- date_cells[-seq_len(header_row + 1)]
  ## the data block runs from the first datum/tijd header up to the last header
  header_names <- names(readxl::read_excel(path = file, sheet = sheet, col_types = "text", range = cellranger::cell_rows(header_row)))
  column_span <- c(which.max(tolower(header_names) %in% c("datum", "tijd")), length(header_names))
  fields <- header_names[column_span[1]:column_span[2]]
  structure(
    list(meettoestel = c(device_row, device_col),
         latlon = c(gps_row, gps_col),
         data_headers = header_row,
         data_start = header_row + 2,
         measurements_columns = column_span,
         measurements_fields = fields,
         data_n = length(date_cells)),
    class = "citizenair_excel_locations")
}
## Read the meta information of a sheet: the device name and the coordinates.
## sheet_info holds the cell locations as returned by read_sheet_info.
## The coordinates cell is split on the comma, the first number is taken as
## lat and the last number as lon.
## Returns a list with elements sheet, device, lat and lon.
read_sheet_meta <- function(file, sheet, sheet_info = read_sheet_info(file, sheet)){
  ## read 1 single cell as text, position is c(row, column)
  read_cell <- function(position){
    cell <- cellranger::cell_limits(c(position[1], position[2]), c(position[1], position[2]))
    readxl::read_excel(path = file, sheet = sheet, range = cell, col_names = "key", col_types = "text")$key
  }
  out <- list()
  out$device <- read_cell(sheet_info$meettoestel)
  out$gps <- read_cell(sheet_info$latlon)
  out$latlon <- as.numeric(unlist(strsplit(out$gps, split = ",")))
  out$lat <- head(out$latlon, 1)
  out$lon <- tail(out$latlon, 1)
  out$sheet <- sheet
  ## only these fields are returned, gps and latlon are intermediate results
  out[c("sheet", "device", "lat", "lon")]
}
## Read the measurement units of a sheet.
## Reads the header row and the row below it over the columns of the data block,
## where the header row provides the names and the row below it the values.
## sheet_info holds the cell locations as returned by read_sheet_info.
## Returns a list with 1 element per phenomena (named by its header) containing
## the text found below that header, the Datum and Tijd columns are left out.
read_sheet_units <- function(file, sheet, sheet_info = read_sheet_info(file, sheet)){
  units <- readxl::read_excel(path = file, sheet = sheet,
                              range = cellranger::cell_limits(ul = c(sheet_info$data_headers, sheet_info$measurements_columns[1]),
                                                              lr = c(sheet_info$data_headers+1, sheet_info$measurements_columns[2])),
                              col_names = TRUE, col_types = "text")
  units <- as.list(units)
  ## remove 'Datum' / 'Tijd'
  ## done with a logical index: units[-which(...)] would drop all elements
  ## in case none of the names match as -integer(0) selects nothing
  is_datetime <- tolower(names(units)) %in% tolower(c("Datum", "Tijd"))
  units[!is_datetime]
}
## Read the measurements of a sheet.
## Reads the block of cells starting at the first data row and spanning data_n rows
## over the columns of the data block. The first 2 columns are read as date, all
## other columns as numeric (NOTE(review): this assumes Datum and Tijd are the first
## 2 columns of the data block - confirm with the excel template).
## Warnings and output on the message stream raised while reading are suppressed.
## sheet_info holds the cell locations as returned by read_sheet_info.
## Returns a data.frame where the Datum/Tijd columns are renamed to datum and tijd.
## Stops in case one of these 2 columns can not be found.
read_sheet_measurements <- function(file, sheet, sheet_info = read_sheet_info(file, sheet)){
  suppressWarnings(
    captured <- capture.output(
      x <- readxl::read_excel(path = file, sheet = sheet,
                              range = cellranger::cell_limits(ul = c(sheet_info$data_start, sheet_info$measurements_columns[1]),
                                                              lr = c(sheet_info$data_start + sheet_info$data_n - 1, sheet_info$measurements_columns[2])),
                              col_names = sheet_info$measurements_fields, guess_max = 10000,
                              col_types = c("date", "date", rep("numeric", length(sheet_info$measurements_fields) - 2))),
      type = "message"))
  class(x) <- c("data.frame", "citizenair_rawdata")
  ## match always returns 1 position per looked up name (NA if not found),
  ## so a missing column shows up as NA and not as a shorter result
  idx <- match(c("datum", "tijd"), table = tolower(colnames(x)))
  if(anyNA(idx)){
    stop("Datum + Tijd not present in data")
  }
  x <- data.table::setnames(x, old = colnames(x)[idx], new = c("datum", "tijd"))
  x
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read-citizendata.R
\name{as.data.frame.citizenair}
\alias{as.data.frame.citizenair}
\title{Convert excel data to a data.frame}
\usage{
\method{as.data.frame}{citizenair}(x, row.names, optional, ...,
type = c("meta", "measurements"))
}
\arguments{
\item{x}{an object of class \code{citizenair} as returned by \code{\link{read_citizenair}}}
\item{row.names}{not used}
\item{optional}{not used}
\item{...}{not used}
\item{type}{either 'meta' to get the meta information as a data.frame or 'measurements' to get the measurements data.frame}
}
\value{
a data.frame is returned
\enumerate{
\item{in case type is meta: contains fields sheet_id, sheet, device, lat, lon, phenomena}
\item{in case type is measurements: contains fields sheet_id, timepoint, date, phenomena, value}
}
}
\description{
Convert excel data to a data.frame
}
\examples{
filename <- system.file(package = "citizenair", "data-raw", "test_dummy.xls")
x <- read_citizenair(filename)
meta <- as.data.frame(x, type = "meta")
measurements <- as.data.frame(x, type = "measurements")
}
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/read-citizendata.R
\name{read_citizenair}
\alias{read_citizenair}
\title{Read in a CitizenAir excel file}
\usage{
read_citizenair(file = system.file(package = "citizenair", "data-raw",
"test_dummy.xls"), name = basename(file))
}
\arguments{
\item{file}{the path to the excel file}
\item{name}{the name of the file, defaults to the basename of the file}
}
\value{
an object of class \code{citizenair} which is a list with elements
\enumerate{
\item{filename: the name of the file}
\item{file: the full path to the excel file}
\item{time: the timestamp when the data was imported}
\item{content: a list where each list element contains the elements meta and data where meta and data are described below}
}
Content element meta is a list containing
\enumerate{
\item{device: the name of the device}
\item{lat: the latitude of the device address}
\item{lon: the longitude of the device address}
\item{sheet_id: an identifier which is the sequence number of the sheet}
\item{sheet: the sheet name which was read in}
\item{phenomena: a list containing phenomena measured in the data. Each list element is a character vector with the measurement unit.}
}
Content element data is a data.frame with the measurements containing
\enumerate{
\item{datum: the date - in POSIXct format returned by readxl::read_excel when setting that field to date}
\item{tijd: the time - in POSIXct format returned by readxl::read_excel when setting that field to date}
\item{several columns, one for each phenomena indicated in the \code{meta} element}
}
}
\description{
Read in a CitizenAir excel file
}
\examples{
filename <- system.file(package = "citizenair", "data-raw", "test_dummy.xls")
x <- read_citizenair(filename)
meta <- as.data.frame(x, type = "meta")
measurements <- as.data.frame(x, type = "measurements")
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment