@@ -7,3 +7,9 @@ | |||
^codecov\.yml$ | |||
^doc$ | |||
^Meta$ | |||
^_pkgdown\.yml$ | |||
^docs$ | |||
^pkgdown$ | |||
^index\.Rmd$ | |||
^index\.md$ | |||
^index\.html$ |
@@ -1,5 +1,6 @@ | |||
inst/doc | |||
README.html | |||
index.html | |||
/R/.Rhistory | |||
/TAGS | |||
doc | |||
@@ -0,0 +1,3 @@ | |||
[submodule "website"] | |||
path = docs | |||
url = git@github.com:desctable/desctable.github.io |
@@ -1,6 +1,6 @@ | |||
Package: desctable | |||
Title: Produce Descriptive and Comparative Tables Easily | |||
Version: 0.2.0 | |||
Version: 0.3.0 | |||
Authors@R: c(person("Maxime", "Wack", email = "maximewack@free.fr", role = c("aut", "cre")), | |||
person("Adrien", "Boukobza", email = "hadrien_b@hotmail.fr", role = c("aut")), | |||
person("Yihui", "Xie", email = "xieyihui@gmail.com", role = c("ctb"))) | |||
@@ -14,13 +14,15 @@ Depends: | |||
License: GPL-3 | |||
Encoding: UTF-8 | |||
LazyData: true | |||
URL: https://github.com/maximewack/desctable | |||
URL: https://desctable.github.io | |||
BugReports: https://github.com/maximewack/desctable/issues | |||
Imports: | |||
dplyr, | |||
DT, | |||
htmltools, | |||
rlang | |||
rlang, | |||
tidyr, | |||
utils | |||
Suggests: | |||
knitr, | |||
rmarkdown, | |||
@@ -3,15 +3,22 @@ | |||
S3method(as.data.frame,desctable) | |||
S3method(datatable,default) | |||
S3method(datatable,desctable) | |||
S3method(desc_table,data.frame) | |||
S3method(desc_table,default) | |||
S3method(desc_table,grouped_df) | |||
S3method(desctable,default) | |||
S3method(desctable,grouped_df) | |||
S3method(pander,desctable) | |||
S3method(print,desctable) | |||
export("!!!") | |||
export("%>%") | |||
export(ANOVA) | |||
export(IQR) | |||
export(chisq.test) | |||
export(datatable) | |||
export(desc_output) | |||
export(desc_table) | |||
export(desc_tests) | |||
export(desctable) | |||
export(fisher.test) | |||
export(group_by) | |||
@@ -26,3 +33,4 @@ export(tests_auto) | |||
importFrom(dplyr,"%>%") | |||
importFrom(dplyr,group_by) | |||
importFrom(pander,pander) | |||
importFrom(rlang,"!!!") |
@@ -1,52 +0,0 @@ | |||
Version 0.2.0 | |||
- Add support for purrr::map-like formulas for statistical and test functions | |||
- "conditional" formulas are no longer supported. For example, replace `~ is.normal ~ mean | median` with `~ if (is.normal(.)) mean(.) else median(.)` | |||
Version 0.1.9 | |||
- Fix in default options for datatable output to follow DT update | |||
- Fix pander export for R ≥ 4.0.0 | |||
Version 0.1.8 | |||
- Code cleanup: | |||
- use RStudio style guidelines for all code and docs | |||
- use fewer tidyverse functions internally, drop `purrr` dependancy | |||
Version 0.1.7 | |||
- Vignette and README with RStudio style guidelines | |||
Version 0.1.6 | |||
- Correct way to re-export `group_by` and `%>%` | |||
Version 0.1.4-5 | |||
- Documentation fix for group_by import (dplyr 0.8) | |||
- Correct description for Dates | |||
Version 0.1.3 | |||
- Added travis CI to repo | |||
- Added CRAN badge to repo | |||
- Corrected a typo in the README and vignette | |||
- Bugfix for when a statistical function returns a warning AND an error | |||
- Added NEWS file for future releases | |||
- Modified default and auto statistics for less automation | |||
- Added dummy column name for the variable names column, and make dplyr happy | |||
- Enabled character and mixed columns | |||
- Added CRAN mirror downloads badge to repo | |||
Version 0.1.2 | |||
- Added a fix for a change in evaluation in rlang | |||
Version 0.1.1 | |||
- Added a fix for changing arguments in fisher.test in upstream | |||
Version 0.1.0 | |||
Initial release. |
@@ -0,0 +1,81 @@ | |||
# desctable 0.3.0 | |||
#### New features | |||
- NEW API: desctable is now `desc_table`, `desc_tests`, and `desc_output` | |||
- New internal format: desctable uses dataframes with list-columns internally instead of nested lists | |||
- New website with *pkgdown* | |||
#### Bugfix | |||
- Conditional formulas, which were hard-deprecated in 0.2.0, are accepted again and are now only soft-deprecated: they emit a warning and will be removed in 1.0.0.
# desctable 0.2.0 | |||
#### New features | |||
- Add support for `purrr::map`-like formulas for statistical and test functions | |||
- Conditional formulas will be deprecated in 1.0. For example, replace `is.normal ~ mean | median` with `~ if (is.normal(.)) mean(.) else median(.)` | |||
# desctable 0.1.9 | |||
#### Bugfixes | |||
- Fix in default options for datatable output following DT update | |||
- Fix pander export for R ≥ 4.0.0 | |||
# desctable 0.1.8 | |||
#### Code cleanup | |||
- Use RStudio style guidelines for all code and docs | |||
- Use fewer tidyverse functions internally, drop `purrr` dependency
# desctable 0.1.7 | |||
#### Code cleanup | |||
- Correct vignette and README with RStudio style guidelines | |||
# desctable 0.1.6 | |||
#### Bugfixes | |||
- Correct way to re-export `group_by` and `%>%` | |||
# desctable 0.1.4-5 | |||
#### Bugfixes | |||
- Documentation fix for group_by import (dplyr 0.8) | |||
- Correct description for Dates | |||
# desctable 0.1.3 | |||
#### CRAN publication update | |||
- Add travis CI to repo | |||
- Add CRAN badge to repo | |||
- Correct a typo in the README and vignette | |||
- Bugfix for when a statistical function returns a warning AND an error | |||
- Add NEWS file for future releases | |||
- Modify default and auto statistics for less automation | |||
- Add dummy column name for the variable names column, and make dplyr happy | |||
- Enable character and mixed columns | |||
- Add CRAN mirror downloads badge to repo | |||
# desctable 0.1.2 | |||
#### Bugfix | |||
- Fix for a change in evaluation in rlang | |||
# desctable 0.1.1 | |||
#### Bugfix | |||
- Fix for changing arguments in fisher.test in upstream | |||
# desctable 0.1.0 | |||
- Initial release. |
@@ -8,13 +8,15 @@ | |||
#' | |||
#' @param stat The statistic to use | |||
#' @param data The dataframe to apply the statistic to | |||
#' @keywords internal | |||
#' @return A vector for one statistic column | |||
statColumn <- function(stat, data) { | |||
# Apply one statified stat function to every variable in the data | |||
# Return a simple vector for the column | |||
# Statify checks types and output for the stat function. Returns a numeric vector or a character vector if needed. | |||
if (length(stat) == 3) | |||
if (length(stat) == 3) # remove after 1.0 | |||
warning("Conditional formulas are deprecated and will be removed in 1.0.0 | |||
purrr::map style formulas are used now. | |||
For example, `is.normal ~ mean | median` becomes `~ if (is.normal(.)) mean(.) else median(.)`") | |||
@@ -32,11 +34,12 @@ For example, `is.normal ~ mean | median` becomes `~ if (is.normal(.)) mean(.) el | |||
#' | |||
#' @param data The dataframe to apply the statistic to | |||
#' @param stats A list of named statistics to use | |||
#' @keywords internal | |||
#' @return A dataframe of all statistics for all variables | |||
statTable <- function(data, stats) { | |||
# If stats is a function, apply it to the data to obtain a list of stat functions | |||
# Else use the function list as-is | |||
if (is.function(stats)) stats = stats(data) | |||
if (is.function(stats)) stats = stats(data) # remove after 1.0 | |||
# Compute a statColumn for every stat function in stats | |||
# Assemble the result in a dataframe | |||
@@ -59,6 +62,7 @@ statTable <- function(data, stats) { | |||
#' | |||
#' @param data The dataframe to get the names from | |||
#' @param labels The optional named character vector containing the keypairs var = "Label" | |||
#' @keywords internal | |||
#' @return A dataframe with one variable named "Variables", a character vector of variable names/labels and levels | |||
varColumn <- function(data, labels = NULL) { | |||
# Every variable name that exists in the labels is to be replaced with its corresponding label | |||
@@ -95,195 +99,220 @@ varColumn <- function(data, labels = NULL) { | |||
} | |||
#' Generate a statistics table | |||
#' | |||
#' Generate a statistics table with the chosen statistical functions, and tests if given a \code{"grouped"} dataframe. | |||
#' | |||
#' @section Labels: | |||
#' labels is an option named character vector used to make the table prettier. | |||
#' | |||
#' If given, the variable names for which there is a label will be replaced by their corresponding label. | |||
#' Create the pvalues column | |||
#' | |||
#' Not all variables need to have a label, and labels for non-existing variables are ignored. | |||
#' @param df Dataframe to use for the tests | |||
#' @param tests Test function or list of functions | |||
#' @param grp Grouping factor | |||
#' @keywords internal | |||
#' @return A numeric vector of pvalues | |||
testColumn <- function(df, tests, grp) {
  # Run one statistical test per variable of `df` against the grouping
  # variable `grp`, and row-bind the results produced by `testify`.
  #
  # df:    dataframe holding both the variables and the grouping variable
  # tests: either a function (legacy API) or a list that may contain
  #        per-variable tests plus the special `.default` / `.auto` entries
  # grp:   symbol naming the grouping variable, evaluated inside `df`
  group <- eval(grp, df)

  # The grouping variable itself is not tested
  df <- df[!names(df) %in% as.character(grp)]

  # Choose a test for every variable, in this order of precedence:
  # - tests is a function: apply it to each variable and the grouping factor
  # - a .default element exists: use it for every variable
  # - an .auto element exists: apply it to each variable and the grouping factor
  # - otherwise fall back on kruskal.test
  # `[[` is used instead of `$` so that names are matched exactly.
  if (is.function(tests)) { # remove after 1.0
    ftests <- lapply(df, tests, factor(group))
    tests <- ftests
  } else if (!is.null(tests[[".default"]])) {
    default_test <- tests[[".default"]]
    ftests <- lapply(df, function(x) default_test)
  } else if (!is.null(tests[[".auto"]])) {
    ftests <- lapply(df, tests[[".auto"]], factor(group))
  } else {
    ftests <- lapply(df, function(x) stats::kruskal.test)
  }

  # Forced tests are the named entries of `tests` that match a variable of `df`
  forced_tests <- intersect(setdiff(names(tests), ".auto"), names(df))

  # Forced tests override the automatically selected ones.
  # `forced_tests` is a subset of names(ftests), so a plain name index suffices.
  ftests[forced_tests] <- tests[forced_tests]

  # Compute the tests (made safe with testify) on each variable, using the grouping variable
  results <- mapply(testify, df, ftests,
                    MoreArgs = list(group = group),
                    SIMPLIFY = FALSE)
  Reduce(f = rbind, results)
}
#' Generate a statistics table | |||
#' | |||
#' labels must be given in the form c(unquoted_variable_name = "label") | |||
#' Generate a statistics table with the chosen statistical functions, nested if called with a grouped dataframe. | |||
#' | |||
#' @section Stats: | |||
#' The stats can be a function which takes a dataframe and returns a list of statistical functions to use. | |||
#' The statistical functions to use in the table are passed as additional arguments. | |||
#' If the argument is named (eg. \code{N = length}) the name will be used as the column title instead of the function | |||
#' name (here, \strong{N} instead of \strong{length}). | |||
#' | |||
#' stats can also be a named list of statistical functions, or purrr::map like formulas. | |||
#' Any R function can be a statistical function, as long as it returns only one value when applied to a vector, or as | |||
#' many values as there are levels in a factor, plus one. | |||
#' | |||
#' The names will be used as column names in the resulting table. If an element of the list is a function, it will be used as-is for the stats. | |||
#' Users can also use \code{purrr::map}-like formulas as quick anonymous functions (eg. \code{Q1 = ~ quantile(., .25)} to get the first quantile in a | |||
#' column named \strong{Q1}) | |||
#' | |||
#' @section Tests: | |||
#' The tests can be a function which takes a variable and a grouping variable, and returns an appropriate statistical test to use in that case. | |||
#' If no statistical function is given to \code{desc_table}, the \code{.auto} argument is used to provide a function | |||
#' that automatically determines the most appropriate statistical functions to use based on the contents of the table. | |||
#' | |||
#' tests can also be a named list of statistical test functions, associating the name of a variable in the data and a test to use specifically for that variable. | |||
#' @section Labels: | |||
#' \code{.labels} is a named character vector to provide "pretty" labels to variables. | |||
#' | |||
#' That test name must be expressed as a single-term formula (e.g. \code{~t.test}), or a purrr::map like formula | |||
#' (e.g. \code{~t.test(., var.equal = T)}). You don't have to specify tests for all the variables: a default test for | |||
#' all other variables can be defined with the name \code{.default}, and an automatic test can be defined with the name \code{.auto}. | |||
#' If given, the variable names for which there is a label will be replaced by their corresponding label. | |||
#' | |||
#' If data is a grouped dataframe (using \code{group_by}), subtables are created and statistic tests are performed over each sub-group. | |||
#' Not all variables need to have a label, and labels for non-existing variables are ignored. | |||
#' | |||
#' labels must be given in the form \code{c(unquoted_variable_name = "label")} | |||
#' | |||
#' @section Output: | |||
#' The output is a desctable object, which is a list of named dataframes that can be further manipulated. Methods for printing, using in \pkg{pander} and \pkg{DT} are present. Printing reduces the object to a dataframe. | |||
#' The output is either a dataframe in the case of a simple descriptive table, | |||
#' or nested dataframes in the case of a comparative table. | |||
#' | |||
#' @param data The dataframe to analyze | |||
#' @param stats A list of named statistics to apply to each element of the dataframe, or a function returning a list of named statistics | |||
#' @param tests A list of statistical tests to use when calling desctable with a grouped_df | |||
#' @param labels A named character vector of labels to use instead of variable names | |||
#' @return A desctable object, which prints to a table of statistics for all variables | |||
#' @param ... A list of named statistics to apply to each element of the dataframe, or a function returning a list of named statistics | |||
#' @param .auto A function to automatically determine appropriate statistics | |||
#' @param .labels A named character vector of variable labels | |||
#' @return A simple or grouped descriptive table | |||
#' @seealso \code{\link{stats_auto}} | |||
#' @seealso \code{\link{tests_auto}} | |||
#' @seealso \code{\link{print.desctable}} | |||
#' @seealso \code{\link{pander.desctable}} | |||
#' @seealso \code{\link{datatable.desctable}} | |||
#' @seealso \code{\link{IQR}} | |||
#' @seealso \code{\link{percent}} | |||
#' @export | |||
#' @family desc_table core functions | |||
#' @examples | |||
#' iris %>% | |||
#' desctable() | |||
#' desc_table() | |||
#' | |||
#' # Does the same as stats_auto here | |||
#' iris %>% | |||
#' desctable(stats = list("N" = length, | |||
#' "Mean" = ~ if (is.normal(.)) mean(.), | |||
#' "sd" = ~ if (is.normal(.)) sd(.), | |||
#' "Med" = stats::median, | |||
#' "IQR" = ~ if(!is.factor(.)) IQR(.))) | |||
#' | |||
#' # With labels | |||
#' mtcars %>% desctable(labels = c(hp = "Horse Power", | |||
#' cyl = "Cylinders", | |||
#' mpg = "Miles per gallon")) | |||
#' desc_table("N" = length, | |||
#' "Min" = min, | |||
#' "Q1" = ~quantile(., .25), | |||
#' "Med" = median, | |||
#' "Mean" = mean, | |||
#' "Q3" = ~quantile(., .75), | |||
#' "Max" = max, | |||
#' "sd" = sd, | |||
#' "IQR" = IQR) | |||
#' | |||
#' # With grouping on a factor | |||
#' iris %>% | |||
#' group_by(Species) %>% | |||
#' desctable(stats = stats_default) | |||
#' | |||
#' # With nested grouping, on arbitrary variables | |||
#' mtcars %>% | |||
#' group_by(vs, cyl) %>% | |||
#' desctable() | |||
#' | |||
#' # With grouping on a condition, and choice of tests | |||
#' iris %>% | |||
#' group_by(Petal.Length > 5) %>% | |||
#' desctable(tests = list(.auto = tests_auto, Species = ~chisq.test)) | |||
desctable <- function(data, stats, tests, labels) { | |||
UseMethod("desctable", data) | |||
#' desc_table(.auto = stats_auto) | |||
desc_table <- function(data, ..., .auto, .labels) {
  # S3 generic: dispatches on the class of the first argument, `data`
  UseMethod("desc_table")
}
#' @rdname desctable | |||
#' @rdname desc_table | |||
#' @export | |||
desctable.default <- function(data, stats = stats_auto, tests, labels = NULL) { | |||
# Assemble the Variables and the statTable in a single desctable object | |||
list(Variables = varColumn(data, labels), | |||
stats = statTable(data, stats)) %>% | |||
set_desctable_class() | |||
desc_table.default <- function(data, ..., .auto, .labels) {
  # Fallback method: reached for any `data` that has no more specific
  # desc_table method, and always signals an error.
  msg <- "`desc_table` must be called on a data.frame"
  stop(msg)
}
#' @rdname desctable | |||
#' @rdname desc_table | |||
#' @export | |||
desctable.grouped_df <- function(data, stats = stats_auto, tests = tests_auto, labels = NULL) { | |||
# Get groups then ungroup dataframe | |||
grps <- dplyr::groups(data) | |||
data <- dplyr::ungroup(data) | |||
desc_table.data.frame <- function(data, ..., .labels = NULL, .auto = stats_auto) {
  # Build a simple (non-grouped) descriptive table.
  #
  # data:    dataframe to describe
  # ...:     statistical functions or formulas; unnamed ones are auto-named
  #          by rlang::dots_list(.named = TRUE)
  # .labels: optional named character vector of variable labels
  # .auto:   function called on `data` to choose the statistics when none
  #          are given in `...`
  #
  # Returns the variable column and the statistics columns bound side by side.
  stats <- rlang::dots_list(..., .named = TRUE)

  # Scalar conditions: use && so the check short-circuits
  if (length(stats) == 0 && is.null(.auto)) {
    stop("desc_table needs at least one statistic function, or an automatic function in .auto")
  } else if (length(stats) == 0) {
    stats <- .auto(data)
  }

  # Assemble the Variables and the statTable in a single desctable object
  cbind(varColumn(data, .labels),
        statTable(data, stats))
}
#' Create the pvalues column | |||
#' | |||
#' @param df Dataframe to use for the tests | |||
#' @param tests Test function or list of functions | |||
#' @param grp Grouping factor | |||
#' @return A numeric vector of pvalues | |||
testColumn <- function(df, tests, grp) { | |||
group <- eval(grp, df) | |||
#' @rdname desc_table | |||
#' @export | |||
desc_table.grouped_df <- function(data, ..., .auto = stats_auto, .labels = NULL) {
  # Build a grouped descriptive table: one nested statistics table per level
  # of the (first) grouping variable.
  #
  # data:    grouped dataframe to describe
  # ...:     statistical functions or formulas; unnamed ones are auto-named
  #          by rlang::dots_list(.named = TRUE)
  # .auto:   function called on each group's data to choose the statistics
  #          when none are given in `...`
  # .labels: optional named character vector of variable labels
  #
  # Returns the nested dataframe with added `.stats` and `.vars` list-columns.

  # Get groups; only the first one is kept as a grouping variable
  grps <- dplyr::groups(data)

  if (length(grps) > 1) {
    warning("Only the first group will be used")
    data <- dplyr::ungroup(data, !!!grps[-1])
    # The dropped groups are ordinary variables from now on: they stay in the
    # nested data and get statistics, so they must also stay in `.vars`.
    grps <- grps[1]
  }

  stats <- rlang::dots_list(..., .named = TRUE)

  desctable <- tidyr::nest(data)

  # Scalar conditions: use && so the check short-circuits
  if (length(stats) == 0 && is.null(.auto)) {
    stop("desc_table needs at least one statistic function, or an automatic function in .auto")
  } else if (length(stats) == 0) {
    stats <- lapply(desctable$data, .auto)
  }

  # A list of lists means one set of statistics per group (as produced by
  # .auto above); otherwise the same statistics are applied to every group.
  if (is.list(stats[[1]])) {
    desctable$.stats <- mapply(statTable, desctable$data, stats, SIMPLIFY = FALSE)
  } else {
    desctable$.stats <- lapply(desctable$data, statTable, stats)
  }

  # Variable names/labels for every column except the grouping one
  group_names <- unlist(lapply(grps, as.character))
  desctable$.vars <- list(varColumn(data[!names(data) %in% group_names], .labels))

  desctable
}
#' Create a subtable in a grouped desctable | |||
#' Add tests to a desc_table | |||
#' | |||
#' @param df Dataframe to use | |||
#' @param stats Stats list/function to use | |||
#' @param tests Tests list/function to use | |||
#' @param grps List of symbols for grouping factors | |||
#' @return A nested list of statTables and testColumns | |||
subTable <- function(df, stats, tests, grps) { | |||
# Final group, compute tests | |||
if (length(grps) == 1) { | |||
group <- factor(eval(grps[[1]], df)) | |||
# Create the subtable stats | |||
df[!names(df) %in% as.character(grps[[1]])] %>% | |||
by(group, statTable, stats) %>% | |||
# Name the subtables with info about group and group size | |||
stats::setNames(subNames(grps[[1]], df)) -> stats | |||
# Create the subtable tests | |||
pvalues <- testColumn(df, tests, grps[[1]]) | |||
c(stats, tests = list(pvalues)) | |||
#' Add test statistics to a grouped desc_table, with the tests specified as \code{variable = test}. | |||
#' | |||
#' @section Tests: | |||
#' The statistical test functions to use in the table are passed as additional named arguments. Tests must be preceded | |||
#' by a formula tilde (\code{~}). | |||
#' \code{name = ~test} will apply test \code{test} to variable \code{name}. | |||
#' | |||
#' Any R test function can be used, as long as it returns an object containing a \code{p.value} element, which is the | |||
#' case for most tests returning an object of class \code{htest}. | |||
#' | |||
#' Users can also use \code{purrr::map}-like formulas as quick anonymous functions (eg. \code{~ t.test(., var.equal = T)} to
#' compute a t test without the Welch correction).
#' | |||
#' @param desctable A desc_table | |||
#' @param ... A list of statistical tests associated to variable names | |||
#' @param .auto A function to automatically determine the appropriate tests | |||
#' @param .default A default fallback test | |||
#' @seealso \code{\link{tests_auto}} | |||
#' @seealso \code{\link{no.test}} | |||
#' @seealso \code{\link{ANOVA}} | |||
#' @return A desc_table with tests | |||
#' @export | |||
#' @family desc_table core functions | |||
#' @examples | |||
#' iris %>% | |||
#' group_by(Species) %>% | |||
#' desc_table() %>% | |||
#' desc_tests(Sepal.Length = ~kruskal.test, | |||
#' Sepal.Width = ~oneway.test, | |||
#' Petal.Length = ~oneway.test(., var.equal = T), | |||
#' Petal.Width = ~oneway.test(., var.equal = F))
desc_tests <- function(desctable, .auto = tests_auto, .default = NULL, ...) {
  # Add a `.tests` list-column to a grouped desc_table.
  #
  # desctable: output of desc_table() on a grouped dataframe
  # .auto:     function choosing a test for variables without an explicit one
  # .default:  fallback test for variables without an explicit one
  # ...:       tests given as `variable = ~test`
  #
  # Returns `desctable` with the `.tests` column set.
  if (which.desctable(desctable) != "grouped") {
    stop("Unexpected input. `desc_tests` must be used on the output of `desc_table` on a grouped dataframe.\n
For example: iris %>% group_by(Species) %>% desc_table() %>% desc_tests")
  }

  # Rebuild the full dataset (grouping variable + data) without the desc_table columns
  fulldata <- tidyr::unnest(desctable, "data")
  fulldata$.tests <- NULL
  fulldata$.stats <- NULL
  fulldata$.vars <- NULL

  tests <- list(...)

  # Scalar conditions: use && so the check short-circuits
  fully_specified <- all(names(desctable$data[[1]]) %in% names(tests))
  if (!fully_specified && is.null(.auto) && is.null(.default)) {
    stop("desc_tests needs either a full specification of tests, or include a .auto or a .default function for non specified-tests")
  }

  # list() keeps NULL entries, so `.auto` and `.default` are always present
  tests <- c(tests, list(.auto = .auto, .default = .default))

  # The first column of a grouped desc_table is the grouping variable
  desctable$.tests <- list(testColumn(fulldata, tests, as.symbol(names(desctable)[1])))

  desctable
}
@@ -19,7 +19,8 @@ percent <- function(x) { | |||
#' @return The IQR | |||
#' @export | |||
IQR <- function(x) {
  # Interquartile range of `x` (third quartile minus first quartile),
  # ignoring missing values.
  # stats::quantile() errors on factors, so nothing is computed for them and
  # NULL is returned (invisibly) instead.
  if (!is.factor(x)) {
    base::diff(stats::quantile(x, c(0.25, 0.75), na.rm = TRUE))
  }
}
@@ -0,0 +1,606 @@ | |||
#' @importFrom pander pander | |||
pander::pander | |||
#' Generate a statistics table | |||
#' | |||
#' Generate a statistics table with the chosen statistical functions, and tests if given a \code{"grouped"} dataframe. | |||
#' | |||
#' @section Labels: | |||
#' labels is an optional named character vector used to make the table prettier.
#' | |||
#' If given, the variable names for which there is a label will be replaced by their corresponding label. | |||
#' | |||
#' Not all variables need to have a label, and labels for non-existing variables are ignored. | |||
#' | |||
#' labels must be given in the form c(unquoted_variable_name = "label") | |||
#' | |||
#' @section Stats: | |||
#' The stats can be a function which takes a dataframe and returns a list of statistical functions to use. | |||
#' | |||
#' stats can also be a named list of statistical functions, or purrr::map like formulas. | |||
#' | |||
#' The names will be used as column names in the resulting table. If an element of the list is a function, it will be used as-is for the stats. | |||
#' | |||
#' @section Tests: | |||
#' The tests can be a function which takes a variable and a grouping variable, and returns an appropriate statistical test to use in that case. | |||
#' | |||
#' tests can also be a named list of statistical test functions, associating the name of a variable in the data and a test to use specifically for that variable. | |||
#' | |||
#' That test name must be expressed as a single-term formula (e.g. \code{~t.test}), or a purrr::map like formula | |||
#' (e.g. \code{~t.test(., var.equal = T)}). You don't have to specify tests for all the variables: a default test for | |||
#' all other variables can be defined with the name \code{.default}, and an automatic test can be defined with the name \code{.auto}. | |||
#' | |||
#' If data is a grouped dataframe (using \code{group_by}), subtables are created and statistic tests are performed over each sub-group. | |||
#' | |||
#' @section Output: | |||
#' The output is a desctable object, which is a list of named dataframes that can be further manipulated. Methods for printing, using in \pkg{pander} and \pkg{DT} are present. Printing reduces the object to a dataframe. | |||
#' | |||
#' @param data The dataframe to analyze | |||
#' @param stats A list of named statistics to apply to each element of the dataframe, or a function returning a list of named statistics | |||
#' @param tests A list of statistical tests to use when calling desctable with a grouped_df | |||
#' @param labels A named character vector of labels to use instead of variable names | |||
#' @return A desctable object, which prints to a table of statistics for all variables | |||
#' @seealso \code{\link{stats_auto}} | |||
#' @seealso \code{\link{tests_auto}} | |||
#' @seealso \code{\link{print.desctable}} | |||
#' @seealso \code{\link{pander.desctable}} | |||
#' @seealso \code{\link{datatable.desctable}} | |||
#' @export | |||
#' @keywords deprecated | |||
#' @examples | |||
#' iris %>% | |||
#' desctable() | |||
#' | |||
#' # Does the same as stats_auto here | |||
#' iris %>% | |||
#' desctable(stats = list("N" = length, | |||
#' "Mean" = ~ if (is.normal(.)) mean(.), | |||
#' "sd" = ~ if (is.normal(.)) sd(.), | |||
#' "Med" = stats::median, | |||
#' "IQR" = ~ if(!is.factor(.)) IQR(.))) | |||
#' | |||
#' # With labels | |||
#' mtcars %>% desctable(labels = c(hp = "Horse Power", | |||
#' cyl = "Cylinders", | |||
#' mpg = "Miles per gallon")) | |||
#' | |||
#' # With grouping on a factor | |||
#' iris %>% | |||
#' group_by(Species) %>% | |||
#' desctable(stats = stats_default) | |||
#' | |||
#' # With nested grouping, on arbitrary variables | |||
#' mtcars %>% | |||
#' group_by(vs, cyl) %>% | |||
#' desctable() | |||
#' | |||
#' # With grouping on a condition, and choice of tests | |||
#' iris %>% | |||
#' group_by(Petal.Length > 5) %>% | |||
#' desctable(tests = list(.auto = tests_auto, Species = ~chisq.test)) | |||
desctable <- function(data, stats, tests, labels) {
  # Deprecated S3 generic: warns on every call, then dispatches on the class
  # of the first argument, `data`.
  warning("desctable is deprecated and will be removed in 1.0.0.
Please use the `desc_*` family of functions (`desc_table`, `desc_tests`, `desc_output`)")

  UseMethod("desctable")
}
#' @rdname desctable | |||
#' @export | |||
desctable.default <- function(data, stats = stats_auto, tests, labels = NULL) {
  # Non-grouped case: a desctable made of the variable names column and the
  # statistics table. `tests` is accepted but not used here.
  variables <- varColumn(data, labels)
  statistics <- statTable(data, stats)

  set_desctable_class(list(Variables = variables, stats = statistics))
}
#' @rdname desctable | |||
#' @export | |||
desctable.grouped_df <- function(data, stats = stats_auto, tests = tests_auto, labels = NULL) {
  # Grouped case: remember the grouping variables, then work on the
  # ungrouped dataframe.
  grps <- dplyr::groups(data)
  data <- dplyr::ungroup(data)

  # The Variables column describes every column except the grouping ones
  group_names <- unlist(lapply(grps, as.character))
  variables <- varColumn(data[!names(data) %in% group_names], labels)

  # Variables first, followed by the recursively built subtables
  set_desctable_class(c(Variables = list(variables),
                        subTable(data, stats, tests, grps)))
}
#' Create the subtables names | |||
#' | |||
#' Create the subtables names, as | |||
#' factor: level (n=sub-group length) | |||
#' | |||
#' @param grp Grouping factor | |||
#' @param df Dataframe containing the grouping factor | |||
#' @return A character vector with the names for the subtables | |||
#' @keywords deprecated internal | |||
subNames <- function(grp, df) {
  # Build one name per level of the grouping variable, formatted as
  # "<group>: <level> (n=<count>)".
  grouping <- factor(eval(grp, df))

  # Count per level, with missing values dropped; maxsum = Inf keeps every
  # level instead of lumping the rarest ones together
  counts <- summary(stats::na.omit(grouping), maxsum = Inf)

  paste0(as.character(grp), ": ", levels(grouping), " (n=", counts, ")")
}
#' Create a subtable in a grouped desctable | |||
#' | |||
#' @param df Dataframe to use | |||
#' @param stats Stats list/function to use | |||
#' @param tests Tests list/function to use | |||
#' @param grps List of symbols for grouping factors | |||
#' @return A nested list of statTables and testColumns | |||
#' @keywords deprecated internal | |||
subTable <- function(df, stats, tests, grps) {
  # Recursively build the subtables of a grouped desctable: one level of
  # nesting per grouping variable, with the tests computed at the last level.
  current <- grps[[1]]

  if (length(grps) != 1) {
    # More grouping levels remain: recurse into each sub-group of the current
    # one, and name the results with info about group and group size
    nested <- by(df[!names(df) %in% as.character(current)],
                 eval(current, df),
                 subTable, stats, tests, grps[-1])
    return(stats::setNames(nested, subNames(current, df)))
  }

  # Final group: one statTable per level of the grouping factor, named with
  # info about group and group size
  tables <- stats::setNames(by(df[!names(df) %in% as.character(current)],
                               factor(eval(current, df)),
                               statTable, stats),
                            subNames(current, df))

  # Add the tests computed over the whole dataframe for this group
  c(tables, tests = list(testColumn(df, tests, current)))
}
#' Print method for desctable | |||
#' | |||
#' @param x A desctable | |||
#' @param ... Additional print parameters | |||
#' @return A flat dataframe | |||
#' @export | |||
#' @keywords deprecated | |||
print.desctable <- function(x, ...) {
  # Print a desctable as a flat dataframe.
  # `...` is forwarded to print() so that documented "additional print
  # parameters" (e.g. digits) are honoured instead of being dropped.
  # Returns what print() returns: the flat dataframe, invisibly.
  print(as.data.frame(x), ...)
}
#' As.data.frame method for desctable | |||
#' | |||
#' @param x A desctable | |||
#' @param ... Additional as.data.frame parameters | |||
#' @return A flat dataframe | |||
#' @export | |||
#' @keywords deprecated | |||
as.data.frame.desctable <- function(x, ...) {
  # Flatten a desctable into a standard dataframe.
  # `...` is passed on to data.frame().

  # Discard "markdown" formatting of variable names: bold first, then italics
  plain_names <- gsub("\\*\\*(.*?)\\*\\*", "\\1", x$Variables$Variables)
  x$Variables$Variables <- gsub("\\*(.*?)\\*", "\\1", plain_names)

  # Column names for the dataframe output
  col_names <- header(x, "dataframe")

  # Make a standard dataframe and apply the header
  flat <- data.frame(flatten_desctable(x), check.names = FALSE, ...)
  stats::setNames(flat, col_names)
}
#' Pander method for desctable | |||
#' | |||
#' Pander method to output a desctable | |||
#' | |||
#' Uses \code{pandoc.table}, with some default parameters (\code{digits = 2}, \code{justify = "left"}, \code{missing = ""}, \code{keep.line.breaks = T}, \code{split.tables = Inf}, and \code{emphasize.rownames = F}), that you can override if needed. | |||
#' | |||
#' @param x A desctable | |||
#' @inheritParams pander::pandoc.table | |||
#' @seealso \code{\link{pandoc.table}} | |||
#' @export | |||
#' @keywords deprecated | |||
pander.desctable <- function(x = NULL,
                             digits = 2,
                             justify = "left",
                             missing = "",
                             keep.line.breaks = TRUE,
                             split.tables = Inf,
                             emphasize.rownames = FALSE,
                             ...) {
  # Render a desctable through pander::pandoc.table.
  # The named arguments are defaults for pandoc.table; `...` is passed on to it.

  # A NULL `digits` falls back on the global pander option
  if (is.null(digits)) digits <- pander::panderOptions("digits")

  # Discard "markdown" and insert 4 non-breaking spaces before factor levels.
  # The spaces are written as \u00a0 escapes so that they are visible in the
  # source and cannot be collapsed or replaced by ordinary spaces.
  x$Variables$Variables <- gsub("\\*\\*(.*?)\\*\\*: \\*(.*?)\\*",
                                "\u00a0\u00a0\u00a0\u00a0\\2",
                                x$Variables$Variables)

  # Create a pander header
  header <- header(x, "pander")

  # Make a dataframe and push it to pandoc
  x %>%
    flatten_desctable() %>%
    data.frame(check.names = FALSE, stringsAsFactors = FALSE) %>%
    stats::setNames(header) %>%
    pander::pandoc.table(justify = justify,
                         digits = digits,
                         missing = missing,
                         keep.line.breaks = keep.line.breaks,
                         split.tables = split.tables,
                         emphasize.rownames = emphasize.rownames,
                         ...)
}
#' Create an HTML table widget using the DataTables library | |||
#' | |||
#' This function creates an HTML widget to display rectangular data (a matrix or data frame) using the JavaScript library DataTables, with a method for \code{desctable} objects. | |||
#' | |||
#' @note | |||
#' You are recommended to escape the table content for security reasons (e.g. XSS attacks) when using this function in Shiny or any other dynamic web applications. | |||
#' @references | |||
#' See \url{https://rstudio.github.io/DT/} for the full documentation. | |||
#' @examples | |||
#' library(DT) | |||
#' | |||
#' # see the package vignette for examples and the link to website | |||
#' vignette('DT', package = 'DT') | |||
#' | |||
#' # some boring edge cases for testing purposes | |||
#' m = matrix(nrow = 0, ncol = 5, dimnames = list(NULL, letters[1:5])) | |||
#' datatable(m) # zero rows | |||
#' datatable(as.data.frame(m)) | |||
#' | |||
#' m = matrix(1, dimnames = list(NULL, 'a')) | |||
#' datatable(m) # one row and one column | |||
#' datatable(as.data.frame(m)) | |||
#' | |||
#' m = data.frame(a = 1, b = 2, c = 3) | |||
#' datatable(m) | |||
#' datatable(as.matrix(m)) | |||
#' | |||
#' # dates | |||
#' datatable(data.frame( | |||
#' date = seq(as.Date("2015-01-01"), by = "day", length.out = 5), x = 1:5 | |||
#' )) | |||
#' datatable(data.frame(x = Sys.Date())) | |||
#' datatable(data.frame(x = Sys.time())) | |||
#' | |||
#' ### | |||
#' @inheritParams DT::datatable | |||
#' @export | |||
#' @keywords deprecated | |||
datatable <- function(data, ...) {
  # S3 generic: dispatches on the class of the first argument, `data`
  UseMethod("datatable")
}
#' @rdname datatable | |||
#' @export | |||
datatable.default <- function(data,
                              options = list(),
                              class = "display",
                              callback = DT::JS("return table;"),
                              caption = NULL,
                              filter = c("none", "bottom", "top"),
                              escape = TRUE,
                              style = "default",
                              width = NULL,
                              height = NULL,
                              elementId = NULL,
                              fillContainer = getOption("DT.fillContainer", NULL),
                              autoHideNavigation = getOption("DT.autoHideNavigation", NULL),
                              selection = c("multiple", "single", "none"),
                              extensions = list(),
                              plugins = NULL, ...) {
  # Default method: hand every argument over to DT::datatable unchanged
  DT::datatable(
    data,
    options = options,
    class = class,
    callback = callback,
    caption = caption,
    filter = filter,
    escape = escape,
    style = style,
    width = width,
    height = height,
    elementId = elementId,
    fillContainer = fillContainer,
    autoHideNavigation = autoHideNavigation,
    selection = selection,
    extensions = extensions,
    plugins = plugins,
    ...
  )
}
#' @rdname datatable
#' @inheritParams base::prettyNum
#' @export
datatable.desctable <- function(data,
                                options = list(paging = FALSE,
                                               info = FALSE,
                                               search = list(),
                                               dom = "Brtip",
                                               fixedColumns = TRUE,
                                               fixedHeader = TRUE,
                                               buttons = c("copy", "excel")),
                                class = "display",
                                callback = DT::JS("return table;"),
                                caption = NULL,
                                filter = c("none", "bottom", "top"),
                                escape = FALSE,
                                style = "default",
                                width = NULL,
                                height = NULL,
                                elementId = NULL,
                                fillContainer = getOption("DT.fillContainer", NULL),
                                autoHideNavigation = getOption("DT.autoHideNavigation", NULL),
                                selection = c("multiple", "single", "none"),
                                extensions = c("FixedHeader", "FixedColumns", "Buttons"),
                                plugins = NULL,
                                rownames = FALSE,
                                digits = 2, ...) {
  # Rewrite the "markdown" variable labels as HTML:
  #  - "**var**: *level*" becomes the level name indented by four non-breaking
  #    spaces (written as \u00A0 escapes so the indent survives editors and
  #    HTML whitespace collapsing),
  #  - remaining "**var**" labels become bold.
  labels <- data$Variables$Variables
  labels <- gsub("\\*\\*(.*?)\\*\\*: \\*(.*?)\\*", "\u00A0\u00A0\u00A0\u00A0\\2", labels)
  labels <- gsub("\\*\\*(.*?)\\*\\*", "<b>\\1</b>", labels)
  data$Variables$Variables <- labels

  # Custom (possibly multi-row) table header built from the desctable structure
  container <- header(data, "datatable")

  # Collapse the nested desctable into one set of columns
  flat <- flatten_desctable(data)

  # Format numbers to `digits` and blank out NAs; skipped when digits is NULL
  if (!is.null(digits)) {
    flat <- lapply(flat, prettyNum, digits = digits)
    flat <- lapply(flat, gsub, pattern = "^NA$", replacement = "")
  }

  # Build the data frame and hand it to DT together with the custom header
  DT::datatable(
    data.frame(flat, check.names = FALSE, stringsAsFactors = FALSE),
    container = container,
    options = options,
    extensions = extensions,
    escape = escape,
    class = class,
    callback = callback,
    caption = caption,
    filter = filter,
    style = style,
    width = width,
    height = height,
    elementId = elementId,
    fillContainer = fillContainer,
    autoHideNavigation = autoHideNavigation,
    selection = selection,
    plugins = plugins,
    rownames = rownames,
    ...
  )
}
#' Tag an object as a "desctable"
#'
#' Replaces the class attribute of \code{x} with the single class "desctable".
#'
#' @param x Object to set the "desctable" class to
#' @return The object with the class "desctable"
#' @keywords deprecated internal
set_desctable_class <- function(x) {
  attr(x, "class") <- "desctable"
  x
}
#' Parse a conditional formula
#'
#' Turn a formula describing when to use which stat/test into the function to use
#'
#' The formula is translated into the text of an \code{if}/\code{else}
#' expression, which is then parsed and evaluated with \code{x} in scope.
#' The formula is to be given in the form of
#' conditional ~ T | F
#' and conditions can be nested such as
#' conditional1 ~ (conditional2 ~ T | F) | F
#' The FALSE option can be omitted, and the TRUE can be replaced with NA
#'
#' @param x The variable to test it on
#' @param f A formula to parse
#' @return A function to use as a stat/test
#' @keywords deprecated internal
parse_formula <- function(x, f) {
  # Recursively convert a node of the formula's call tree to source text.
  to_code <- function(node) {
    if (length(node) == 1) {
      return(as.character(node))
    }
    operator <- as.character(node[[1]])
    if (operator == "~") {
      # condition ~ branches  ->  if (condition(x)) {branches}
      paste0("if (", to_code(node[[2]]), "(x)) ",
             "{",
             to_code(node[[3]]),
             "}")
    } else if (operator == "|") {
      # yes | no  ->  yes} else {no   (the outer braces come from the "~" node)
      paste0(to_code(node[[2]]),
             "} else ",
             "{",
             to_code(node[[3]]))
    } else if (operator == "(") {
      # Parentheses only group; translate their content
      to_code(node[[2]])
    }
  }

  # The generated text refers to `x`, so it is evaluated in this function's frame.
  eval(parse(text = to_code(f)))
}
#' Build the header for pander
#'
#' Each header label is written once and padded with empty strings up to its
#' column span; nested levels are joined with "<br/>".
#'
#' @param head A headerList object
#' @return A names vector
#' @keywords deprecated internal
head_pander <- function(head) {
  labels <- names(head)

  # Leaf level: the elements are the integer column counts themselves
  if (is.integer(head[[1]])) {
    padded <- lapply(labels, function(label) c(label, rep("", head[[label]] - 1)))
    return(unlist(padded))
  }

  # Nested level: spans come from the "colspan" attribute of each sub-list
  top_row <- unlist(lapply(labels, function(label) {
    c(label, rep("", attr(head[[label]], "colspan") - 1))
  }))
  lower_rows <- unlist(lapply(head, head_pander))
  paste(top_row, lower_rows, sep = "<br/>")
}
#' Build the header for datatable
#'
#' Produces one list of \code{th} tags per header level, outermost level first.
#'
#' @param head A headerList object
#' @return An htmltools$tags object containing the header
#' @keywords deprecated internal
head_datatable <- function(head) {
  # One <th> per label, each carrying its column span
  th_cells <- function(labels, spans) {
    mapply(function(label, span) htmltools::tags$th(label, colspan = span),
           labels,
           spans,
           SIMPLIFY = FALSE)
  }

  rows <- list()
  # While the current level is made of sub-lists, emit a row for it and
  # descend one level by flattening.
  while (is.list(head[[1]])) {
    level_row <- th_cells(names(head), lapply(head, attr, "colspan"))
    rows <- c(rows, list(level_row))
    head <- purrr::flatten(head)
  }

  # Innermost level: the elements themselves are the spans
  c(rows, list(th_cells(names(head), head)))
}
#' Build the header for dataframe
#'
#' Each header label is repeated over its column span; nested levels are
#' joined with " / ".
#'
#' @param head A headerList object
#' @return A names vector
#' @keywords deprecated internal
head_dataframe <- function(head) {
  labels <- names(head)

  # Leaf level: the elements are the integer column counts themselves
  if (is.integer(head[[1]])) {
    return(unlist(lapply(labels, function(label) rep(label, head[[label]]))))
  }

  # Nested level: spans come from the "colspan" attribute of each sub-list
  upper <- unlist(lapply(labels, function(label) {
    rep(label, attr(head[[label]], "colspan"))
  }))
  # Recurse with head_dataframe itself. The previous code recursed through
  # head_pander, which pads with "" (and joins deeper levels with "<br/>")
  # instead of repeating the label on every column.
  lower <- unlist(lapply(head, head_dataframe))
  paste(upper, lower, sep = " / ")
}
#' Build header
#'
#' Take a desctable object and create a suitable header for the mentioned output.
#' Output can be one of "pander", "datatable", or "dataframe".
#'
#' The first element of the desctable is dropped before the header is built
#' and is represented by a non-breaking-space placeholder (an empty cell
#' spanning all header rows for a multi-level datatable header).
#'
#' @param desctable A desctable object
#' @param output An output format for the header
#' @return A header object in the output format
#' @keywords deprecated internal
header <- function(desctable, output = c("pander", "datatable", "dataframe")) {
  # Resolve the choice vector to one validated string; without this the
  # default (length 3) would be used directly in the `if` conditions below.
  output <- match.arg(output)

  groups <- desctable[-1]
  # Column names of the flattened table (bottom header row)
  nm <- names(data.frame(flatten_desctable(groups), check.names = FALSE))

  # Single group: one header row is enough
  if (length(groups) == 1) {
    if (output == "datatable") {
      cells <- lapply(c("\u00A0", nm), htmltools::tags$th)
      return(htmltools::tags$table(htmltools::tags$thead(htmltools::tags$tr(cells)),
                                   class = "display"))
    }
    return(c("\u00A0", nm))
  }

  # Several groups: stack the group labels on top of the column names
  head <- headerList(groups)
  switch(output,
         pander = c("\u00A0", paste(head_pander(head), nm, sep = "<br/>")),
         datatable = {
           rows <- c(head_datatable(head), list(lapply(nm, htmltools::tags$th)))
           # Empty top-left cell spanning every header row
           rows[[1]] <- c(list(htmltools::tags$th(rowspan = length(rows))), rows[[1]])
           htmltools::tags$table(htmltools::tags$thead(lapply(rows, htmltools::tags$tr)),
                                 class = "display")
         },
         dataframe = c("\u00A0", paste(head_dataframe(head), nm, sep = " / ")))
}
#' Build a header list object
#'
#' A data frame is reduced to its number of columns; any other list is
#' processed element by element and tagged with a "colspan" attribute holding
#' the total number of columns found below it.
#'
#' @param desctable a desctable
#' @return a nested list of headers with colspans
#' @keywords deprecated internal
headerList <- function(desctable) {
  if (is.data.frame(desctable)) {
    return(length(desctable))
  }

  children <- lapply(desctable, headerList)
  # Children are either plain column counts or sub-lists carrying their own span
  spans <- if (is.integer(children[[1]])) children else lapply(children, attr, "colspan")
  attr(children, "colspan") <- sum(unlist(spans))
  children
}
#' Flatten a desctable to a dataframe recursively
#'
#' Data frames are returned untouched; any other list has each of its elements
#' flattened and the results bound together column-wise.
#'
#' @param desctable A desctable object
#' @return A flat dataframe
#' @keywords deprecated internal
flatten_desctable <- function(desctable) {
  if (is.data.frame(desctable)) {
    return(desctable)
  }
  Reduce(cbind, lapply(desctable, flatten_desctable))
}
#' Define a list of default statistics
#'
#' The \code{data} argument is accepted but not inspected: the same list is
#' returned whatever the dataframe contains.
#'
#' @param data A dataframe
#' @return A list of statistical functions
#' @export
#' @keywords deprecated
stats_default <- function(data) {
  # Conditional statistics, expressed as one-sided formulas
  mean_if_normal <- ~if (is.normal(.)) mean(.)
  sd_if_normal <- ~if (is.normal(.)) sd(.)
  iqr_unless_factor <- ~if (!is.factor(.)) IQR(.)

  list("N" = length,
       "%" = percent,
       "Mean" = mean_if_normal,
       "sd" = sd_if_normal,
       "Med" = stats::median,
       "IQR" = iqr_unless_factor)
}
#' @rdname stats_default
#' @export
stats_normal <- function(data) {
  # Count, percentage, mean and standard deviation; `data` is not inspected.
  stats::setNames(list(length, percent, mean, stats::sd),
                  c("N", "%", "Mean", "sd"))
}
#' @rdname stats_default
#' @export
stats_nonnormal <- function(data) {
  # IQR is only computed for non-factor variables; `data` is not inspected.
  iqr_unless_factor <- ~if (!is.factor(.)) IQR(.)

  list("N" = length,
       "%" = percent,
       "Median" = stats::median,
       "IQR" = iqr_unless_factor)
}
@@ -7,5 +7,7 @@ dplyr::`%>%` | |||
#' @export | |||
dplyr::group_by | |||
#' @importFrom pander pander | |||
pander::pander | |||
#' @importFrom rlang !!! | |||
#' @export | |||
rlang::`!!!` |
@@ -1,204 +1,223 @@ | |||
#' Print method for desctable | |||
#' | |||
#' @param x A desctable | |||
#' @param ... Additional print parameters | |||
#' @return A flat dataframe | |||
#' @export | |||
print.desctable <- function(x, ...) { | |||
print(as.data.frame(x)) | |||
##' Output a desctable to the desired target format
##'
##' Output a simple or grouped desctable to a different format.
##' Currently available formats are\itemize{
##' \item data.frame ("df")
##' \item pander ("pander")
##' \item datatable ("DT")
##' }
##'
##' All numerical values will be rounded to the digits argument.
##' If statistical tests are present, p values below 1E-digits will be replaced with "≤ 1E-digits"
##' (eg. "≤ 0.01" for values below 0.01 when digits = 2)
##'
##' @title desc_output
##' @param desctable The desctable to output
##' @param target The desired target. One of "df", "pander", or "DT". Defaults to "df" when not supplied.
##' @param digits The number of digits to display. The p values will be simplified under 1E-digits
##' @param ... Other arguments to pass to \code{data.frame}, \code{pander::pander}, or \code{DT::datatable}
##' @return The output object (or corresponding side effect)
##' @export
##' @seealso \code{\link[DT]{datatable}}
##' @seealso \code{\link[pander]{pander}}
##' @family desc_table core functions
desc_output <- function(desctable, target = c("df", "pander", "DT"), digits = 2, ...) {
  targets <- c("df", "pander", "DT")

  # Reject anything that is not recognised as a simple or grouped desctable.
  # Checked explicitly because switch() only reaches its fallback branch for
  # character input, so a non-character result would otherwise return NULL.
  kind <- which.desctable(desctable)
  if (!isTRUE(kind %in% c("simple", "grouped")))
    stop("Unexpected input. `desc_output` must be used on the output of `desc_table` or `desc_table` and `desc_tests`")

  # switch() needs a single string: collapse the untouched default choice
  # vector to its first entry, then validate.
  if (identical(target, targets))
    target <- targets[1]
  if (!is.character(target) || length(target) != 1 || !target %in% targets)
    stop("target must be one of \"df\", \"pander\", or \"DT\"")

  writer <- switch(kind,
                   simple = switch(target,
                                   df = output_df_simple,
                                   pander = output_pander_simple,
                                   DT = output_DT_simple),
                   grouped = switch(target,
                                    df = output_df_grouped,
                                    pander = output_pander_grouped,
                                    DT = output_DT_grouped))
  writer(desctable, digits, ...)
}
#' As.data.frame method for desctable | |||
#' | |||
#' @param x A desctable | |||
#' @param ... Additional as.data.frame parameters | |||
#' @return A flat dataframe | |||
#' @export | |||
as.data.frame.desctable <- function(x, ...) { | |||
# Discard "markdown" formatting of variable names | |||
x$Variables$Variables <- gsub("\\*\\*(.*?)\\*\\*", "\\1", x$Variables$Variables) | |||
x$Variables$Variables <- gsub("\\*(.*?)\\*", "\\1", x$Variables$Variables) | |||
# Create a dataframe header | |||
header <- header(x, "dataframe") | |||
# Make a standard dataframe | |||
x %>% | |||
flatten_desctable() %>% | |||
data.frame(check.names = F, ...) %>% | |||
stats::setNames(header) | |||
# Render a simple (ungrouped) desctable as a data.frame of formatted strings.
#
# desctable: data frame with a `Variables` column plus one column per statistic
# digits:    passed to prettyNum for every statistic column
# ...:       forwarded to as.data.frame
output_df_simple <- function(desctable, digits, ...) {
  # "**var**: *level*" labels become the level name indented by four
  # non-breaking spaces; they end up as row names, not as a column
  variables <- gsub("\\*\\*(.*?)\\*\\*: \\*(.*?)\\*", "\u00A0\u00A0\u00A0\u00A0\\2", desctable$Variables)
  desctable$Variables <- NULL

  # Format every column to `digits`, then blank out the "NA" strings
  formatted <- lapply(desctable, prettyNum, digits = digits)
  formatted <- lapply(formatted, gsub, pattern = "^NA$", replacement = "")

  as.data.frame(formatted,
                check.names = FALSE,
                stringsAsFactors = FALSE,
                row.names = variables,
                ...)
}
#' Pander method for desctable | |||
#' | |||
#' Pander method to output a desctable | |||
#' | |||
#' Uses \code{pandoc.table}, with some default parameters (\code{digits = 2}, \code{justify = "left"}, \code{missing = ""}, \code{keep.line.breaks = T}, \code{split.tables = Inf}, and \code{emphasize.rownames = F}), that you can override if needed. | |||
#' | |||
#' @param x A desctable | |||
#' @inheritParams pander::pandoc.table | |||
#' @seealso \code{\link{pandoc.table}} | |||
#' @export | |||
pander.desctable <- function(x = NULL, | |||
digits = 2, | |||
justify = "left", | |||
missing = "", | |||
keep.line.breaks = T, | |||
split.tables = Inf, | |||
emphasize.rownames = F, | |||
...) { | |||
if (is.null(digits)) digits <- pander::panderOptions("digits") | |||
# Discard "markdown" and insert 4 NbSp before factor levels | |||
x$Variables$Variables <- gsub("\\*\\*(.*?)\\*\\*: \\*(.*?)\\*", " \\2", x$Variables$Variables) | |||
# Create a pander header | |||
header <- header(x, "pander") | |||
# Make a dataframe and push it to pandoc | |||
x %>% | |||
flatten_desctable %>% | |||
data.frame(check.names = F, stringsAsFactors = F) %>% | |||
stats::setNames(header) %>% | |||
pander::pandoc.table(justify = justify, | |||
digits = digits, | |||
missing = missing, | |||
keep.line.breaks = keep.line.breaks, | |||
split.tables = split.tables, | |||
emphasize.rownames = emphasize.rownames, | |||
...) | |||
# Render a grouped desctable as a data.frame of formatted strings.
#
# desctable: grouped desctable; the code reads its first element (group
#            values) and its `data`, `.stats`, `.vars` and optional `.tests`
#            elements
# digits:    passed to prettyNum; p values below 10^-digits are floored
# ...:       forwarded to as.data.frame
output_df_grouped <- function(desctable, digits, ...) {
  # "**var**: *level*" labels become the level name indented by four
  # non-breaking spaces; they end up as row names
  variables <- gsub("\\*\\*(.*?)\\*\\*: \\*(.*?)\\*", "\u00A0\u00A0\u00A0\u00A0\\2", desctable$.vars[[1]]$Variables)

  # Bind the per-group statistics side by side
  table <- Reduce(desctable$.stats, f = cbind)

  # Append the tests, flooring small p values and marking them with "≤"
  if (utils::hasName(desctable, ".tests")) {
    tests <- desctable$.tests[[1]]
    p_floor <- 10^-digits
    tests$p[tests$p < p_floor] <- p_floor
    p_text <- prettyNum(tests$p, digits = digits)
    p_text <- gsub(pattern = "^NA$", replacement = "", p_text)
    tests$p <- gsub(pattern = "^(0.0*1)$", replacement = "\u2264 \\1", p_text)
    table <- cbind(table, tests)
  }

  # Position of the first statistic column of each group: running sum of the
  # group widths, minus the trailing entry that points past the last group
  widths <- vapply(desctable$.stats, length, integer(1))
  starts <- cumsum(c(1, widths))
  starts <- starts[-length(starts)]

  # Prefix those columns with "<grouping variable> = <value> (N = <rows>)"
  group_sizes <- vapply(desctable$data, nrow, integer(1))
  names(table)[starts] <- paste0(names(desctable)[1], " = ", desctable[[1]],
                                 " (N = ", group_sizes, ")\n",
                                 names(table)[starts])

  # Format every column to `digits`, then blank out the "NA" strings
  formatted <- lapply(table, prettyNum, digits = digits)
  formatted <- lapply(formatted, gsub, pattern = "^NA$", replacement = "")

  as.data.frame(formatted,
                check.names = FALSE,
                stringsAsFactors = FALSE,
                row.names = variables,
                ...)
}
#' Create an HTML table widget using the DataTables library | |||
#' | |||
#' This function creates an HTML widget to display rectangular data (a matrix or data frame) using the JavaScript library DataTables, with a method for \code{desctable} objects. | |||
#' | |||
#' @note | |||
#' You are recommended to escape the table content for security reasons (e.g. XSS attacks) when using this function in Shiny or any other dynamic web applications. | |||
#' @references | |||
#' See \url{https://rstudio.github.io/DT/} for the full documentation. | |||
#' @examples | |||
#' library(DT) | |||
#' | |||
#' # see the package vignette for examples and the link to website | |||
#' vignette('DT', package = 'DT') | |||
#' | |||
#' # some boring edge cases for testing purposes | |||
#' m = matrix(nrow = 0, ncol = 5, dimnames = list(NULL, letters[1:5])) | |||
#' datatable(m) # zero rows | |||
#' datatable(as.data.frame(m)) | |||
#' | |||
#' m = matrix(1, dimnames = list(NULL, 'a')) | |||
#' datatable(m) # one row and one column | |||
#' datatable(as.data.frame(m)) | |||
#' | |||
#' m = data.frame(a = 1, b = 2, c = 3) | |||
#' datatable(m) | |||
#' datatable(as.matrix(m)) | |||
#' | |||
#' # dates | |||
#' datatable(data.frame( | |||
#' date = seq(as.Date("2015-01-01"), by = "day", length.out = 5), x = 1:5 | |||
#' )) | |||
#' datatable(data.frame(x = Sys.Date())) | |||
#' datatable(data.frame(x = Sys.time())) | |||
#' | |||
#' ### | |||
#' @inheritParams DT::datatable | |||
#' @export | |||
datatable <- function(data, ...) { | |||
UseMethod("datatable", data) | |||
# Render a simple (ungrouped) desctable through pander.
#
# desctable: data frame with a `Variables` column plus one column per statistic
# digits:    number of digits handed to pander
# ...:       forwarded to pander
output_pander_simple <- function(desctable, digits, ...) {
  # "**var**: *level*" labels become the level name indented by four
  # non-breaking spaces, written as explicit \u00A0 escapes like in
  # output_df_simple; the labels are moved to the row names
  variables <- gsub("\\*\\*(.*?)\\*\\*: \\*(.*?)\\*", "\u00A0\u00A0\u00A0\u00A0\\2", desctable$Variables)
  desctable$Variables <- NULL
  row.names(desctable) <- variables

  pander(desctable,
         digits = digits,
         justify = "left",
         missing = "",
         keep.line.breaks = TRUE,
         split.tables = Inf,
         emphasize.rownames = FALSE,
         ...)
}
#' @rdname datatable | |||
#' @export | |||
datatable.default <- function(data, | |||
options = list(), | |||
class = "display", | |||
callback = DT::JS("return table;"), | |||
caption = NULL, | |||
filter = c("none", "bottom", "top"), | |||
escape = TRUE, | |||
style = "default", | |||
width = NULL, | |||
height = NULL, | |||
elementId = NULL, | |||
fillContainer = getOption("DT.fillContainer", NULL), | |||
autoHideNavigation = getOption("DT.autoHideNavigation", NULL), | |||
selection = c("multiple", "single", "none"), | |||
extensions = list(), | |||
plugins = NULL, ...) { | |||
DT::datatable(data, options = options, class = class, callback = callback, caption = caption, filter = filter, escape = escape, style = style, width = width, height = height, elementId = elementId, fillContainer = fillContainer, autoHideNavigation = autoHideNavigation, selection = selection, extensions = extensions, plugins = plugins, ...) | |||
# Render a grouped desctable through pander.
#
# desctable: grouped desctable; the code reads its first element (group
#            values) and its `data`, `.stats`, `.vars` and optional `.tests`
#            elements
# digits:    number of digits handed to pander; p values below 10^-digits
#            are floored
# ...:       forwarded to pander
output_pander_grouped <- function(desctable, digits, ...) {
  # "**var**: *level*" labels become the level name indented by four
  # non-breaking spaces, written as explicit \u00A0 escapes like in
  # output_df_grouped
  variables <- gsub("\\*\\*(.*?)\\*\\*: \\*(.*?)\\*", "\u00A0\u00A0\u00A0\u00A0\\2", desctable$.vars[[1]]$Variables)

  # Bind the per-group statistics side by side
  table <- Reduce(desctable$.stats, f = cbind)

  # Append the tests, flooring small p values and marking them with "≤"
  if (utils::hasName(desctable, ".tests")) {
    tests <- desctable$.tests[[1]]
    p_floor <- 10^-digits
    tests$p[tests$p < p_floor] <- p_floor
    p_text <- prettyNum(tests$p, digits = digits)
    p_text <- gsub(pattern = "^NA$", replacement = "", p_text)
    tests$p <- gsub(pattern = "^(0.0*1)$", replacement = "\u2264 \\1", p_text)
    table <- cbind(table, tests)
  }

  # Position of the first statistic column of each group: running sum of the
  # group widths, minus the trailing entry that points past the last group
  widths <- vapply(desctable$.stats, length, integer(1))
  starts <- cumsum(c(1, widths))
  starts <- starts[-length(starts)]

  # Prefix those columns with the group label and size on separate lines
  group_sizes <- vapply(desctable$data, nrow, integer(1))
  names(table)[starts] <- paste0(names(desctable)[1], " = ", desctable[[1]],
                                 "</br>\n(N = ", group_sizes, ")</br>\n",
                                 names(table)[starts])

  row.names(table) <- variables
  pander(table,
         digits = digits,
         justify = "left",
         missing = "",
         keep.line.breaks = TRUE,
         split.tables = Inf,
         emphasize.rownames = FALSE,
         ...)
}
# Render a simple (ungrouped) desctable as a DT datatable widget.
#
# desctable: data frame with a `Variables` column plus one column per statistic
# digits:    number of decimals applied through DT::formatRound
# ...:       forwarded to DT::datatable
output_DT_simple <- function(desctable, digits, ...) {
  # "**var**: *level*" labels become the level name indented by four
  # non-breaking spaces (explicit \u00A0 escapes, as in output_df_simple);
  # remaining "**var**" labels become bold HTML
  variables <- gsub("\\*\\*(.*?)\\*\\*: \\*(.*?)\\*", "\u00A0\u00A0\u00A0\u00A0\\2", desctable$Variables)
  variables <- gsub("\\*\\*(.*?)\\*\\*", "<b>\\1</b>", variables)
  desctable$Variables <- NULL

  # Columns to round: everything that is neither integer nor character
  needs_rounding <- vapply(desctable,
                           function(column) !(is.integer(column) || is.character(column)),
                           logical(1))
  to_round <- which(needs_rounding)

  widget <- DT::datatable(desctable,
                          options = list(paging = FALSE,
                                         info = FALSE,
                                         search = list(),
                                         dom = "Brtip",
                                         fixedColumns = TRUE,
                                         fixedHeader = TRUE,
                                         buttons = c("copy", "excel")),
                          rownames = variables,
                          escape = FALSE,
                          style = "default",
                          extensions = c("FixedHeader", "FixedColumns", "Buttons"),
                          ...)
  DT::formatRound(widget, digits = digits, columns = to_round)
}
#' @rdname datatable | |||
#' @inheritParams base::prettyNum | |||
#' @export | |||
datatable.desctable <- function(data, | |||
options = list(paging = F, | |||
info = F, | |||
search = list(), | |||
dom = "Brtip", | |||
fixedColumns = T, | |||
fixedHeader = T, | |||
buttons = c("copy", "excel")), | |||
class = "display", | |||
callback = DT::JS("return table;"), | |||
caption = NULL, | |||
filter = c("none", "bottom", "top"), | |||
escape = FALSE, | |||
style = "default", | |||
width = NULL, | |||
height = NULL, | |||
elementId = NULL, | |||
fillContainer = getOption("DT.fillContainer", NULL), | |||
autoHideNavigation = getOption("DT.autoHideNavigation", NULL), | |||
selection = c("multiple", "single", "none"), | |||
extensions = c("FixedHeader", "FixedColumns", "Buttons"), | |||
plugins = NULL, | |||
rownames = F, | |||
digits = 2, ...) { | |||
# Discard "markdown" and insert 4 NbSp before factor levels | |||
data$Variables$Variables <- gsub("\\*\\*(.*?)\\*\\*: \\*(.*?)\\*", " \\2", data$Variables$Variables) | |||
data$Variables$Variables <- gsub("\\*\\*(.*?)\\*\\*", "<b>\\1</b>", data$Variables$Variables) | |||
# Create a datatable header | |||
header <- header(data, "datatable") | |||
# Flatten desctable | |||
flat <- flatten_desctable(data) | |||
# Replace NAs and apply digits arg | |||
if (!is.null(digits)) | |||
{ | |||
flat %>% | |||
lapply(prettyNum, digits = digits) %>% | |||
lapply(gsub, pattern = "^NA$", replacement = "") -> flat | |||
output_DT_grouped <- function(desctable, digits, ...) { | |||
# Fix variables | |||
variables <- gsub("\\*\\*(.*?)\\*\\*: \\*(.*?)\\*", " \\2", desctable$.vars[[1]]$Variables) | |||
variables <- gsub("\\*\\*(.*?)\\*\\*", "<b>\\1</b>", variables) | |||
# Add tests and round p values | |||
table <- Reduce(desctable$.stats, f = cbind) | |||
if (desctable %>% utils::hasName(".tests")) { | |||
tests <- desctable$.tests[[1]] | |||
tests$p[tests$p < 10^-digits] <- 10^-digits | |||
prettyNum(tests$p, digits = digits) %>% | |||
gsub(pattern = "^NA$", replacement = "") %>% | |||
gsub(pattern = "^(0.0*1)$", replacement = "\u2264 \\1") -> tests$p | |||
table <- cbind(table, tests) | |||
} | |||
# Make a dataframe and push it to datatable, with its custom header | |||
flat %>% | |||
data.frame(check.names = F, stringsAsFactors = F) %>% | |||
DT::datatable(container = header, | |||
options = options, | |||
extensions = extensions, | |||
escape = escape, | |||
class = class, | |||
callback = callback, | |||
caption = caption, | |||
filter = filter, | |||
style = style, | |||
width = width, | |||
height = height, | |||
elementId = elementId, | |||
fillContainer = fillContainer, | |||
autoHideNavigation = autoHideNavigation, | |||
selection = selection, | |||
plugins = plugins, | |||
rownames = rownames, ...) | |||
# Build header | |||
header <- htmltools::tags$table( | |||
htmltools::tags$thead( | |||
htmltools::tags$tr( | |||
htmltools::tags$th(rowspan = 2, ""), | |||
mapply(htmltools::tags$th, | |||
colspan = sapply(desctable$.stats, length), | |||
paste0(names(desctable)[1], " = ", desctable[[1]], " (N = ", sapply(desctable$data, nrow) , ")"), | |||
SIMPLIFY = F)), | |||
htmltools::tags$tr( | |||
lapply(unlist(names(table)),htmltools::tags$th))), | |||
class = "display") | |||
# Round to digits and set row names | |||
table %>% | |||
sapply(function(x) !(is.integer(x) | is.character(x)), simplify = T) %>% | |||
which -> toRound | |||
DT::datatable(table, | |||
container = header, | |||
options = list(paging = F, | |||
info = F, | |||
search = list(), | |||
dom = "Brtip", | |||
fixedColumns = T, | |||
fixedHeader = T, | |||
buttons = c("copy", "excel")), | |||
rownames = variables, | |||
escape = F, | |||
style = "default", | |||
extensions = c("FixedHeader", "FixedColumns", "Buttons"), | |||
...) %>% | |||
DT::formatRound(digits = digits, columns = toRound) | |||
} |
@@ -13,12 +13,13 @@ | |||
#' @param x A vector | |||
#' @export | |||
#' @return The results for the function applied on the vector, compatible with the format of the result table | |||
#' @keywords internal | |||
statify <- function(x, f) { | |||
# Discard NA values | |||
x <- stats::na.omit(x) | |||
## Deprecate conditional formula | |||
if (length(f) == 3) | |||
if (length(f) == 3) # remove after 1.0 | |||
f <- parse_formula(x, f) | |||
else | |||
f <- rlang::as_function(f) | |||
@@ -50,88 +51,42 @@ statify <- function(x, f) { | |||
} | |||
#' Functions to create a list of statistics to use in desctable | |||
#' Function to create a list of statistics to use in desctable | |||
#' | |||
#' These functions take a dataframe as argument and return a list of statistcs in the form accepted by desctable. | |||
#' This function takes a dataframe as argument and returns a list of statistics in the form accepted by desctable.
#' | |||
#' Already defined are | |||
#' \enumerate{ | |||
#' \item stats_default with length, \%, mean, sd, med and IQR | |||
#' \item stats_normal with length, \%, mean and sd | |||
#' \item stats_nonnormal with length, %, median and IQR | |||
#' \item stats_auto, which picks stats depending of the data | |||
#' } | |||
#' | |||
#' You can define your own automatic functions, as long as they take a dataframe as argument and return a list of functions or formulas defining conditions to use a stat function. | |||
#' You can define your own automatic function, as long as it takes a dataframe as argument and returns a list of functions, or formulas defining conditions to use a stat function. | |||
#' | |||
#' @param data The dataframe to apply the statistic to | |||
#' @return A list of statistics to use, potentially assessed from the dataframe | |||
#' @export | |||
stats_default <- function(data) { | |||
list("N" = length, | |||
"%" = percent, | |||
"Mean" = ~if (is.normal(.)) mean(.), | |||
"sd" = ~if (is.normal(.)) sd(.), | |||
"Med" = stats::median, | |||
"IQR" = ~if (!is.factor(.)) IQR(.)) | |||
} | |||
#' @rdname stats_default | |||
#' @export | |||
stats_normal <- function(data) { | |||
list("N" = length, | |||
"%" = percent, | |||
"Mean" = mean, | |||
"sd" = stats::sd) | |||
} | |||
#' @rdname stats_default | |||
#' @export | |||
stats_nonnormal <- function(data) { | |||
list("N" = length, | |||
"%" = percent, | |||
"Median" = stats::median, | |||
"IQR" = ~if (!is.factor(.)) IQR(.)) | |||
} | |||
#' @rdname stats_default | |||
#' @return A list of statistics to use, assessed from the content of the dataframe | |||
#' @export | |||
stats_auto <- function(data) { | |||
data %>% | |||
Filter(f = is.numeric) %>% | |||
lapply(is.normal) %>% | |||
unlist() -> shapiro | |||
if (length(shapiro) == 0) { | |||
normal <- F | |||
nonnormal <- F | |||
} else { | |||
normal <- any(shapiro) | |||
nonnormal <- any(!shapiro) | |||
} | |||
lapply(is.numeric) %>% | |||
unlist() %>% | |||
any -> numeric | |||
data %>% | |||
lapply(is.factor) %>% | |||
unlist() %>% | |||
any() -> fact | |||
if (fact & normal & !nonnormal) stats_normal(data) | |||
else if (fact & !normal & nonnormal) stats_nonnormal(data) | |||
else if (fact & !normal & !nonnormal) list("N" = length, | |||
"%" = percent) | |||
else if (!fact & normal & nonnormal) list("N" = length, | |||
"Mean" = ~if (is.normal(.)) mean(.), | |||
"sd" = ~if (is.normal(.)) sd(.), | |||
"Med" = stats::median, | |||
"IQR" = ~if (!is.factor(.)) IQR(.)) | |||
else if (!fact & normal & !nonnormal) list("N" = length, | |||
"Mean" = mean, | |||
"sd" = stats::sd) | |||
else if (!fact & !normal & nonnormal) list("N" = length, | |||
"Med" = stats::median, | |||
"IQR" = IQR) | |||
else stats_default(data) | |||
stats <- list("Min" = min, | |||
"Q1" = ~quantile(., .25), | |||
"Med" = stats::median, | |||
"Mean" = mean, | |||
"Q3" = ~quantile(., .75), | |||
"Max" = max, | |||
"sd" = stats::sd, | |||
"IQR" = IQR) | |||
if (fact & numeric) | |||
c(list("N" = length, | |||
"%" = percent), | |||
stats) | |||
else if (fact & !numeric) | |||
list("N" = length, | |||
"%" = percent) | |||
else if (!fact & numeric) | |||
stats | |||
} |
@@ -7,6 +7,7 @@ | |||
#' @param f The function to try to apply, or a formula combining two functions | |||
#' @param group Grouping factor | |||
#' @return The results for the function applied on the vector, compatible with the format of the result table | |||
#' @keywords internal | |||
testify <- function(x, f, group) { | |||
# Extract the name of the function | |||
f %>% | |||
@@ -32,11 +33,12 @@ testify <- function(x, f, group) { | |||
} | |||
#' Functions to choose a statistical test | |||
#' Function to choose a statistical test | |||
#' | |||
#' These functions take a variable and a grouping variable as arguments, and return a statistcal test to use, expressed as a single-term formula. | |||
#' This function takes a variable and a grouping variable as arguments, and returns a statistical test to use, expressed as a single-term formula.
#' | |||
#' Currently, only \code{tests_auto} is defined, and picks between t test, wilcoxon, anova, kruskal-wallis and fisher depending on the number of groups, the type of the variable, the normality and homoskedasticity of the distributions. | |||
#' This function uses appropriate non-parametric tests depending on the number of levels (wilcoxon.test for two levels | |||
#' and kruskal.test for more), and fisher.test with fallback on chisq.test on error for factors. | |||
#' | |||
#' @param var The variable to test | |||
#' @param grp The variable for the groups | |||
@@ -45,25 +47,15 @@ testify <- function(x, f, group) { | |||
tests_auto <- function(var, grp) { | |||
grp <- factor(grp) | |||
if (nlevels(grp) < 2) ~no.test | |||
if (nlevels(grp) < 2) | |||
~no.test | |||
else if (is.factor(var)) { | |||
if (tryCatch(is.numeric(fisher.test(var ~ grp)$p.value), error = function(e) F)) ~fisher.test | |||
else ~chisq.test | |||
} else { | |||
all_normal <- all(tapply(var, grp, is.normal)) | |||
if (nlevels(grp) == 2) { | |||
if (all_normal) { | |||
if (tryCatch(stats::var.test(var ~ grp)$p.value > .1, warning = function(e) F, error = function(e) F)) ~t.test(., var.equal = T) | |||
else ~t.test(., var.equal = F) | |||
} | |||
else ~wilcox.test | |||
} else { | |||
if (all_normal) { | |||
if (tryCatch(stats::bartlett.test(var ~ grp)$p.value > .1, warning = function(e) F, error = function(e) F)) ~oneway.test(., var.equal = T) | |||
else ~oneway.test(., var.equal = F) | |||
} | |||
else ~kruskal.test | |||
} | |||
} | |||
if (tryCatch(is.numeric(fisher.test(var ~ grp)$p.value), error = function(e) F)) | |||
~fisher.test | |||
else | |||
~chisq.test | |||
} else if (nlevels(grp) == 2) | |||
~wilcox.test | |||
else | |||
~kruskal.test | |||
} |
@@ -7,6 +7,7 @@ | |||
#' @param y A vector or list of vectors to insert into x | |||
#' @param position The position / vector of positions to insert vector(s) y in vector x | |||
#' @return The combined vector | |||
#' @keywords internal | |||
insert <- function(x, y, position) { | |||
# y is supposed to be a list of vectors. If it is a single vector, make it a simple list containing that vector | |||
if (!is.list(y)) y <- list(y) | |||
@@ -30,192 +31,25 @@ insert <- function(x, y, position) { | |||
unlist(result) | |||
} | |||
#' Tag an object as a desctable
#'
#' Replaces the class attribute of the object with the single class "desctable".
#'
#' @param x Object to tag
#' @return \code{x}, with its class set to "desctable"
set_desctable_class <- function(x) {
  # Functional form of `class(x) <- "desctable"`; returns the modified copy
  `class<-`(x, "desctable")
}
#' Parse a formula | |||
#' Is the object possibly a desctable? | |||
#' | |||
#' Parse a formula defining the conditions to pick a stat/test | |||
#' Check if the object is produced by desc_table. | |||
#' Return a string: | |||
#' - simple | |||
#' - grouped | |||
#' or "" (an empty string) if not a desctable | |||
#' | |||
#' Parse a formula defining the conditions to pick a stat/test | |||
#' and return the function to use. | |||
#' The formula is to be given in the form of | |||
#' conditional ~ T | F | |||
#' and conditions can be nested such as | |||
#' conditional1 ~ (conditional2 ~ T | F) | F | |||
#' The FALSE option can be omitted, and the TRUE can be replaced with NA | |||
#' | |||
#' @param x The variable to test it on | |||
#' @param f A formula to parse | |||
#' @return A function to use as a stat/test | |||
parse_formula <- function(x, f) {
  # Translate the formula tree into the source text of an if/else expression.
  #   cond ~ a | b   becomes   if (cond(x)) {a} else {b}
  # Parentheses are transparent; any other call yields NULL (dropped by paste0).
  to_code <- function(node) {
    if (length(node) == 1) {
      return(as.character(node))
    }

    if (as.character(node[[1]]) == "~") {
      return(paste0("if (", to_code(node[[2]]), "(x)) ",
                    "{",
                    to_code(node[[3]]),
                    "}"))
    }
    if (as.character(node[[1]]) == "|") {
      # The surrounding "~" supplies the opening "{" and the closing "}"
      return(paste0(to_code(node[[2]]),
                    "} else ",
                    "{",
                    to_code(node[[3]])))
    }
    if (as.character(node[[1]]) == "(") {
      return(to_code(node[[2]]))
    }
    NULL
  }

  # Evaluated in this frame, so the "x" in the generated text is the variable
  # passed to parse_formula.
  # NOTE(review): eval(parse(text = ...)) executes whatever the formula names.
  code <- to_code(f)
  eval(parse(text = code))
}
#' Build the header for pander
#'
#' Each label is followed by empty strings so that it fills as many cells as
#' the columns it spans. For nested headers, the cells of each level are joined
#' with "<br/>".
#'
#' @param head A headerList object
#' @return A character vector of header cells
head_pander <- function(head) {
  # A label in the first cell of its span, blanks in the remaining cells
  pad <- function(label, span) c(label, rep("", span - 1))

  if (is.integer(head[[1]])) {
    # Leaf level: each element is the column count of its group
    return(unlist(lapply(names(head), function(label) pad(label, head[[label]]))))
  }

  # Nested level: widths come from the "colspan" attribute of each sub-header
  upper <- unlist(lapply(names(head), function(label) {
    pad(label, attr(head[[label]], "colspan"))
  }))
  lower <- unlist(lapply(head, head_pander))
  paste(upper, lower, sep = "<br/>")
}
#' Build the header for datatable
#'
#' Produces one row of \code{th} tags per nesting level of the header, from the
#' outermost level down to the leaves.
#'
#' @param head A headerList object
#' @return A list of rows, each a list of htmltools \code{th} tags
head_datatable <- function(head) {
  # One th cell per label, spanning the given number of columns
  th_cells <- function(labels, spans) {
    Map(function(label, span) htmltools::tags$th(label, colspan = span), labels, spans)
  }

  rows <- list()
  # While the elements are themselves lists, emit a row for this level and
  # descend one level
  while (is.list(head[[1]])) {
    rows <- c(rows, list(th_cells(names(head), lapply(head, attr, "colspan"))))
    head <- purrr::flatten(head)
  }

  # Leaf level: the elements themselves are the column spans
  c(rows, list(th_cells(names(head), head)))
}
#' @param desctable A potential desctable to check | |||
#' @return The type of desctable ("simple" or "grouped"), or "" if the object is not a desctable | |||
#' @keywords internal | |||
which.desctable <- function(desctable) | |||
{ | |||
attributes <- list() | |||
#' Build the header for dataframe
#'
#' Each label is repeated once per column it spans. For nested headers, the
#' labels of each level are joined with " / ".
#'
#' @param head A headerList object
#' @return A character vector with one entry per column
head_dataframe <- function(head) {
  labels <- names(head)

  if (is.integer(head[[1]])) {
    # Leaf level: each element is the column count of its group
    return(unlist(lapply(labels, function(x) rep(x, head[[x]]))))
  }

  # Nested level: widths come from the "colspan" attribute of each sub-header
  outer <- unlist(lapply(labels, function(x) rep(x, attr(head[[x]], "colspan"))))
  # Recurse with head_dataframe (not head_pander) so that inner levels also get
  # repeated labels and the " / " separator
  inner <- unlist(lapply(head, head_dataframe))
  paste(outer, inner, sep = " / ")
}
#' Build header
#'
#' Take a desctable object and create a suitable header for the mentioned output.
#' Output can be one of "pander", "datatable", or "dataframe".
#'
#' The first element of the desctable is dropped before the header is built.
#' If a single element remains, the header is a blank cell followed by the
#' column names; otherwise the group headers from \code{headerList} are
#' combined with the column names.
#'
#' @param desctable A desctable object
#' @param output An output format for the header; defaults to "pander"
#' @return A header object in the output format
header <- function(desctable, output = c("pander", "datatable", "dataframe")) {
  # Resolve the default vector to a single value and reject unknown formats;
  # without this, comparing the default with == inside if() fails
  output <- match.arg(output)

  desctable <- desctable[-1]
  nm <- desctable %>%
    flatten_desctable() %>%
    data.frame(check.names = FALSE) %>%
    names()

  if (length(desctable) == 1) {
    if (output == "datatable") {
      c("\u00A0", nm) %>%
        lapply(htmltools::tags$th) %>%
        htmltools::tags$tr() %>%
        htmltools::tags$thead() %>%
        htmltools::tags$table(class = "display")
    } else {
      c("\u00A0", nm)
    }
  } else {
    groups <- headerList(desctable)

    if (output == "pander") {
      c("\u00A0", paste(head_pander(groups), nm, sep = "<br/>"))
    } else if (output == "datatable") {
      rows <- c(head_datatable(groups), list(nm %>% lapply(htmltools::tags$th)))
      # Empty corner cell spanning every header row
      rows[[1]] <- c(list(htmltools::tags$th(rowspan = length(rows))), rows[[1]])

      rows %>%
        lapply(htmltools::tags$tr) %>%
        htmltools::tags$thead() %>%
        htmltools::tags$table(class = "display")
    } else {
      # output == "dataframe"
      c("\u00A0", paste(head_dataframe(groups), nm, sep = " / "))
    }
  }
}
#' Build a header list object
#'
#' A data frame is reduced to its number of columns. Any other input is
#' processed element by element, and the result carries a "colspan" attribute
#' holding the total number of columns beneath it.
#'
#' @param desctable a desctable
#' @return a nested list of headers with colspans
headerList <- function(desctable) {
  if (is.data.frame(desctable)) {
    return(length(desctable))
  }

  rec <- lapply(desctable, headerList)
  # Leaves are plain column counts; deeper levels carry their own colspan
  spans <- if (is.integer(rec[[1]])) rec else lapply(rec, attr, "colspan")
  attr(rec, "colspan") <- sum(unlist(spans))
  rec
}
#' Flatten a desctable to a dataframe recursively | |||
#' | |||
#' @param desctable A desctable object | |||
#' @return A flat dataframe | |||
flatten_desctable <- function(desctable) { | |||
if (is.data.frame(desctable)) desctable | |||
else { | |||
desctable %>% | |||
lapply(flatten_desctable) %>% | |||
Reduce(f = cbind) | |||
} | |||
if (all(c("data", ".stats", ".vars") %in% names(desctable))) | |||
"grouped" | |||
else if (is.data.frame(desctable) & ("Variables" %in% names(desctable))) | |||
"simple" | |||
else | |||
"" | |||
} |
@@ -7,363 +7,49 @@ output: github_document | |||
knitr::opts_chunk$set(message = F, warning = F) | |||
``` | |||
[![Travis-CI Build Status](https://travis-ci.org/MaximeWack/desctable.svg?branch=master)](https://travis-ci.org/MaximeWack/desctable) [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/desctable)](https://cran.r-project.org/package=desctable) [![CRAN RStudio mirror downloads](http://cranlogs.r-pkg.org/badges/desctable)](https://www.r-pkg.org:443/pkg/desctable) | |||
[![Travis-CI Build Status](https://travis-ci.org/desctable/desctable.svg?branch=master)](https://travis-ci.org/desctable/desctable) [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/desctable)](https://cran.r-project.org/package=desctable) [![CRAN RStudio mirror downloads](http://cranlogs.r-pkg.org/badges/desctable)](https://www.r-pkg.org:443/pkg/desctable) | |||
# Introduction | |||
Desctable is a comprehensive descriptive and comparative tables generator for R. | |||
**Warning to existing users** | |||
*This version introduces a new API that should make the creation of tables more flexible. | |||
The old API is still present but in a deprecated mode. | |||
See the roadmap below, and the website for the new usage. | |||
Suggestions about this change are welcome !* | |||
Every person doing data analysis has to create tables for descriptive summaries of data (a.k.a. Table.1), or comparative tables. | |||
Many packages, such as the aptly named **tableone**, address this issue. However, they often include hard-coded behaviors, have outputs not easily manipulable with standard R tools, or their syntax are out-of-style (e.g. the argument order makes them difficult to use with the pipe (`%>%`)). | |||
# Introduction | |||
Enter **desctable**, a package built with the following objectives in mind: | |||
Desctable aims to be a simple and expressive interface to building statistical tables in R. | |||
* generate descriptive and comparative statistics tables with nesting | |||
* keep the syntax as simple as possible | |||
* have good reasonable defaults | |||
* be entirely customizable, using standard R tools and functions | |||
* produce the simplest (as a data structure) output possible | |||
* provide helpers for different outputs | |||
* integrate with "modern" R usage, and the **tidyverse** set of tools | |||
* apply functional paradigms | |||
See [desctable.github.io](https://desctable.github.io) for usage and documentation. | |||
# Installation | |||
Install from CRAN with | |||
Install from CRAN (0.1.9) with | |||
``` | |||
install.packages("desctable") | |||
``` | |||
or install the development version from github with | |||
``` | |||
devtools::install_github("maximewack/desctable") | |||
``` | |||
# Loading | |||
```{r} | |||
library(desctable) | |||
``` | |||
It is recommended to read this manual through its vignette: | |||
```{r} | |||
vignette("desctable") | |||
``` | |||
---- | |||
# Descriptive tables | |||
## Simple usage | |||
**desctable** uses and exports the pipe (`%>%`) operator (from packages **magrittr** and **dplyr** fame), though it is not mandatory to use it. | |||
The single interface to the package is its eponymous `desctable` function. | |||
When used on a data.frame, it returns a descriptive table: | |||
```{r} | |||
iris %>% | |||
desctable() | |||
desctable(mtcars) | |||
``` | |||
<br> | |||
As you can see with these two examples, `desctable` describes every variable, with individual levels for factors. It picks statistical functions depending on the type and distribution of the variables in the data, and applies those statistical functions only on the relevant variables. | |||
## Output | |||
The object produced by `desctable` is in fact a list of data.frames, with a "desctable" class. | |||
Methods for reduction to a simple dataframe (`as.data.frame`, automatically used for printing), conversion to markdown (`pander`), and interactive html output with **DT** (`datatable`) are provided: | |||
```{r} | |||
iris %>% | |||
desctable() %>% | |||
pander() | |||
``` | |||
<br> | |||
To use `pander` you need to load the package yourself. | |||
Calls to `pander` and `datatable` with "regular" dataframes will not be affected by the defaults used in the package, and you can modify these defaults for **desctable** objects. | |||
or install the development version (0.3) from github with | |||
The `datatable` wrapper function for desctable objects comes with some default options and formatting such as freezing the row names and table header, export buttons, and rounding of values. Both `pander` and `datatable` wrapper take a *digits* argument to set the number of decimals to show. (`pander` uses the *digits*, *justify* and *missing* arguments of `pandoc.table`, whereas `datatable` calls `prettyNum` with the `digits` parameter, and removes `NA` values. You can set `digits = NULL` if you want the full table and format it yourself) | |||
Subsequent outputs in this README will use **pander**. | |||
## Advanced usage | |||
`desctable` automatically chooses statistical functions if none is provided, using the following algorithm: | |||
* always show N | |||
* if there are factors, show % | |||
* if there are normally distributed variables, show Mean and SD | |||
* if there are non-normally distributed variables, show Median and IQR | |||
For each variable in the table, compute the relevant statistical functions in that list (non-applicable functions will safely return `NA`). | |||
You can specify the statistical functions yourself with the *stats* argument. This argument can either be: | |||
* a function for automatic selection of appropriate statistical functions, depending on the data | |||
* a named list of functions/formulas | |||
The functions/formulas leverage the **tidyverse** way of working with anonymous functions, i.e.: | |||
If a *function*, it is used as is. | |||
If a *formula*, e.g. '~ .x + 1' or `~ . + 1`, it is converted to a function. There are three ways to refer to the arguments: | |||
* For a single argument function, use '.' | |||
* For a two argument function, use '.x' and '.y' | |||
* For more arguments, use '..1', '..2', '..3' etc | |||
This syntax allows you to create very compact anonymous functions, and is the same as in the `map` family of functions from **purrr**. | |||
**Conditional formulas (`condition ~ if_T | if_F`) from previous versions are no longer supported!** | |||
### Automatic function | |||
The default value for the *stats* argument is `stats_auto`, provided in the package. | |||
Several other "automatic statistical functions" are defined in this package: `stats_auto`, `stats_default`, `stats_normal`, `stats_nonnormal`. | |||
You can also provide your own automatic function, which needs to | |||
* accept a dataframe as its argument (whether to use this dataframe or not in the function is your choice), and | |||
* return a named list of statistical functions to use, as defined in the subsequent paragraphs. | |||
```{r} | |||
# Strictly equivalent to iris %>% desctable() %>% pander() | |||
iris %>% | |||
desctable(stats = stats_auto) %>% | |||
pander() | |||
``` | |||
<br> | |||
For reference, here is the body of the `stats_auto` function in the package: | |||
```{r, echo = F} | |||
print(stats_auto) | |||
``` | |||
<br> | |||
### Statistical functions | |||
Statistical functions can be **any** function defined in R that you want to use, such as `length` or `mean`. | |||
The only condition is that they return a single numerical value. One exception is when they return a vector of length `1 + nlevels(x)` when applied to factors, as is needed for the `percent` function. | |||
As mentioned above, they need to be used inside a **named list**, such as | |||
```{r} | |||
mtcars %>% | |||
desctable(stats = list("N" = length, "Mean" = mean, "SD" = sd)) %>% | |||
pander() | |||
devtools::install_github("desctable/desctable") | |||
``` | |||
<br> | |||
The names will be used as column headers in the resulting table, and the functions will be applied safely on the variables (errors return `NA`, and for factors the function will be used on individual levels). | |||
Several convenience functions are included in this package. | |||
* `percent`, which prints percentages of levels in a factor | |||
* `IQR`, which re-implements `stats::IQR` but works better with `NA` values | |||
* `is.normal`, which tests for normality using the following method: `length(na.omit(x)) > 30 & shapiro.test(x)$p.value > .1` | |||
Be aware that **all functions will be used on variables stripped of their `NA` values!** | |||
This is necessary for most statistical functions to be useful, and makes **N** (`length`) show only the number of observations in the dataset for each variable. | |||
# Roadmap | |||
### Labels | |||
## 0.3 | |||
It is often the case that variable names are not "pretty" enough to be used as-is in a table. | |||
Although you could still edit the variable labels in the table afterwards using sub-setting or string replacement functions, we provide a facility for this using the **labels** argument. | |||
The **labels** argument is a named character vector associating variable names and labels. | |||
You don't need to provide labels for all the variables, and extra labels will be silently discarded. This allows you to define a "global" labels vector and use it for multiple tables even after variable selections. | |||
```{r} | |||
mtlabels <- c(mpg = "Miles/(US) gallon", | |||
cyl = "Number of cylinders", | |||
disp = "Displacement (cu.in.)", | |||
hp = "Gross horsepower", | |||
drat = "Rear axle ratio", | |||
wt = "Weight (1000 lbs)", | |||
qsec = "¼ mile time", | |||
vs = "V/S", | |||
am = "Transmission", | |||
gear = "Number of forward gears", | |||
carb = "Number of carburetors") | |||
mtcars %>% | |||
dplyr::mutate(am = factor(am, labels = c("Automatic", "Manual"))) %>% | |||
desctable(labels = mtlabels) %>% | |||
pander() | |||
``` | |||
<br> | |||
---- | |||
# Comparative tables | |||
## Simple usage | |||
Creating a comparative table (between groups defined by a factor) using `desctable` is as easy as creating a descriptive table. | |||
It leverages the `group_by` function from **dplyr**: | |||
```{r} | |||
iris %>% | |||
group_by(Species) %>% | |||
desctable() -> iris_by_Species | |||
iris_by_Species | |||
``` | |||
<br> | |||
The result is a table containing a descriptive sub-table for each level of the grouping factor (the statistical functions rules are applied to each sub-table independently), with the statistical tests performed, and their p values. | |||
When displayed as a flat dataframe, the grouping header appears in each variable name. | |||
You can also see the grouping headers by inspecting the resulting object, which is a nested list of dataframes, each dataframe being named after the grouping factor and its levels (with sample size for each). | |||
```{r} | |||
str(iris_by_Species) | |||
``` | |||
<br> | |||
You can specify groups based on any variable, not only factors: | |||
```{r} | |||
# With pander output | |||
mtcars %>% | |||
group_by(cyl) %>% | |||
desctable() %>% | |||
pander() | |||
``` | |||
<br> | |||
You can also specify groups based on an expression | |||
```{r} | |||
iris %>% | |||
group_by(Petal.Length > 5) %>% | |||
desctable() %>% | |||
pander() | |||
``` | |||
<br> | |||
Multiple nested groups are also possible: | |||
```{r, message = F, warning = F} | |||
mtcars %>% | |||
dplyr::mutate(am = factor(am, labels = c("Automatic", "Manual"))) %>% | |||
group_by(vs, am, cyl) %>% | |||
desctable() %>% | |||
pander() | |||
``` | |||
<br> | |||
In the case of nested groups (a.k.a. sub-group analysis), statistical tests are performed only between the groups of the deepest grouping level. | |||
Statistical tests are automatically selected depending on the data and the grouping factor. | |||
## Advanced usage | |||
`desctable` automatically chooses statistical functions if none is provided, using the following algorithm: | |||
* if the variable is a factor, use `fisher.test` | |||
* if the grouping factor has only one level, use the provided `no.test` (which does nothing) | |||
* if the grouping factor has two levels | |||
* and the variable presents homoskedasticity (p value for `var.test` > .1) and normality of distribution in both groups, use `t.test(var.equal = T)` | |||
* and the variable does not present homoskedasticity (p value for `var.test` < .1) but normality of distribution in both groups, use `t.test(var.equal = F)` | |||
* else use `wilcox.test` | |||
* if the grouping factor has more than two levels | |||
* and the variable presents homoskedasticity (p value for `bartlett.test` > .1) and normality of distribution in all groups, use `oneway.test(var.equal = T)` | |||
* and the variable does not present homoskedasticity (p value for `bartlett.test` < .1) but normality of distribution in all groups, use `oneway.test(var.equal = F)` | |||
* else use `kruskal.test` | |||
You can specify the statistical test functions yourself with the *tests* argument. This argument can either be: | |||
* a function for automatic selection of appropriate statistical test functions, depending on the data | |||
* a named list of statistical test functions | |||
Please note that the statistical test functions **must** be given as *formulas* so as to capture the name of the test to display in the table. | |||
**purrr** style formulas are also accepted, as with the statistical functions. | |||
This also allows you to specify optional arguments of such functions, and to work around non-standard test functions (see **Statistical test functions**). | |||
### Automatic function | |||
The default value for the *tests* argument is `tests_auto`, provided in the package. | |||
You can also provide your own automatic function, which needs to | |||
* accept a variable and a grouping factor as its arguments, and | |||
* return a single-term formula containing a statistical test function. | |||
This function will be used on every variable and every grouping factor to determine the appropriate test. | |||
```{r} | |||
# Strictly equivalent to iris %>% group_by(Species) %>% desctable() %>% pander() | |||
iris %>% | |||
group_by(Species) %>% | |||
desctable(tests = tests_auto) %>% | |||
pander() | |||
``` | |||
<br> | |||
For reference, here is the body of the `tests_auto` function in the package: | |||
```{r, echo = F} | |||
print(tests_auto) | |||
``` | |||
<br> | |||
### Statistical test functions | |||
You can provide a named list of statistical functions, but here the mechanism is a bit different from the *stats* argument. | |||
The list must contain either `.auto` or `.default`. | |||
* `.auto` needs to be an automatic function, such as `tests_auto`. It will be used by default on all variables to select a test | |||
* `.default` needs to be a single-term formula containing a statistical test function that will be used on all variables | |||
You can also provide overrides to use specific tests for specific variables. | |||
This is done using list items named as the variable and containing a single-term formula function. | |||
```{r} | |||
iris %>% | |||
group_by(Petal.Length > 5) %>% | |||
desctable(tests = list(.auto = tests_auto, | |||
Species = ~chisq.test)) %>% | |||
pander() | |||
``` | |||
<br> | |||
```{r} | |||
mtcars %>% | |||
dplyr::mutate(am = factor(am, labels = c("Automatic", "Manual"))) %>% | |||
group_by(am) %>% | |||
desctable(tests = list(.default = ~wilcox.test, | |||
mpg = ~t.test)) %>% | |||
pander() | |||
``` | |||
Here's an example of **purrr** style function: | |||
```{r} | |||
iris %>% | |||
group_by(Petal.Length > 5) %>% | |||
desctable(tests = list(.auto = tests_auto, | |||
Petal.Width = ~oneway.test(., var.equal = T))) | |||
``` | |||
<br> | |||
This new version introduces a new internal representation as well as an entirely new API for desctable ! | |||
The original `desctable` function and usage remains until 1.0, but begins deprecation. | |||
As with statistical functions, **any** statistical test function defined in R can be used. | |||
This new API is more flexible and more simple at the same time. Combine `group_by`, `desc_table`, `desc_tests`, and `desc_output` to create descriptive and comparative statistics tables and output them to various formats. | |||
The conditions are that the function | |||
The internal representation is now a simple dataframe in the simple descriptive case, and a nested dataframe with list-columns for comparative tables, allowing easier manipulation by the user. | |||
* accepts a formula (`variable ~ grouping_variable`) as a first positional argument (as is the case with most tests, like `t.test`), and | |||
* returns an object with a `p.value` element. | |||
## Next | |||
Several convenience functions are provided: formula versions for `chisq.test` and `fisher.test` using generic S3 methods (thus the behavior of standard calls to `chisq.test` and `fisher.test` are not modified), and `ANOVA`, a partial application of `oneway.test` with parameter *var.equal* = T. | |||
- Add a `desc_output` for {gt} | |||
- Implement a way to make tables for survival analysis. | |||
- Implement a way to make tables for multivariate models. | |||
- Allow univariate tests for simple tables | |||
- add a column for totals in grouped tables |
@@ -2,818 +2,58 @@ Desctable | |||
================ | |||
[![Travis-CI Build | |||
Status](https://travis-ci.org/MaximeWack/desctable.svg?branch=master)](https://travis-ci.org/MaximeWack/desctable) | |||
Status](https://travis-ci.org/desctable/desctable.svg?branch=master)](https://travis-ci.org/desctable/desctable) | |||
[![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/desctable)](https://cran.r-project.org/package=desctable) | |||
[![CRAN RStudio mirror | |||
downloads](http://cranlogs.r-pkg.org/badges/desctable)](https://www.r-pkg.org:443/pkg/desctable) | |||
# Introduction | |||
Desctable is a comprehensive descriptive and comparative tables | |||
generator for R. | |||
**Warning to existing users** | |||
*This version introduces a new API that should make the creation of | |||
tables more flexible. | |||
The old API is still present but in a deprecated mode. | |||
See the roadmap below, and the website for the new usage. | |||
Suggestions about this change are welcome !* | |||
Every person doing data analysis has to create tables for descriptive | |||
summaries of data (a.k.a. Table.1), or comparative tables. | |||
Many packages, such as the aptly named **tableone**, address this issue. | |||
However, they often include hard-coded behaviors, have outputs not | |||
easily manipulable with standard R tools, or their syntax are | |||
out-of-style (e.g. the argument order makes them difficult to use with | |||
the pipe (`%>%`)). | |||
# Introduction | |||
Enter **desctable**, a package built with the following objectives in | |||
mind: | |||
Desctable aims to be a simple and expressive interface to building | |||
statistical tables in R. | |||
- generate descriptive and comparative statistics tables with nesting | |||
- keep the syntax as simple as possible | |||
- have good reasonable defaults | |||
- be entirely customizable, using standard R tools and functions | |||
- produce the simplest (as a data structure) output possible | |||
- provide helpers for different outputs | |||
- integrate with “modern” R usage, and the **tidyverse** set of tools | |||
- apply functional paradigms | |||
See [desctable.github.io](https://desctable.github.io) for usage and | |||
documentation. | |||
# Installation | |||
Install from CRAN with | |||
Install from CRAN (0.1.9) with | |||
install.packages("desctable") | |||
or install the development version from github with | |||
devtools::install_github("maximewack/desctable") | |||
# Loading | |||
``` r | |||
library(desctable) | |||
``` | |||
It is recommended to read this manual through its vignette: | |||
``` r | |||
vignette("desctable") | |||
``` | |||
------------------------------------------------------------------------ | |||
# Descriptive tables | |||
## Simple usage | |||
**desctable** uses and exports the pipe (`%>%`) operator (from packages | |||
**magrittr** and **dplyr** fame), though it is not mandatory to use it. | |||
The single interface to the package is its eponymous `desctable` | |||
function. | |||
When used on a data.frame, it returns a descriptive table: | |||
``` r | |||
iris %>% | |||
desctable() | |||
``` | |||
## N % Mean sd Med IQR | |||
## 1 Sepal.Length 150 NA NA NA 5.80 1.3 | |||
## 2 Sepal.Width 150 NA 3.057333 0.4358663 3.00 0.5 | |||
## 3 Petal.Length 150 NA NA NA 4.35 3.5 | |||
## 4 Petal.Width 150 NA NA NA 1.30 1.5 | |||
## 5 Species 150 NA NA NA NA NA | |||
## 6 Species: setosa 50 33.33333 NA NA NA NA | |||
## 7 Species: versicolor 50 33.33333 NA NA NA NA | |||
## 8 Species: virginica 50 33.33333 NA NA NA NA | |||
``` r | |||
desctable(mtcars) | |||
``` | |||
## N Mean sd Med IQR | |||
## 1 mpg 32 20.090625 6.0269481 19.200 7.37500 | |||
## 2 cyl 32 NA NA 6.000 4.00000 | |||
## 3 disp 32 NA NA 196.300 205.17500 | |||
## 4 hp 32 NA NA 123.000 83.50000 | |||
## 5 drat 32 3.596563 0.5346787 3.695 0.84000 | |||
## 6 wt 32 NA NA 3.325 1.02875 | |||
## 7 qsec 32 17.848750 1.7869432 17.710 2.00750 | |||
## 8 vs 32 NA NA 0.000 1.00000 | |||
## 9 am 32 NA NA 0.000 1.00000 | |||
## 10 gear 32 NA NA 4.000 1.00000 | |||
## 11 carb 32 NA NA 2.000 2.00000 | |||
<br> | |||
As you can see with these two examples, `desctable` describes every | |||
variable, with individual levels for factors. It picks statistical | |||
functions depending on the type and distribution of the variables in the | |||
data, and applies those statistical functions only on the relevant | |||
variables. | |||
## Output | |||
The object produced by `desctable` is in fact a list of data.frames, | |||
with a “desctable” class. | |||
Methods for reduction to a simple dataframe (`as.data.frame`, | |||
automatically used for printing), conversion to markdown (`pander`), and | |||
interactive html output with **DT** (`datatable`) are provided: | |||
``` r | |||
iris %>% | |||
desctable() %>% | |||
pander() | |||
``` | |||
| | N | % | Mean | sd | Med | IQR | | |||
|:---------------|:----|:----|:-----|:-----|:----|:----| | |||
| Sepal.Length | 150 | | | | 5.8 | 1.3 | | |||
| Sepal.Width | 150 | | 3.1 | 0.44 | 3 | 0.5 | | |||
| Petal.Length | 150 | | | | 4.3 | 3.5 | | |||
| Petal.Width | 150 | | | | 1.3 | 1.5 | | |||
| **Species** | 150 | | | | | | | |||
| setosa | 50 | 33 | | | | | | |||
| versicolor | 50 | 33 | | | | | | |||
| virginica | 50 | 33 | | | | | | |||
<br> | |||
To use `pander` you need to load the package yourself. | |||
Calls to `pander` and `datatable` with “regular” dataframes will not be | |||
affected by the defaults used in the package, and you can modify these | |||
defaults for **desctable** objects. | |||
The `datatable` wrapper function for desctable objects comes with some | |||
default options and formatting such as freezing the row names and table | |||
header, export buttons, and rounding of values. Both `pander` and | |||
`datatable` wrapper take a *digits* argument to set the number of | |||
decimals to show. (`pander` uses the *digits*, *justify* and *missing* | |||
arguments of `pandoc.table`, whereas `datatable` calls `prettyNum` with | |||
the `digits` parameter, and removes `NA` values. You can set | |||
`digits = NULL` if you want the full table and format it yourself) | |||
Subsequent outputs in this README will use **pander**. | |||
## Advanced usage | |||
`desctable` automatically chooses statistical functions if none is | |||
provided, using the following algorithm: | |||
- always show N | |||
- if there are factors, show % | |||
- if there are normally distributed variables, show Mean and SD | |||
- if there are non-normally distributed variables, show Median and IQR | |||
For each variable in the table, compute the relevant statistical | |||
functions in that list (non-applicable functions will safely return | |||
`NA`). | |||
You can specify the statistical functions yourself with the *stats* | |||
argument. This argument can either be: | |||
- a function for automatic selection of appropriate statistical | |||
functions, depending on the data | |||
- a named list of functions/formulas | |||
The functions/formulas leverage the **tidyverse** way of working with | |||
anonymous functions, i.e.: | |||
If a *function*, it is used as is. If a *formula*, e.g. ‘\~ .x + 1’ or | |||
`~ . + 1`, it is converted to a function. There are three ways to refer | |||
to the arguments: | |||
- For a single argument function, use ‘.’ | |||
- For a two argument function, use ‘.x’ and ‘.y’ | |||
- For more arguments, use ‘..1’, ‘..2’, ‘..3’ etc | |||
This syntax allows you to create very compact anonymous functions, and | |||
is the same as in the `map` family of functions from **purrr**. | |||
**Conditional formulas (`condition ~ if_T | if_F`) from previous | |||
versions are no longer supported!** | |||
### Automatic function | |||
The default value for the *stats* argument is `stats_auto`, provided in | |||
the package. | |||
Several other “automatic statistical functions” are defined in this | |||
package: `stats_auto`, `stats_default`, `stats_normal`, | |||
`stats_nonnormal`. | |||
You can also provide your own automatic function, which needs to | |||
- accept a dataframe as its argument (whether to use this dataframe or | |||
not in the function is your choice), and | |||
- return a named list of statistical functions to use, as defined in | |||
the subsequent paragraphs. | |||
``` r | |||
# Strictly equivalent to iris %>% desctable() %>% pander() | |||
iris %>% | |||
desctable(stats = stats_auto) %>% | |||
pander() | |||
``` | |||
| | N | % | Mean | sd | Med | IQR | | |||
|:---------------|:----|:----|:-----|:-----|:----|:----| | |||
| Sepal.Length | 150 | | | | 5.8 | 1.3 | | |||
| Sepal.Width | 150 | | 3.1 | 0.44 | 3 | 0.5 | | |||
| Petal.Length | 150 | | | | 4.3 | 3.5 | | |||
| Petal.Width | 150 | | | | 1.3 | 1.5 | | |||
| **Species** | 150 | | | | | | | |||
| setosa | 50 | 33 | | | | | | |||
| versicolor | 50 | 33 | | | | | | |||
| virginica | 50 | 33 | | | | | | |||
<br> | |||
For reference, here is the body of the `stats_auto` function in the | |||
package: | |||
## function (data) | |||
## { | |||
## shapiro <- data %>% Filter(f = is.numeric) %>% lapply(is.normal) %>% | |||
## unlist() | |||
## if (length(shapiro) == 0) { | |||
## normal <- F | |||
## nonnormal <- F | |||
## } | |||
## else { | |||
## normal <- any(shapiro) | |||
## nonnormal <- any(!shapiro) | |||
## } | |||
## fact <- data %>% lapply(is.factor) %>% unlist() %>% any() | |||
## if (fact & normal & !nonnormal) | |||
## stats_normal(data) | |||
## else if (fact & !normal & nonnormal) | |||
## stats_nonnormal(data) | |||
## else if (fact & !normal & !nonnormal) | |||
## list(N = length, `%` = percent) | |||
## else if (!fact & normal & nonnormal) | |||
## list(N = length, Mean = ~if (is.normal(.)) mean(.), sd = ~if (is.normal(.)) sd(.), | |||
## Med = stats::median, IQR = ~if (!is.factor(.)) IQR(.)) | |||
## else if (!fact & normal & !nonnormal) | |||
## list(N = length, Mean = mean, sd = stats::sd) | |||
## else if (!fact & !normal & nonnormal) | |||
## list(N = length, Med = stats::median, IQR = IQR) | |||
## else stats_default(data) | |||
## } | |||
## <bytecode: 0x56144fe581a8> | |||
## <environment: namespace:desctable> | |||
<br> | |||
### Statistical functions | |||
Statistical functions can be **any** function defined in R that you want | |||
to use, such as `length` or `mean`. | |||
The only condition is that they return a single numerical value. One | |||
exception is when they return a vector of length `1 + nlevels(x)` when | |||
applied to factors, as is needed for the `percent` function. | |||
As mentioned above, they need to be used inside a **named list**, such | |||
as | |||
``` r | |||
mtcars %>% | |||
desctable(stats = list("N" = length, "Mean" = mean, "SD" = sd)) %>% | |||
pander() | |||
``` | |||
| | N | Mean | SD | | |||
|:-----|:----|:-----|:-----| | |||
| mpg | 32 | 20 | 6 | | |||
| cyl | 32 | 6.2 | 1.8 | | |||
| disp | 32 | 231 | 124 | | |||
| hp | 32 | 147 | 69 | | |||
| drat | 32 | 3.6 | 0.53 | | |||
| wt | 32 | 3.2 | 0.98 | | |||
| qsec | 32 | 18 | 1.8 | | |||
| vs | 32 | 0.44 | 0.5 | | |||
| am | 32 | 0.41 | 0.5 | | |||
| gear | 32 | 3.7 | 0.74 | | |||
| carb | 32 | 2.8 | 1.6 | | |||
<br> | |||
The names will be used as column headers in the resulting table, and the | |||
functions will be applied safely on the variables (errors return `NA`, | |||
and for factors the function will be used on individual levels). | |||
Several convenience functions are included in this package. | |||
- `percent`, which prints percentages of levels in a factor | |||
- `IQR`, which re-implements `stats::IQR` but works better with `NA` | |||
values | |||
- `is.normal`, which tests for normality using the following method: | |||
`length(na.omit(x)) > 30 & shapiro.test(x)$p.value > .1` | |||
Be aware that **all functions will be used on variables stripped of | |||
their `NA` values!** This is necessary for most statistical functions to | |||
be useful, and makes **N** (`length`) show only the number of | |||
non-missing observations in the dataset for each variable. | |||
### Labels | |||
It is often the case that variable names are not “pretty” enough to be | |||
used as-is in a table. | |||
Although you could still edit the variable labels in the table | |||
afterwards using sub-setting or string replacement functions, we provide | |||
a facility for this using the **labels** argument. | |||
The **labels** argument is a named character vector associating variable | |||
names and labels. | |||
You don’t need to provide labels for all the variables, and extra labels | |||
will be silently discarded. This allows you to define a “global” labels | |||
vector and use it for multiple tables even after variable selections. | |||
``` r | |||
mtlabels <- c(mpg = "Miles/(US) gallon", | |||
cyl = "Number of cylinders", | |||
disp = "Displacement (cu.in.)", | |||
hp = "Gross horsepower", | |||
drat = "Rear axle ratio", | |||
wt = "Weight (1000 lbs)", | |||
qsec = "¼ mile time", | |||
vs = "V/S", | |||
am = "Transmission", | |||
gear = "Number of forward gears", | |||
carb = "Number of carburetors") | |||
mtcars %>% | |||
dplyr::mutate(am = factor(am, labels = c("Automatic", "Manual"))) %>% | |||
desctable(labels = mtlabels) %>% | |||
pander() | |||
``` | |||
| | N | % | Mean | sd | Med | IQR | | |||
|:------------------------|:----|:----|:-----|:-----|:----|:-----| | |||
| Miles/(US) gallon | 32 | | 20 | 6 | 19 | 7.4 | | |||
| Number of cylinders | 32 | | | | 6 | 4 | | |||
| Displacement (cu.in.) | 32 | | | | 196 | 205 | | |||
| Gross horsepower | 32 | | | | 123 | 84 | | |||
| Rear axle ratio | 32 | | 3.6 | 0.53 | 3.7 | 0.84 | | |||
| Weight (1000 lbs) | 32 | | | | 3.3 | 1 | | |||
| ¼ mile time | 32 | | 18 | 1.8 | 18 | 2 | | |||
| V/S | 32 | | | | 0 | 1 | | |||
| **Transmission** | 32 | | | | | | | |||
| Automatic | 19 | 59 | | | | | | |||
| Manual | 13 | 41 | | | | | | |||
| Number of forward gears | 32 | | | | 4 | 1 | | |||
| Number of carburetors | 32 | | | | 2 | 2 | | |||
<br> | |||
------------------------------------------------------------------------ | |||
# Comparative tables | |||
## Simple usage | |||
Creating a comparative table (between groups defined by a factor) using | |||
`desctable` is as easy as creating a descriptive table. | |||
It leverages the `group_by` function from **dplyr**: | |||
``` r | |||
iris %>% | |||
group_by(Species) %>% | |||
desctable() -> iris_by_Species | |||
iris_by_Species | |||
``` | |||
## Species: setosa (n=50) / N Species: setosa (n=50) / Mean | |||
## 1 Sepal.Length 50 5.006 | |||
## 2 Sepal.Width 50 3.428 | |||
## 3 Petal.Length 50 NA | |||
## 4 Petal.Width 50 NA | |||
## Species: setosa (n=50) / sd Species: setosa (n=50) / Med | |||
## 1 0.3524897 5.0 | |||
## 2 0.3790644 3.4 | |||
## 3 NA 1.5 | |||
## 4 NA 0.2 | |||
## Species: setosa (n=50) / IQR Species: versicolor (n=50) / N | |||
## 1 0.400 50 | |||
## 2 0.475 50 | |||
## 3 0.175 50 | |||
## 4 0.100 50 | |||
## Species: versicolor (n=50) / Mean Species: versicolor (n=50) / sd | |||
## 1 5.936 0.5161711 | |||
## 2 2.770 0.3137983 | |||
## 3 4.260 0.4699110 | |||
## 4 NA NA | |||
## Species: versicolor (n=50) / Med Species: versicolor (n=50) / IQR | |||
## 1 5.90 0.700 | |||
## 2 2.80 0.475 | |||
## 3 4.35 0.600 | |||
## 4 1.30 0.300 | |||
## Species: virginica (n=50) / N Species: virginica (n=50) / Mean | |||
## 1 50 6.588 | |||
## 2 50 2.974 | |||
## 3 50 5.552 | |||
## 4 50 NA | |||
## Species: virginica (n=50) / sd Species: virginica (n=50) / Med | |||
## 1 0.6358796 6.50 | |||
## 2 0.3224966 3.00 | |||
## 3 0.5518947 5.55 | |||
## 4 NA 2.00 | |||
## Species: virginica (n=50) / IQR tests / p tests / test | |||
## 1 0.675 1.505059e-28 oneway.test(., var.equal = F) | |||
## 2 0.375 4.492017e-17 oneway.test(., var.equal = T) | |||
## 3 0.775 4.803974e-29 kruskal.test | |||
## 4 0.500 3.261796e-29 kruskal.test | |||
<br> | |||
The result is a table containing a descriptive sub-table for each level | |||
of the grouping factor (the statistical functions rules are applied to | |||
each sub-table independently), with the statistical tests performed, and | |||
their p values. | |||
When displayed as a flat dataframe, the grouping header appears in each | |||
variable name. | |||
You can also see the grouping headers by inspecting the resulting | |||
object, which is a nested list of dataframes, each dataframe being named | |||
after the grouping factor and its levels (with sample size for each). | |||
``` r | |||
str(iris_by_Species) | |||
``` | |||
## List of 5 | |||
## $ Variables :'data.frame': 4 obs. of 1 variable: | |||
## ..$ Variables: chr [1:4] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" | |||
## $ Species: setosa (n=50) :'data.frame': 4 obs. of 5 variables: | |||
## ..$ N : int [1:4] 50 50 50 50 | |||
## ..$ Mean: num [1:4] 5.01 3.43 NA NA | |||
## ..$ sd : num [1:4] 0.352 0.379 NA NA | |||
## ..$ Med : num [1:4] 5 3.4 1.5 0.2 | |||
## ..$ IQR : num [1:4] 0.4 0.475 0.175 0.1 | |||
## $ Species: versicolor (n=50):'data.frame': 4 obs. of 5 variables: | |||
## ..$ N : int [1:4] 50 50 50 50 | |||
## ..$ Mean: num [1:4] 5.94 2.77 4.26 NA | |||
## ..$ sd : num [1:4] 0.516 0.314 0.47 NA | |||
## ..$ Med : num [1:4] 5.9 2.8 4.35 1.3 | |||
## ..$ IQR : num [1:4] 0.7 0.475 0.6 0.3 | |||
## $ Species: virginica (n=50) :'data.frame': 4 obs. of 5 variables: | |||
## ..$ N : int [1:4] 50 50 50 50 | |||
## ..$ Mean: num [1:4] 6.59 2.97 5.55 NA | |||
## ..$ sd : num [1:4] 0.636 0.322 0.552 NA | |||
## ..$ Med : num [1:4] 6.5 3 5.55 2 | |||
## ..$ IQR : num [1:4] 0.675 0.375 0.775 0.5 | |||
## $ tests :'data.frame': 4 obs. of 2 variables: | |||
## ..$ p : num [1:4] 1.51e-28 4.49e-17 4.80e-29 3.26e-29 | |||
## ..$ test: chr [1:4] "oneway.test(., var.equal = F)" "oneway.test(., var.equal = T)" "kruskal.test" "kruskal.test" | |||
## - attr(*, "class")= chr "desctable" | |||
<br> | |||
You can specify groups based on any variable, not only factors: | |||
``` r | |||
# With pander output | |||
mtcars %>% | |||
group_by(cyl) %>% | |||
desctable() %>% | |||
pander() | |||
``` | |||
| | cyl: 4 (n=11)<br/>N | <br/>Med | <br/>IQR | cyl: 6 (n=7)<br/>N | <br/>Med | <br/>IQR | cyl: 8 (n=14)<br/>N | <br/>Med | <br/>IQR | tests<br/>p | <br/>test | | |||
|:-----|:--------------------|:---------|:---------|:-------------------|:---------|:---------|:--------------------|:---------|:---------|:------------|:-------------| | |||
| mpg | 11 | 26 | 7.6 | 7 | 20 | 2.4 | 14 | 15 | 1.8 | 2.6e-06 | kruskal.test | | |||
| disp | 11 | 108 | 42 | 7 | 168 | 36 | 14 | 350 | 88 | 1.6e-06 | kruskal.test | | |||
| hp | 11 | 91 | 30 | 7 | 110 | 13 | 14 | 192 | 65 | 3.3e-06 | kruskal.test | | |||
| drat | 11 | 4.1 | 0.35 | 7 | 3.9 | 0.56 | 14 | 3.1 | 0.15 | 0.00075 | kruskal.test | | |||
| wt | 11 | 2.2 | 0.74 | 7 | 3.2 | 0.62 | 14 | 3.8 | 0.48 | 1.1e-05 | kruskal.test | | |||
| qsec | 11 | 19 | 1.4 | 7 | 18 | 2.4 | 14 | 17 | 1.5 | 0.0062 | kruskal.test | | |||
| vs | 11 | 1 | 0 | 7 | 1 | 1 | 14 | 0 | 0 | 3.2e-05 | kruskal.test | | |||
| am | 11 | 1 | 0.5 | 7 | 0 | 1 | 14 | 0 | 0 | 0.014 | kruskal.test | | |||
| gear | 11 | 4 | 0 | 7 | 4 | 0.5 | 14 | 3 | 0 | 0.0062 | kruskal.test | | |||
| carb | 11 | 2 | 1 | 7 | 4 | 1.5 | 14 | 3.5 | 1.8 | 0.0017 | kruskal.test | | |||
<br> | |||
You can also specify groups based on an expression: | |||
``` r | |||
iris %>% | |||
group_by(Petal.Length > 5) %>% | |||
desctable() %>% | |||
pander() | |||
``` | |||
| | Petal.Length \> 5: FALSE (n=108)<br/>N | <br/>% | <br/>Mean | <br/>sd | <br/>Med | <br/>IQR | Petal.Length \> 5: TRUE (n=42)<br/>N | <br/>% | <br/>Mean | <br/>sd | <br/>Med | <br/>IQR | tests<br/>p | <br/>test | | |||
|:---------------|:---------------------------------------|:-------|:----------|:--------|:---------|:---------|:-------------------------------------|:-------|:----------|:--------|:---------|:---------|:------------|:------------| | |||
| Sepal.Length | 108 | | | | 5.5 | 1 | 42 | | | | 6.7 | 0.85 | 1.6e-15 | wilcox.test | | |||
| Sepal.Width | 108 | | 3.1 | 0.48 | 3 | 0.6 | 42 | | | | 3 | 0.4 | 0.69 | wilcox.test | | |||
| Petal.Length | 108 | | | | 3.5 | 3 | 42 | | | | 5.6 | 0.67 | 2.1e-21 | wilcox.test | | |||
| Petal.Width | 108 | | | | 1 | 1.2 | 42 | | 2.1 | 0.28 | 2.1 | 0.47 | 1.6e-19 | wilcox.test | | |||
| **Species** | 108 | | | | | | 42 | | | | | | 2.5e-26 | fisher.test | | |||
| setosa | 50 | 46 | | | | | 0 | 0 | | | | | | | | |||
| versicolor | 49 | 45 | | | | | 1 | 2.4 | | | | | | | | |||
| virginica | 9 | 8.3 | | | | | 41 | 98 | | | | | | | | |||
<br> | |||
Multiple nested groups are also possible: | |||
``` r | |||
mtcars %>% | |||
dplyr::mutate(am = factor(am, labels = c("Automatic", "Manual"))) %>% | |||
group_by(vs, am, cyl) %>% | |||
desctable() %>% | |||
pander() | |||
``` | |||
| | vs: 0 (n=18)<br/>am: Automatic (n=12)<br/>cyl: 8 (n=12)<br/>N | <br/><br/><br/>Med | <br/><br/><br/>IQR | <br/><br/>tests<br/>p | <br/><br/><br/>test | <br/>am: Manual (n=6)<br/>cyl: 4 (n=1)<br/>N | <br/><br/><br/>Med | <br/><br/><br/>IQR | <br/><br/>cyl: 6 (n=3)<br/>N | <br/><br/><br/>Med | <br/><br/><br/>IQR | <br/><br/>cyl: 8 (n=2)<br/>N | <br/><br/><br/>Med | <br/><br/><br/>IQR | <br/><br/>tests<br/>p | <br/><br/><br/>test | vs: 1 (n=14)<br/>am: Automatic (n=7)<br/>cyl: 4 (n=3)<br/>N | <br/><br/><br/>Med | <br/><br/><br/>IQR | <br/><br/>cyl: 6 (n=4)<br/>N | <br/><br/><br/>Med | <br/><br/><br/>IQR | <br/><br/>tests<br/>p | <br/><br/><br/>test | <br/>am: Manual (n=7)<br/>cyl: 4 (n=7)<br/>N | <br/><br/><br/>Med | <br/><br/><br/>IQR | <br/><br/>tests<br/>p | <br/><br/><br/>test | | |||
|:-----|:--------------------------------------------------------------|:-------------------|:-------------------|:----------------------|:--------------------|:---------------------------------------------|:-------------------|:-------------------|:-----------------------------|:-------------------|:-------------------|:-----------------------------|:-------------------|:-------------------|:----------------------|:--------------------|:------------------------------------------------------------|:-------------------|:-------------------|:-----------------------------|:-------------------|:-------------------|:----------------------|:--------------------|:---------------------------------------------|:-------------------|:-------------------|:----------------------|:--------------------| | |||
| mpg | 12 | 15 | 2.6 | | no.test | 1 | 26 | 0 | 3 | 21 | 0.65 | 2 | 15 | 0.4 | 0.11 | kruskal.test | 3 | 23 | 1.5 | 4 | 19 | 1.7 | 0.057 | wilcox.test | 7 | 30 | 6.3 | | no.test | | |||
| disp | 12 | 355 | 113 | | no.test | 1 | 120 | 0 | 3 | 160 | 7.5 | 2 | 326 | 25 | 0.11 | kruskal.test | 3 | 141 | 13 | 4 | 196 | 66 | 0.05 | wilcox.test | 7 | 79 | 24 | | no.test | | |||
| hp | 12 | 180 | 44 | | no.test | 1 | 91 | 0 | 3 | 110 | 32 | 2 | 300 | 36 | 0.11 | kruskal.test | 3 | 95 | 18 | 4 | 116 | 14 | 0.05 | wilcox.test | 7 | 66 | 36 | | no.test | | |||
| drat | 12 | 3.1 | 0.11 | | no.test | 1 | 4.4 | 0 | 3 | 3.9 | 0.14 | 2 | 3.9 | 0.34 | 0.33 | kruskal.test | 3 | 3.7 | 0.11 | 4 | 3.5 | 0.92 | 0.85 | wilcox.test | 7 | 4.1 | 0.2 | | no.test | | |||
| wt | 12 | 3.8 | 0.81 | | no.test | 1 | 2.1 | 0 | 3 | 2.8 | 0.13 | 2 | 3.4 | 0.2 | 0.12 | kruskal.test | 3 | 3.1 | 0.36 | 4 | 3.4 | 0.061 | 0.05 | wilcox.test | 7 | 1.9 | 0.53 | | no.test | | |||
| qsec | 12 | 17 | 0.67 | | no.test | 1 | 17 | 0 | 3 | 16 | 0.76 | 2 | 15 | 0.05 | 0.17 | kruskal.test | 3 | 20 | 1.4 | 4 | 19 | 0.89 | 0.23 | wilcox.test | 7 | 19 | 0.62 | | no.test | | |||
| gear | 12 | 3 | 0 | | no.test | 1 | 5 | 0 | 3 | 4 | 0.5 | 2 | 5 | 0 | 0.29 | kruskal.test | 3 | 4 | 0.5 | 4 | 3.5 | 1 | 0.84 | wilcox.test | 7 | 4 | 0 | | no.test | | |||
| carb | 12 | 3 | 2 | | no.test | 1 | 2 | 0 | 3 | 4 | 1 | 2 | 6 | 2 | 0.26 | kruskal.test | 3 | 2 | 0.5 | 4 | 2.5 | 3 | 0.85 | wilcox.test | 7 | 1 | 1 | | no.test | | |||
<br> | |||
In the case of nested groups (a.k.a. sub-group analysis), statistical | |||
tests are performed only between the groups of the deepest grouping | |||
level. | |||
Statistical tests are automatically selected depending on the data and | |||
the grouping factor. | |||
## Advanced usage | |||
`desctable` automatically chooses statistical test functions if none are | |||
provided, using the following algorithm: | |||
- if the variable is a factor, use `fisher.test` | |||
- if the grouping factor has only one level, use the provided | |||
`no.test` (which does nothing) | |||
- if the grouping factor has two levels | |||
- and the variable presents homoskedasticity (p value for | |||
`var.test` \> .1) and normality of distribution in both groups, | |||
use `t.test(var.equal = T)` | |||
- and the variable does not present homoskedasticity (p value for | |||
`var.test` \< .1) but normality of distribution in both groups, | |||
use `t.test(var.equal = F)` | |||
- else use `wilcox.test` | |||
- if the grouping factor has more than two levels | |||
- and the variable presents homoskedasticity (p value for | |||
`bartlett.test` \> .1) and normality of distribution in all | |||
groups, use `oneway.test(var.equal = T)` | |||
- and the variable does not present homoskedasticity (p value for | |||
`bartlett.test` \< .1) but normality of distribution in all | |||
groups, use `oneway.test(var.equal = F)` | |||
- else use `kruskal.test` | |||
You can specify the statistical test functions yourself with the *tests* | |||
argument. This argument can either be: | |||
- a function for automatic selection of appropriate statistical test | |||
functions, depending on the data | |||
- a named list of statistical test functions | |||
Please note that the statistical test functions **must** be given as | |||
*formulas* so as to capture the name of the test to display in the | |||
table. **purrr** style formulas are also accepted, as with the | |||
statistical functions. This also allows you to specify optional arguments of | |||
such functions, and to work around non-standard test functions (see | |||
**Statistical test functions**). | |||
### Automatic function | |||
The default value for the *tests* argument is `tests_auto`, provided in | |||
the package. | |||
You can also provide your own automatic function, which needs to | |||
- accept a variable and a grouping factor as its arguments, and | |||
- return a single-term formula containing a statistical test function. | |||
This function will be used on every variable and every grouping factor | |||
to determine the appropriate test. | |||
``` r | |||
# Strictly equivalent to iris %>% group_by(Species) %>% desctable() %>% pander() | |||
iris %>% | |||
group_by(Species) %>% | |||
desctable(tests = tests_auto) %>% | |||
pander() | |||
``` | |||
| | Species: setosa (n=50)<br/>N | <br/>Mean | <br/>sd | <br/>Med | <br/>IQR | Species: versicolor (n=50)<br/>N | <br/>Mean | <br/>sd | <br/>Med | <br/>IQR | Species: virginica (n=50)<br/>N | <br/>Mean | <br/>sd | <br/>Med | <br/>IQR | tests<br/>p | <br/>test | | |||
|:-------------|:-----------------------------|:----------|:--------|:---------|:---------|:---------------------------------|:----------|:--------|:---------|:---------|:--------------------------------|:----------|:--------|:---------|:---------|:------------|:------------------------------| | |||
| Sepal.Length | 50 | 5 | 0.35 | 5 | 0.4 | 50 | 5.9 | 0.52 | 5.9 | 0.7 | 50 | 6.6 | 0.64 | 6.5 | 0.67 | 1.5e-28 | oneway.test(., var.equal = F) | | |||
| Sepal.Width | 50 | 3.4 | 0.38 | 3.4 | 0.48 | 50 | 2.8 | 0.31 | 2.8 | 0.48 | 50 | 3 | 0.32 | 3 | 0.38 | 4.5e-17 | oneway.test(., var.equal = T) | | |||
| Petal.Length | 50 | | | 1.5 | 0.18 | 50 | 4.3 | 0.47 | 4.3 | 0.6 | 50 | 5.6 | 0.55 | 5.5 | 0.78 | 4.8e-29 | kruskal.test | | |||
| Petal.Width | 50 | | | 0.2 | 0.1 | 50 | | | 1.3 | 0.3 | 50 | | | 2 | 0.5 | 3.3e-29 | kruskal.test | | |||
<br> | |||
For reference, here is the body of the `tests_auto` function in the | |||
package: | |||
## function (var, grp) | |||
## { | |||
## grp <- factor(grp) | |||
## if (nlevels(grp) < 2) | |||
## ~no.test | |||
## else if (is.factor(var)) { | |||
## if (tryCatch(is.numeric(fisher.test(var ~ grp)$p.value), | |||
## error = function(e) F)) | |||
## ~fisher.test | |||
## else ~chisq.test | |||
## } | |||
## else { | |||
## all_normal <- all(tapply(var, grp, is.normal)) | |||
## if (nlevels(grp) == 2) { | |||
## if (all_normal) { | |||
## if (tryCatch(stats::var.test(var ~ grp)$p.value > | |||
## 0.1, warning = function(e) F, error = function(e) F)) | |||
## ~t.test(., var.equal = T) | |||
## else ~t.test(., var.equal = F) | |||
## } | |||
## else ~wilcox.test | |||
## } | |||
## else { | |||
## if (all_normal) { | |||
## if (tryCatch(stats::bartlett.test(var ~ grp)$p.value > | |||
## 0.1, warning = function(e) F, error = function(e) F)) | |||
## ~oneway.test(., var.equal = T) | |||
## else ~oneway.test(., var.equal = F) | |||
## } | |||
## else ~kruskal.test | |||
## } | |||
## } | |||
## } | |||
## <bytecode: 0x56144cbe3948> | |||
## <environment: namespace:desctable> | |||
<br> | |||
### Statistical test functions | |||
You can provide a named list of statistical test functions, but here the | |||
mechanism is a bit different from the *stats* argument. | |||
The list must contain either `.auto` or `.default`. | |||
- `.auto` needs to be an automatic function, such as `tests_auto`. It | |||
will be used by default on all variables to select a test | |||
- `.default` needs to be a single-term formula containing a | |||
statistical test function that will be used on all variables | |||
You can also provide overrides to use specific tests for specific | |||
variables. | |||
This is done using list items named as the variable and containing a | |||
single-term formula function. | |||
``` r | |||
iris %>% | |||
group_by(Petal.Length > 5) %>% | |||
desctable(tests = list(.auto = tests_auto, | |||
Species = ~chisq.test)) %>% | |||
pander() | |||
``` | |||
| | Petal.Length \> 5: FALSE (n=108)<br/>N | <br/>% | <br/>Mean | <br/>sd | <br/>Med | <br/>IQR | Petal.Length \> 5: TRUE (n=42)<br/>N | <br/>% | <br/>Mean | <br/>sd | <br/>Med | <br/>IQR | tests<br/>p | <br/>test | | |||
|:---------------|:---------------------------------------|:-------|:----------|:--------|:---------|:---------|:-------------------------------------|:-------|:----------|:--------|:---------|:---------|:------------|:------------| | |||
| Sepal.Length | 108 | | | | 5.5 | 1 | 42 | | | | 6.7 | 0.85 | 1.6e-15 | wilcox.test | | |||
| Sepal.Width | 108 | | 3.1 | 0.48 | 3 | 0.6 | 42 | | | | 3 | 0.4 | 0.69 | wilcox.test | | |||
| Petal.Length | 108 | | | | 3.5 | 3 | 42 | | | | 5.6 | 0.67 | 2.1e-21 | wilcox.test | | |||
| Petal.Width | 108 | | | | 1 | 1.2 | 42 | | 2.1 | 0.28 | 2.1 | 0.47 | 1.6e-19 | wilcox.test | | |||
| **Species** | 108 | | | | | | 42 | | | | | | 2.7e-24 | chisq.test | | |||
| setosa | 50 | 46 | | | | | 0 | 0 | | | | | | | | |||
| versicolor | 49 | 45 | | | | | 1 | 2.4 | | | | | | | | |||
| virginica | 9 | 8.3 | | | | | 41 | 98 | | | | | | | | |||
<br> | |||
``` r | |||
mtcars %>% | |||
dplyr::mutate(am = factor(am, labels = c("Automatic", "Manual"))) %>% | |||
group_by(am) %>% | |||
desctable(tests = list(.default = ~wilcox.test, | |||
mpg = ~t.test)) %>% | |||
pander() | |||
``` | |||
| | am: Automatic (n=19)<br/>N | <br/>Med | <br/>IQR | am: Manual (n=13)<br/>N | <br/>Med | <br/>IQR | tests<br/>p | <br/>test | | |||
|:-----|:---------------------------|:---------|:---------|:------------------------|:---------|:---------|:------------|:------------| | |||
| mpg | 19 | 17 | 4.2 | 13 | 23 | 9.4 | 0.0014 | t.test | | |||
| cyl | 19 | 8 | 2 | 13 | 4 | 2 | 0.0039 | wilcox.test | | |||
| disp | 19 | 276 | 164 | 13 | 120 | 81 | 0.00055 | wilcox.test | | |||
| hp | 19 | 175 | 76 | 13 | 109 | 47 | 0.046 | wilcox.test | | |||
| drat | 19 | 3.1 | 0.63 | 13 | 4.1 | 0.37 | 0.00014 | wilcox.test | | |||
| wt | 19 | 3.5 | 0.41 | 13 | 2.3 | 0.84 | 4.3e-05 | wilcox.test | | |||
| qsec | 19 | 18 | 2 | 13 | 17 | 2.1 | 0.27 | wilcox.test | | |||
| vs | 19 | 0 | 1 | 13 | 1 | 1 | 0.36 | wilcox.test | | |||
| gear | 19 | 3 | 0 | 13 | 4 | 1 | 7.6e-06 | wilcox.test | | |||
| carb | 19 | 3 | 2 | 13 | 2 | 3 | 0.74 | wilcox.test | | |||
or install the development version (0.3) from github with | |||
Here’s an example of a **purrr**-style function: | |||
devtools::install_github("desctable/desctable") | |||
``` r | |||
iris %>% | |||
group_by(Petal.Length > 5) %>% | |||
desctable(tests = list(.auto = tests_auto, | |||
Petal.Width = ~oneway.test(., var.equal = T))) | |||
``` | |||
# Roadmap | |||
## Petal.Length > 5: FALSE (n=108) / N | |||
## 1 Sepal.Length 108 | |||
## 2 Sepal.Width 108 | |||
## 3 Petal.Length 108 | |||
## 4 Petal.Width 108 | |||
## 5 Species 108 | |||
## 6 Species: setosa 50 | |||
## 7 Species: versicolor 49 | |||
## 8 Species: virginica 9 | |||
## Petal.Length > 5: FALSE (n=108) / % Petal.Length > 5: FALSE (n=108) / Mean | |||
## 1 NA NA | |||
## 2 NA 3.066667 | |||
## 3 NA NA | |||
## 4 NA NA | |||
## 5 NA NA | |||
## 6 46.296296 NA | |||
## 7 45.370370 NA | |||
## 8 8.333333 NA | |||
## Petal.Length > 5: FALSE (n=108) / sd Petal.Length > 5: FALSE (n=108) / Med | |||
## 1 NA 5.5 | |||
## 2 0.4800701 3.0 | |||
## 3 NA 3.5 | |||
## 4 NA 1.0 | |||
## 5 NA NA | |||
## 6 NA NA | |||
## 7 NA NA | |||
## 8 NA NA | |||
## Petal.Length > 5: FALSE (n=108) / IQR Petal.Length > 5: TRUE (n=42) / N | |||
## 1 1.0 42 | |||
## 2 0.6 42 | |||
## 3 3.0 42 | |||
## 4 1.2 42 | |||
## 5 NA 42 | |||
## 6 NA 0 | |||
## 7 NA 1 | |||
## 8 NA 41 | |||
## Petal.Length > 5: TRUE (n=42) / % Petal.Length > 5: TRUE (n=42) / Mean | |||
## 1 NA NA | |||
## 2 NA NA | |||
## 3 NA NA | |||
## 4 NA 2.061905 | |||
## 5 NA NA | |||
## 6 0.000000 NA | |||
## 7 2.380952 NA | |||
## 8 97.619048 NA | |||
## Petal.Length > 5: TRUE (n=42) / sd Petal.Length > 5: TRUE (n=42) / Med | |||
## 1 NA 6.7 | |||
## 2 NA 3.0 | |||
## 3 NA 5.6 | |||
## 4 0.2802023 2.1 | |||
## 5 NA NA | |||
## 6 NA NA | |||
## 7 NA NA | |||
## 8 NA NA | |||
## Petal.Length > 5: TRUE (n=42) / IQR tests / p | |||
## 1 0.850 1.553676e-15 | |||
## 2 0.400 6.927432e-01 | |||
## 3 0.675 2.076978e-21 | |||
## 4 0.475 3.982443e-24 | |||
## 5 NA 2.453675e-26 | |||
## 6 NA NA | |||
## 7 NA NA | |||
## 8 NA NA | |||
## tests / test | |||
## 1 wilcox.test | |||
## 2 wilcox.test | |||
## 3 wilcox.test | |||
## 4 oneway.test(., var.equal = T) | |||
## 5 fisher.test | |||
## 6 <NA> | |||
## 7 <NA> | |||
## 8 <NA> | |||
## 0.3 | |||
<br> | |||
This new version introduces a new internal representation as well as an | |||
entirely new API for desctable ! | |||
The original `desctable` function and usage remain available until 1.0, but | |||
are now being deprecated. | |||
As with statistical functions, **any** statistical test function defined | |||
in R can be used. | |||
This new API is more flexible and simpler at the same time. Combine | |||
`group_by`, `desc_table`, `desc_tests`, and `desc_output` to create | |||
descriptive and comparative statistics tables and output them to various | |||
formats. | |||
The conditions are that the function | |||
The internal representation is now a simple dataframe in the simple | |||
descriptive case, and a nested dataframe with list-columns for | |||
comparative tables, allowing easier manipulation by the user. | |||
- accepts a formula (`variable ~ grouping_variable`) as a first | |||
positional argument (as is the case with most tests, like `t.test`), | |||
and | |||
- returns an object with a `p.value` element. | |||
## Next | |||
Several convenience functions are provided: formula versions for | |||
`chisq.test` and `fisher.test` using generic S3 methods (thus the | |||
behavior of standard calls to `chisq.test` and `fisher.test` is not | |||
modified), and `ANOVA`, a partial application of `oneway.test` with | |||
parameter *var.equal* = T. | |||
- Add a `desc_output` for {gt} | |||
- Implement a way to make tables for survival analysis. | |||
- Implement a way to make tables for multivariate models. | |||
- Allow univariate tests for simple tables | |||
- add a column for totals in grouped tables |
@@ -0,0 +1,40 @@ | |||
url: ~ | |||
template: | |||
bootstrap: 5 | |||
reference: | |||
- title: Core desctable functions | |||
desc: | | |||
Functions to define and render tables | |||
contents: | |||
- desc_table | |||
- desc_tests | |||
- desc_output | |||
- title: Deprecated API | |||
desc: | | |||
Previous API for desctable, that will be deprecated with version 1.0 | |||
contents: | |||
- desctable | |||
- print.desctable | |||
- as.data.frame.desctable | |||
- pander.desctable | |||
- datatable | |||
- stats_default | |||
- title: Convenience functions | |||
desc: | | |||
Functions to facilitate frequent operations | |||
- subtitle: stats | |||
contents: | |||
- IQR | |||
- percent | |||
- is.normal | |||
- subtitle: tests | |||
contents: | |||
- no.test | |||
- chisq.test | |||
- fisher.test | |||
- ANOVA | |||
- subtitle: automatic functions | |||
contents: | |||
- stats_auto | |||
- tests_auto | |||
@@ -0,0 +1 @@ | |||
Subproject commit c80233d437a49b77ca1f162ff5cb9d9a13c162e1 |
@@ -0,0 +1,79 @@ | |||
--- | |||
title: "Desctable" | |||
output: github_document | |||
--- | |||
```{r, echo = F, message = F, warning = F} | |||
knitr::opts_chunk$set(message = F, warning = F) | |||
``` | |||
[![Travis-CI Build Status](https://travis-ci.org/desctable/desctable.svg?branch=master)](https://travis-ci.org/desctable/desctable) [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/desctable)](https://cran.r-project.org/package=desctable) [![CRAN RStudio mirror downloads](http://cranlogs.r-pkg.org/badges/desctable)](https://www.r-pkg.org:443/pkg/desctable) | |||
**Warning to existing users** | |||
*This version introduces a new API that should make the creation of tables more flexible. | |||
The old API is still present but in a deprecated mode. | |||
See the roadmap below, and the website for the new usage. | |||
Suggestions about this change are welcome !* | |||
--- | |||
# Introduction | |||
Desctable aims to be a simple and expressive interface to building statistical tables in R. | |||
# Installation | |||
Install from CRAN with | |||
``` | |||
install.packages("desctable") | |||
``` | |||
or install the development version from github with | |||
``` | |||
devtools::install_github("desctable/desctable") | |||
``` | |||
# Basic usage | |||
Load the package | |||
```{r} | |||
library(desctable) | |||
``` | |||
Simply apply `desc_table` on a dataframe or a grouped dataframe to get a statistical table | |||
```{r} | |||
iris %>% | |||
desc_table() | |||
``` | |||
Declare the statistics you want to see, and give them the name of your choice | |||
```{r} | |||
iris %>% | |||
desc_table("N" = length, | |||
"%" = percent, | |||
mean, | |||
sd) | |||
``` | |||
Create comparative tables, compute statistical tests and output to `pander` for crisp markdown rendering! | |||
```{r} | |||
mtcars %>% | |||
dplyr::mutate(cyl = factor(cyl), | |||
vs = factor(vs, labels = c("V-shaped", "straight")), | |||
am = factor(am, labels = c("automatic", "manual"))) %>% | |||
group_by(am) %>% | |||
desc_table(N = length, | |||
"%" = percent, | |||
"Median" = median, | |||
IQR) %>% | |||
desc_tests(vs = ~chisq.test) %>% | |||
desc_output("pander") | |||
``` | |||
Read more in the [vignette](articles/desctable.html) ! |
@@ -0,0 +1,125 @@ | |||
Desctable | |||
================ | |||
[![Travis-CI Build | |||
Status](https://travis-ci.org/desctable/desctable.svg?branch=master)](https://travis-ci.org/desctable/desctable) | |||
[![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/desctable)](https://cran.r-project.org/package=desctable) | |||
[![CRAN RStudio mirror | |||
downloads](http://cranlogs.r-pkg.org/badges/desctable)](https://www.r-pkg.org:443/pkg/desctable) | |||
**Warning to existing users** | |||
*This version introduces a new API that should make the creation of | |||
tables more flexible. | |||
The old API is still present but in a deprecated mode. | |||
See the roadmap below, and the website for the new usage. | |||
Suggestions about this change are welcome !* | |||
------------------------------------------------------------------------ | |||
# Introduction | |||
Desctable aims to be a simple and expressive interface to building | |||
statistical tables in R. | |||
# Installation | |||
Install from CRAN with | |||
install.packages("desctable") | |||
or install the development version from github with | |||
devtools::install_github("desctable/desctable") | |||
# Basic usage | |||
Load the package | |||
``` r | |||
library(desctable) | |||
``` | |||
Simply apply `desc_table` on a dataframe or a grouped dataframe to get a | |||
statistical table | |||
``` r | |||
iris %>% | |||
desc_table() | |||
``` | |||
## Variables N % Min Q1 Med Mean Q3 Max | |||
## 1 Sepal.Length 150 NA 4.3 5.1 5.80 5.843333 6.4 7.9 | |||
## 2 Sepal.Width 150 NA 2.0 2.8 3.00 3.057333 3.3 4.4 | |||
## 3 Petal.Length 150 NA 1.0 1.6 4.35 3.758000 5.1 6.9 | |||
## 4 Petal.Width 150 NA 0.1 0.3 1.30 1.199333 1.8 2.5 | |||
## 5 **Species** 150 NA NA NA NA NA NA NA | |||
## 6 **Species**: *setosa* 50 33.33333 NA NA NA NA NA NA | |||
## 7 **Species**: *versicolor* 50 33.33333 NA NA NA NA NA NA | |||
## 8 **Species**: *virginica* 50 33.33333 NA NA NA NA NA NA | |||
## sd IQR | |||
## 1 0.8280661 1.3 | |||
## 2 0.4358663 0.5 | |||
## 3 1.7652982 3.5 | |||
## 4 0.7622377 1.5 | |||
## 5 NA NA | |||
## 6 NA NA | |||
## 7 NA NA | |||
## 8 NA NA | |||
Declare the statistics you want to see, and give them the name of your | |||
choice | |||
``` r | |||
iris %>% | |||
desc_table("N" = length, | |||
"%" = percent, | |||
mean, | |||
sd) | |||
``` | |||
## Variables N % mean sd | |||
## 1 Sepal.Length 150 NA 5.843333 0.8280661 | |||
## 2 Sepal.Width 150 NA 3.057333 0.4358663 | |||
## 3 Petal.Length 150 NA 3.758000 1.7652982 | |||
## 4 Petal.Width 150 NA 1.199333 0.7622377 | |||
## 5 **Species** 150 NA NA NA | |||
## 6 **Species**: *setosa* 50 33.33333 NA NA | |||
## 7 **Species**: *versicolor* 50 33.33333 NA NA | |||
## 8 **Species**: *virginica* 50 33.33333 NA NA | |||
Create comparative tables, compute statistical tests and output to | |||
`pander` for crisp markdown rendering! | |||
``` r | |||
mtcars %>% | |||
dplyr::mutate(cyl = factor(cyl), | |||
vs = factor(vs, labels = c("V-shaped", "straight")), | |||
am = factor(am, labels = c("automatic", "manual"))) %>% | |||
group_by(am) %>% | |||
desc_table(N = length, | |||
"%" = percent, | |||
"Median" = median, | |||
IQR) %>% | |||
desc_tests(vs = ~chisq.test) %>% | |||
desc_output("pander") | |||
``` | |||
| | am = manual</br> (N = 13)</br> N | % | Median | IQR | am = automatic</br> (N = 19)</br> N | % | Median | IQR | p | test | | |||
|:-------------|:---------------------------------|:----|:-------|:-----|:------------------------------------|:----|:-------|:-----|:-------|:------------| | |||
| mpg | 13 | | 23 | 9.4 | 19 | | 17 | 4.2 | ≤ 0.01 | wilcox.test | | |||
| **cyl** | 13 | | | | 19 | | | | ≤ 0.01 | fisher.test | | |||
| 4 | 8 | 62 | | | 3 | 16 | | | | | | |||
| 6 | 3 | 23 | | | 4 | 21 | | | | | | |||
| 8 | 2 | 15 | | | 12 | 63 | | | | | | |||
| disp | 13 | | 120 | 81 | 19 | | 276 | 164 | ≤ 0.01 | wilcox.test | | |||
| hp | 13 | | 109 | 47 | 19 | | 175 | 76 | 0.046 | wilcox.test | | |||
| drat | 13 | | 4.1 | 0.37 | 19 | | 3.1 | 0.63 | ≤ 0.01 | wilcox.test | | |||
| wt | 13 | | 2.3 | 0.84 | 19 | | 3.5 | 0.41 | ≤ 0.01 | wilcox.test | | |||
| qsec | 13 | | 17 | 2.1 | 19 | | 18 | 2 | 0.27 | wilcox.test | | |||
| **vs** | 13 | | | | 19 | | | | 0.56 | chisq.test | | |||
| V-shaped | 6 | 46 | | | 12 | 63 | | | | | | |||
| straight | 7 | 54 | | | 7 | 37 | | | | | | |||
| gear | 13 | | 4 | 1 | 19 | | 3 | 0 | ≤ 0.01 | wilcox.test | | |||
| carb | 13 | | 2 | 3 | 19 | | 3 | 2 | 0.74 | wilcox.test | | |||
Read more in the [vignette](articles/desctable.html) ! |
@@ -1,5 +1,5 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/output.R | |||
% Please edit documentation in R/deprecated.R | |||
\name{as.data.frame.desctable} | |||
\alias{as.data.frame.desctable} | |||
\title{As.data.frame method for desctable} | |||
@@ -17,3 +17,4 @@ A flat dataframe | |||
\description{ | |||
As.data.frame method for desctable | |||
} | |||
\keyword{deprecated} |
@@ -1,5 +1,5 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/output.R | |||
% Please edit documentation in R/deprecated.R | |||
\name{datatable} | |||
\alias{datatable} | |||
\alias{datatable.default} | |||
@@ -191,3 +191,4 @@ datatable(data.frame(x = Sys.time())) | |||
\references{ | |||
See \url{https://rstudio.github.io/DT/} for the full documentation. | |||
} | |||
\keyword{deprecated} |
@@ -0,0 +1,45 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/output.R | |||
\name{desc_output} | |||
\alias{desc_output} | |||
\title{desc_output} | |||
\usage{ | |||
desc_output(desctable, target = c("df", "pander", "DT"), digits = 2, ...) | |||
} | |||
\arguments{ | |||
\item{desctable}{The desctable to output} | |||
\item{target}{The desired target. One of "df", "pander", or "DT".} | |||
\item{digits}{The number of digits to display. The p values will be simplified under 1E-digits} | |||
\item{...}{Other arguments to pass to \code{data.frame}, \code{pander::pander}, or \code{DT::datatable}} | |||
} | |||
\value{ | |||
The output object (or corresponding side effect) | |||
} | |||
\description{ | |||
Output a desctable to the desired target format | |||
} | |||
\details{ | |||
Output a simple or grouped desctable to different formats. | |||
Currently available formats are\itemize{ | |||
\item data.frame ("df") | |||
\item pander ("pander") | |||
\item datatable ("DT") | |||
} | |||
All numerical values will be rounded to the digits argument. | |||
If statistical tests are present, p values below 1E-digits will be replaced with "≤ 1E-digits" | |||
(eg. "≤ 0.01" for values below 0.01 when digits = 2) | |||
} | |||
\seealso{ | |||
\code{\link[DT]{datatable}} | |||
\code{\link[pander]{pander}} | |||
Other desc_table core functions: | |||
\code{\link{desc_table}()}, | |||
\code{\link{desc_tests}()} | |||
} | |||
\concept{desc_table core functions} |
@@ -0,0 +1,98 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/build.R | |||
\name{desc_table} | |||
\alias{desc_table} | |||
\alias{desc_table.default} | |||
\alias{desc_table.data.frame} | |||
\alias{desc_table.grouped_df} | |||
\title{Generate a statistics table} | |||
\usage{ | |||
desc_table(data, ..., .auto, .labels) | |||
\method{desc_table}{default}(data, ..., .auto, .labels) | |||
\method{desc_table}{data.frame}(data, ..., .labels = NULL, .auto = stats_auto) | |||
\method{desc_table}{grouped_df}(data, ..., .auto = stats_auto, .labels = NULL) | |||
} | |||
\arguments{ | |||
\item{data}{The dataframe to analyze} | |||
\item{...}{A list of named statistics to apply to each element of the dataframe, or a function returning a list of named statistics} | |||
\item{.auto}{A function to automatically determine appropriate statistics} | |||
\item{.labels}{A named character vector of variable labels} | |||
} | |||
\value{ | |||
A simple or grouped descriptive table | |||
} | |||
\description{ | |||
Generate a statistics table with the chosen statistical functions, nested if called with a grouped dataframe. | |||
} | |||
\section{Stats}{ | |||
The statistical functions to use in the table are passed as additional arguments. | |||
If the argument is named (eg. \code{N = length}) the name will be used as the column title instead of the function | |||
name (here, \strong{N} instead of \strong{length}). | |||
Any R function can be a statistical function, as long as it returns only one value when applied to a vector, or as | |||
many values as there are levels in a factor, plus one. | |||
Users can also use \code{purrr::map}-like formulas as quick anonymous functions (eg. \code{Q1 = ~ quantile(., .25)} to get the first quartile in a | |||
column named \strong{Q1}) | |||
If no statistical function is given to \code{desc_table}, the \code{.auto} argument is used to provide a function | |||
that automatically determines the most appropriate statistical functions to use based on the contents of the table. | |||
} | |||
\section{Labels}{ | |||
\code{.labels} is a named character vector to provide "pretty" labels to variables. | |||
If given, the variable names for which there is a label will be replaced by their corresponding label. | |||
Not all variables need to have a label, and labels for non-existing variables are ignored. | |||
labels must be given in the form \code{c(unquoted_variable_name = "label")} | |||
} | |||
\section{Output}{ | |||
The output is either a dataframe in the case of a simple descriptive table, | |||
or nested dataframes in the case of a comparative table. | |||
} | |||
\examples{ | |||
iris \%>\% | |||
desc_table() | |||
# Does the same as stats_auto here | |||
iris \%>\% | |||
desc_table("N" = length, | |||
"Min" = min, | |||
"Q1" = ~quantile(., .25), | |||
"Med" = median, | |||
"Mean" = mean, | |||
"Q3" = ~quantile(., .75), | |||
"Max" = max, | |||
"sd" = sd, | |||
"IQR" = IQR) | |||
# With grouping on a factor | |||
iris \%>\% | |||
group_by(Species) \%>\% | |||
desc_table(.auto = stats_auto) | |||
} | |||
\seealso{ | |||
\code{\link{stats_auto}} | |||
\code{\link{IQR}} | |||
\code{\link{percent}} | |||
Other desc_table core functions: | |||
\code{\link{desc_output}()}, | |||
\code{\link{desc_tests}()} | |||
} | |||
\concept{desc_table core functions} |
@@ -0,0 +1,57 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/build.R | |||
\name{desc_tests} | |||
\alias{desc_tests} | |||
\title{Add tests to a desc_table} | |||
\usage{ | |||
desc_tests(desctable, .auto = tests_auto, .default = NULL, ...) | |||
} | |||
\arguments{ | |||
\item{desctable}{A desc_table} | |||
\item{.auto}{A function to automatically determine the appropriate tests} | |||
\item{.default}{A default fallback test} | |||
\item{...}{A list of statistical tests associated to variable names} | |||
} | |||
\value{ | |||
A desc_table with tests | |||
} | |||
\description{ | |||
Add test statistics to a grouped desc_table, with the tests specified as \code{variable = test}. | |||
} | |||
\section{Tests}{ | |||
The statistical test functions to use in the table are passed as additional named arguments. Tests must be preceded | |||
by a formula tilde (\code{~}). | |||
\code{name = ~test} will apply test \code{test} to variable \code{name}. | |||
Any R test function can be used, as long as it returns an object containing a \code{p.value} element, which is the | |||
case for most tests returning an object of class \code{htest}. | |||
Users can also use \code{purrr::map}-like formulas as quick anonymous functions (eg. \code{~ t.test(., var.equal = T)} to | |||
compute a t test without the Welch correction). | |||
} | |||
\examples{ | |||
iris \%>\% | |||
group_by(Species) \%>\% | |||
desc_table() \%>\% | |||
desc_tests(Sepal.Length = ~kruskal.test, | |||
Sepal.Width = ~oneway.test, | |||
Petal.Length = ~oneway.test(., var.equal = T), | |||
Petal.Length = ~oneway.test(., var.equal = F)) | |||
} | |||
\seealso{ | |||
\code{\link{tests_auto}} | |||
\code{\link{no.test}} | |||
\code{\link{ANOVA}} | |||
Other desc_table core functions: | |||
\code{\link{desc_output}()}, | |||
\code{\link{desc_table}()} | |||
} | |||
\concept{desc_table core functions} |
@@ -1,5 +1,5 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/build.R | |||
% Please edit documentation in R/deprecated.R | |||
\name{desctable} | |||
\alias{desctable} | |||
\alias{desctable.default} | |||
@@ -108,3 +108,4 @@ iris \%>\% | |||
\code{\link{datatable.desctable}} | |||
} | |||
\keyword{deprecated} |
@@ -1,5 +1,5 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/utils.R | |||
% Please edit documentation in R/deprecated.R | |||
\name{flatten_desctable} | |||
\alias{flatten_desctable} | |||
\title{Flatten a desctable to a dataframe recursively} | |||
@@ -15,3 +15,5 @@ A flat dataframe | |||
\description{ | |||
Flatten a desctable to a dataframe recursively | |||
} | |||
\keyword{deprecated} | |||
\keyword{internal} |
@@ -1,5 +1,5 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/utils.R | |||
% Please edit documentation in R/deprecated.R | |||
\name{head_dataframe} | |||
\alias{head_dataframe} | |||
\title{Build the header for dataframe} | |||
@@ -15,3 +15,5 @@ A names vector | |||
\description{ | |||
Build the header for dataframe | |||
} | |||
\keyword{deprecated} | |||
\keyword{internal} |
@@ -1,5 +1,5 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/utils.R | |||
% Please edit documentation in R/deprecated.R | |||
\name{head_datatable} | |||
\alias{head_datatable} | |||
\title{Build the header for datatable} | |||
@@ -15,3 +15,5 @@ An htmltools$tags object containing the header | |||
\description{ | |||
Build the header for datatable | |||
} | |||
\keyword{deprecated} | |||
\keyword{internal} |
@@ -1,5 +1,5 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/utils.R | |||
% Please edit documentation in R/deprecated.R | |||
\name{head_pander} | |||
\alias{head_pander} | |||
\title{Build the header for pander} | |||
@@ -15,3 +15,5 @@ A names vector | |||
\description{ | |||
Build the header for pander | |||
} | |||
\keyword{deprecated} | |||
\keyword{internal} |
@@ -1,5 +1,5 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/utils.R | |||
% Please edit documentation in R/deprecated.R | |||
\name{header} | |||
\alias{header} | |||
\title{Build header} | |||
@@ -18,3 +18,5 @@ A header object in the output format | |||
Take a desctable object and create a suitable header for the mentioned output. | |||
Output can be one of "pander", "datatable", or "dataframe". | |||
} | |||
\keyword{deprecated} | |||
\keyword{internal} |
@@ -1,5 +1,5 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/utils.R | |||
% Please edit documentation in R/deprecated.R | |||
\name{headerList} | |||
\alias{headerList} | |||
\title{build a header list object} | |||
@@ -15,3 +15,5 @@ a nested list of headers with colspans | |||
\description{ | |||
build a header list object | |||
} | |||
\keyword{deprecated} | |||
\keyword{internal} |
@@ -20,3 +20,4 @@ The combined vector | |||
The vectors in the y list will be inserted | |||
at positions respectively *after* the x[position] element of x | |||
} | |||
\keyword{internal} |
@@ -1,5 +1,5 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/output.R | |||
% Please edit documentation in R/deprecated.R | |||
\name{pander.desctable} | |||
\alias{pander.desctable} | |||
\title{Pander method for desctable} | |||
@@ -41,3 +41,4 @@ Uses \code{pandoc.table}, with some default parameters (\code{digits = 2}, \code | |||
\seealso{ | |||
\code{\link{pandoc.table}} | |||
} | |||
\keyword{deprecated} |
@@ -0,0 +1,30 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/deprecated.R | |||
\name{parse_formula} | |||
\alias{parse_formula} | |||
\title{Parse a formula} | |||
\usage{ | |||
parse_formula(x, f) | |||
} | |||
\arguments{ | |||
\item{x}{The variable to test it on} | |||
\item{f}{A formula to parse} | |||
} | |||
\value{ | |||
A function to use as a stat/test | |||
} | |||
\description{ | |||
Parse a formula defining the conditions to pick a stat/test | |||
} | |||
\details{ | |||
Parse a formula defining the conditions to pick a stat/test | |||
and return the function to use. | |||
The formula is to be given in the form of | |||
conditional ~ T | F | |||
and conditions can be nested such as | |||
conditional1 ~ (conditional2 ~ T | F) | F | |||
The FALSE option can be omitted, and the TRUE can be replaced with NA | |||
} | |||
\keyword{deprecated} | |||
\keyword{internal} |
@@ -1,5 +1,5 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/output.R | |||
% Please edit documentation in R/deprecated.R | |||
\name{print.desctable} | |||
\alias{print.desctable} | |||
\title{Print method for desctable} | |||
@@ -17,3 +17,4 @@ A flat dataframe | |||
\description{ | |||
Print method for desctable | |||
} | |||
\keyword{deprecated} |
@@ -1,11 +1,12 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/imports.R | |||
% Please edit documentation in R/deprecated.R, R/imports.R | |||
\docType{import} | |||
\name{reexports} | |||
\alias{reexports} | |||
\alias{pander} | |||
\alias{\%>\%} | |||
\alias{group_by} | |||
\alias{pander} | |||
\alias{!!!} | |||
\title{Objects exported from other packages} | |||
\keyword{internal} | |||
\description{ | |||
@@ -16,5 +17,7 @@ below to see their documentation. | |||
\item{dplyr}{\code{\link[dplyr:reexports]{\%>\%}}, \code{\link[dplyr]{group_by}}} | |||
\item{pander}{\code{\link[pander]{pander}}} | |||
\item{rlang}{\code{\link[rlang:splice-operator]{!!!}}} | |||
}} | |||
@@ -1,5 +1,5 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/utils.R | |||
% Please edit documentation in R/deprecated.R | |||
\name{set_desctable_class} | |||
\alias{set_desctable_class} | |||
\title{Set the "desctable" class to the passed object} | |||
@@ -15,3 +15,5 @@ The object with the class "desctable" | |||
\description{ | |||
Set the "desctable" class to the passed object | |||
} | |||
\keyword{deprecated} | |||
\keyword{internal} |
@@ -22,3 +22,4 @@ to produce a single statistics column. | |||
The result is either a numeric vector, or a character vector if | |||
the content of the column is not made entirely of numbers. | |||
} | |||
\keyword{internal} |
@@ -19,3 +19,4 @@ If stats is a list of functions or purrr::map like formulas, use them. | |||
If it is a single function, use it with the entire data as | |||
its argument to produce a list of statistical functions to use. | |||
} | |||
\keyword{internal} |
@@ -26,3 +26,4 @@ Applying the function on a factor should return nlevels + 1 value, or one value | |||
See \code{parse_formula} for the usage for formulas. | |||
} | |||
\keyword{internal} |
@@ -0,0 +1,20 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/stats.R | |||
\name{stats_auto} | |||
\alias{stats_auto} | |||
\title{Function to create a list of statistics to use in desctable} | |||
\usage{ | |||
stats_auto(data) | |||
} | |||
\arguments{ | |||
\item{data}{The dataframe to apply the statistic to} | |||
} | |||
\value{ | |||
A list of statistics to use, assessed from the content of the dataframe | |||
} | |||
\description{ | |||
This function takes a dataframe as argument and returns a list of statistics in the form accepted by desctable. | |||
} | |||
\details{ | |||
You can define your own automatic function, as long as it takes a dataframe as argument and returns a list of functions, or formulas defining conditions to use a stat function. | |||
} |
@@ -1,37 +1,24 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/stats.R | |||
% Please edit documentation in R/deprecated.R | |||
\name{stats_default} | |||
\alias{stats_default} | |||
\alias{stats_normal} | |||
\alias{stats_nonnormal} | |||
\alias{stats_auto} | |||
\title{Functions to create a list of statistics to use in desctable} | |||
\title{Define a list of default statistics} | |||
\usage{ | |||
stats_default(data) | |||
stats_normal(data) | |||
stats_nonnormal(data) | |||
stats_auto(data) | |||
} | |||
\arguments{ | |||
\item{data}{The dataframe to apply the statistic to} | |||
\item{data}{A dataframe} | |||
} | |||
\value{ | |||
A list of statistics to use, potentially assessed from the dataframe | |||
A list of statistical functions | |||
} | |||
\description{ | |||
These functions take a dataframe as argument and return a list of statistcs in the form accepted by desctable. | |||
} | |||
\details{ | |||
Already defined are | |||
\enumerate{ | |||
\item stats_default with length, \%, mean, sd, med and IQR | |||
\item stats_normal with length, \%, mean and sd | |||
\item stats_nonnormal with length, %, median and IQR | |||
\item stats_auto, which picks stats depending of the data | |||
} | |||
You can define your own automatic functions, as long as they take a dataframe as argument and return a list of functions or formulas defining conditions to use a stat function. | |||
Define a list of default statistics | |||
} | |||
\keyword{deprecated} |
@@ -1,5 +1,5 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/build.R | |||
% Please edit documentation in R/deprecated.R | |||
\name{subNames} | |||
\alias{subNames} | |||
\title{Create the subtables names} | |||
@@ -18,3 +18,5 @@ A character vector with the names for the subtables | |||
Create the subtables names, as | |||
factor: level (n=sub-group length) | |||
} | |||
\keyword{deprecated} | |||
\keyword{internal} |
@@ -1,5 +1,5 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/build.R | |||
% Please edit documentation in R/deprecated.R | |||
\name{subTable} | |||
\alias{subTable} | |||
\title{Create a subtable in a grouped desctable} | |||
@@ -21,3 +21,5 @@ A nested list of statTables and testColumns | |||
\description{ | |||
Create a subtable in a grouped desctable | |||
} | |||
\keyword{deprecated} | |||
\keyword{internal} |
@@ -19,3 +19,4 @@ A numeric vector of pvalues | |||
\description{ | |||
Create the pvalues column | |||
} | |||
\keyword{internal} |
@@ -21,3 +21,4 @@ Transform a function into a valid test function for the table | |||
Applying the function on a numerical vector should return one value | |||
Applying the function on a factor should return nlevels + 1 value, or one value per factor level | |||
} | |||
\keyword{internal} |
@@ -2,7 +2,7 @@ | |||
% Please edit documentation in R/tests.R | |||
\name{tests_auto} | |||
\alias{tests_auto} | |||
\title{Functions to choose a statistical test} | |||
\title{Function to choose a statistical test} | |||
\usage{ | |||
tests_auto(var, grp) | |||
} | |||
@@ -15,8 +15,9 @@ tests_auto(var, grp) | |||
A statistical test function | |||
} | |||
\description{ | |||
These functions take a variable and a grouping variable as arguments, and return a statistcal test to use, expressed as a single-term formula. | |||
This function takes a variable and a grouping variable as arguments, and returns a statistical test to use, expressed as a single-term formula. | |||
} | |||
\details{ | |||
Currently, only \code{tests_auto} is defined, and picks between t test, wilcoxon, anova, kruskal-wallis and fisher depending on the number of groups, the type of the variable, the normality and homoskedasticity of the distributions. | |||
This function uses appropriate non-parametric tests depending on the number of levels (wilcox.test for two levels | |||
and kruskal.test for more), and fisher.test with fallback on chisq.test on error for factors. | |||
} |
@@ -23,3 +23,4 @@ labels is an optional named character vector used to make the table prettier. | |||
If given, the variable names for which there is a label will be replaced by their corresponding label. | |||
Not all variables need to have a label, and labels for non-existing variables are ignored. | |||
} | |||
\keyword{internal} |
@@ -0,0 +1,22 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/utils.R | |||
\name{which.desctable} | |||
\alias{which.desctable} | |||
\title{Is the object possibly a desctable?} | |||
\usage{ | |||
which.desctable(desctable) | |||
} | |||
\arguments{ | |||
\item{desctable}{A potential desctable to check} | |||
} | |||
\value{ | |||
The type of desctable or FALSE | |||
} | |||
\description{ | |||
Check if the object is produced by desc_table. | |||
Return a string: | |||
- simple | |||
- grouped | |||
or FALSE if not a desctable | |||
} | |||
\keyword{internal} |
@@ -17,339 +17,111 @@ options(DT.options = list(#scrollX = T, | |||
fixedColumns = T)) | |||
knitr::opts_chunk$set(message = F, warning = F, screenshot.force = F) | |||
``` | |||
Desctable is a comprehensive descriptive and comparative tables generator for R. | |||
Every person doing data analysis has to create tables for descriptive summaries of data (a.k.a. Table.1), or comparative tables. | |||
Many packages, such as the aptly named **tableone**, address this issue. However, they often include hard-coded behaviors, have outputs not easily manipulable with standard R tools, or their syntax is out-of-style (e.g. the argument order makes them difficult to use with the pipe (`%>%`)). | |||
Enter **desctable**, a package built with the following objectives in mind: | |||
* generate descriptive and comparative statistics tables with nesting | |||
* keep the syntax as simple as possible | |||
* have good reasonable defaults | |||
* be entirely customizable, using standard R tools and functions | |||
* produce the simplest (as a data structure) output possible | |||
* provide helpers for different outputs | |||
* integrate with "modern" R usage, and the **tidyverse** set of tools | |||
* apply functional paradigms | |||
---- | |||
Desctable aims to be a simple and expressive interface to building statistical tables in R. | |||
# Descriptive tables | |||
## Simple usage | |||
**desctable** uses and exports the pipe (`%>%`) operator (from packages **magrittr** and **dplyr** fame), though it is not mandatory to use it. | |||
The single interface to the package is its eponymous `desctable` function. | |||
## Simple | |||
When used on a data.frame, it returns a descriptive table: | |||
Creating a descriptive table with desctable is as easy as | |||
```{r} | |||
iris %>% | |||
desctable() | |||
desctable(mtcars) | |||
desc_table() | |||
``` | |||
<br> | |||
As you can see with these two examples, `desctable` describes every variable, with individual levels for factors. It picks statistical functions depending on the type and distribution of the variables in the data, and applies those statistical functions only on the relevant variables. | |||
## Output | |||
The object produced by `desctable` is in fact a list of data.frames, with a "desctable" class. | |||
Methods for reduction to a simple dataframe (`as.data.frame`, automatically used for printing), conversion to markdown (`pander`), and interactive html output with **DT** (`datatable`) are provided: | |||
By default, `desc_table` will select the most appropriate statistics for the given table, but you can choose your own as easily | |||
```{r} | |||
iris %>% | |||
desctable() %>% | |||
pander() | |||
mtcars %>% | |||
desctable() %>% | |||
datatable() | |||
desc_table(N = length, | |||
mean, | |||
sd) | |||
``` | |||
<br> | |||
To use `pander` you need to load the package yourself. | |||
Calls to `pander` and `datatable` with "regular" dataframes will not be affected by the defaults used in the package, and you can modify these defaults for **desctable** objects. | |||
The `datatable` wrapper function for desctable objects comes with some default options and formatting such as freezing the row names and table header, export buttons, and rounding of values. Both `pander` and `datatable` wrappers take a *digits* argument to set the number of decimals to show. (`pander` uses the *digits*, *justify* and *missing* arguments of `pandoc.table`, whereas `datatable` calls `prettyNum` with the `digits` parameter, and removes `NA` values. You can set `digits = NULL` if you want the full table and format it yourself.) | |||
Subsequent outputs in this vignette will use **DT**. | |||
## Advanced usage | |||
`desctable` automatically chooses statistical functions if none is provided, using the following algorithm: | |||
* always show N | |||
* if there are factors, show % | |||
* if there are normally distributed variables, show Mean and SD | |||
* if there are non-normally distributed variables, show Median and IQR | |||
For each variable in the table, compute the relevant statistical functions in that list (non-applicable functions will safely return `NA`). | |||
You can specify the statistical functions yourself with the *stats* argument. This argument can either be: | |||
* a function for automatic selection of appropriate statistical functions, depending on the data | |||
* a named list of functions/formulas | |||
The functions/formulas leverage the **tidyverse** way of working with anonymous functions, i.e.: | |||
If a *function*, it is used as is. | |||
If a *formula*, e.g. '~ .x + 1' or `~ . + 1`, it is converted to a function. There are three ways to refer to the arguments: | |||
* For a single argument function, use '.' | |||
* For a two argument function, use '.x' and '.y' | |||
* For more arguments, use '..1', '..2', '..3' etc | |||
This syntax allows you to create very compact anonymous functions, and is the same as in the `map` family of functions from **purrr**. | |||
**Conditional formulas (`condition ~ if_T | if_F`) from previous versions are no longer supported!** | |||
### Automatic function | |||
As you can see with `N = length`, you can give a meaningful name to the column instead of the name of the function. | |||
You are not limited in your options, and can use any statistical function that exists in R, even your own! | |||
The default value for the *stats* argument is `stats_auto`, provided in the package. | |||
Several other "automatic statistical functions" are defined in this package: `stats_auto`, `stats_default`, `stats_normal`, `stats_nonnormal`. | |||
You can also provide your own automatic function, which needs to | |||
* accept a dataframe as its argument (whether to use this dataframe or not in the function is your choice), and | |||
* return a named list of statistical functions to use, as defined in the subsequent paragraphs. | |||
You can also use `purrr::map`-like formulas, for example to get the first and third quartiles here | |||
```{r} | |||
# Strictly equivalent to iris %>% desctable() %>% datatable() | |||
iris %>% | |||
desctable(stats = stats_auto) %>% | |||
datatable() | |||
``` | |||
<br> | |||
For reference, here is the body of the `stats_auto` function in the package: | |||
```{r, echo = F} | |||
print(stats_auto) | |||
``` | |||
<br> | |||
### Statistical functions | |||
Statistical functions can be **any** function defined in R that you want to use, such as `length` or `mean`. | |||
The only condition is that they return a single numerical value. One exception is when they return a vector of length `1 + nlevels(x)` when applied to factors, as is needed for the `percent` function. | |||
As mentioned above, they need to be used inside a **named list**, such as | |||
```{r} | |||
mtcars %>% | |||
desctable(stats = list("N" = length, "Mean" = mean, "SD" = sd)) %>% | |||
datatable() | |||
desc_table(N = length, | |||
"%" = percent, | |||
Q1 = ~ quantile(., .25), | |||
Med = median, | |||
Q3 = ~ quantile(., .75)) | |||
``` | |||
<br> | |||
The names will be used as column headers in the resulting table, and the functions will be applied safely on the variables (errors return `NA`, and for factors the function will be used on individual levels). | |||
Several convenience functions are included in this package. | |||
* `percent`, which prints percentages of levels in a factor | |||
* `IQR`, which re-implements `stats::IQR` but works better with `NA` values | |||
* `is.normal`, which tests for normality using the following method: `length(na.omit(x)) > 30 & shapiro.test(x)$p.value > .1` | |||
Be aware that **all functions will be used on variables stripped of their `NA` values!** | |||
This is necessary for most statistical functions to be useful, and makes **N** (`length`) show only the number of observations in the dataset for each variable. | |||
### Labels | |||
It is often the case that variable names are not "pretty" enough to be used as-is in a table. | |||
Although you could still edit the variable labels in the table afterwards using sub-setting or string replacement functions, we provide a facility for this using the **labels** argument. | |||
## By group | |||
The **labels** argument is a named character vector associating variable names and labels. | |||
You don't need to provide labels for all the variables, and extra labels will be silently discarded. This allows you to define a "global" labels vector and use it for multiple tables even after variable selections. | |||
```{r} | |||
mtlabels <- c(mpg = "Miles/(US) gallon", | |||
cyl = "Number of cylinders", | |||
disp = "Displacement (cu.in.)", | |||
hp = "Gross horsepower", | |||
drat = "Rear axle ratio", | |||
wt = "Weight (1000 lbs)", | |||
qsec = "¼ mile time", | |||
vs = "V/S", | |||
am = "Transmission", | |||
gear = "Number of forward gears", | |||
carb = "Number of carburetors") | |||
mtcars %>% | |||
dplyr::mutate(am = factor(am, labels = c("Automatic", "Manual"))) %>% | |||
desctable(labels = mtlabels) %>% | |||
datatable() | |||
``` | |||
<br> | |||
---- | |||
# Comparative tables | |||
## Simple usage | |||
Creating a comparative table (between groups defined by a factor) using `desctable` is as easy as creating a descriptive table. | |||
It leverages the `group_by` function from **dplyr**: | |||
You can also create nested descriptive tables by applying `group_by` on your dataframe | |||
```{r} | |||
iris %>% | |||
group_by(Species) %>% | |||
desctable() -> iris_by_Species | |||
iris_by_Species | |||
desc_table() | |||
``` | |||
<br> | |||
The result is a table containing a descriptive sub-table for each level of the grouping factor (the statistical functions rules are applied to each sub-table independently), with the statistical tests performed, and their p values. | |||
When displayed as a flat dataframe, the grouping header appears in each variable name. | |||
You can also see the grouping headers by inspecting the resulting object, which is a nested list of dataframes, each dataframe being named after the grouping factor and its levels (with sample size for each). | |||
However, because of the grouping, you can see the resulting object is not a simple data frame, but a nested dataframe (see tidyr::nest and tidyr::unnest). | |||
*desctable* provides output functions to format this object to various outputs. | |||
Right now, desctable supports `data.frame`, `pander`, and `DT` outputs. These output functions will also round numerical values, as well as p values for tests (we'll see `desc_tests` a bit later). | |||
```{r} | |||
str(iris_by_Species) | |||
mtcars %>% | |||
group_by(am) %>% | |||
desc_table() %>% | |||
desc_output("df") | |||
``` | |||
<br> | |||
You can specify groups based on any variable, not only factors: | |||
```{r} | |||
# With pander output | |||
mtcars %>% | |||
group_by(cyl) %>% | |||
desctable() %>% | |||
pander() | |||
group_by(am) %>% | |||
desc_table() %>% | |||
desc_output("pander") | |||
``` | |||
<br> | |||
You can also specify groups based on an expression | |||
```{r} | |||
# With datatable output | |||
iris %>% | |||
group_by(Petal.Length > 5) %>% | |||
desctable() %>% | |||
datatable() | |||
``` | |||
<br> | |||
Multiple nested groups are also possible: | |||
```{r, message = F, warning = F} | |||
mtcars %>% | |||
dplyr::mutate(am = factor(am, labels = c("Automatic", "Manual"))) %>% | |||
group_by(vs, am, cyl) %>% | |||
desctable() %>% | |||
datatable() | |||
group_by(am) %>% | |||
desc_table() %>% | |||
desc_output("DT") | |||
``` | |||
<br> | |||
In the case of nested groups (a.k.a. sub-group analysis), statistical tests are performed only between the groups of the deepest grouping level. | |||
Statistical tests are automatically selected depending on the data and the grouping factor. | |||
## Advanced usage | |||
`desctable` automatically chooses statistical functions if none is provided, using the following algorithm: | |||
* if the variable is a factor, use `fisher.test` | |||
* if the grouping factor has only one level, use the provided `no.test` (which does nothing) | |||
* if the grouping factor has two levels | |||
* and the variable presents homoskedasticity (p value for `var.test` > .1) and normality of distribution in both groups, use `t.test(var.equal = T)` | |||
* and the variable does not present homoskedasticity (p value for `var.test` < .1) but normality of distribution in both groups, use `t.test(var.equal = F)` | |||
* else use `wilcox.test` | |||
* if the grouping factor has more than two levels | |||
* and the variable presents homoskedasticity (p value for `bartlett.test` > .1) and normality of distribution in all groups, use `oneway.test(var.equal = T)` | |||
* and the variable does not present homoskedasticity (p value for `bartlett.test` < .1) but normality of distribution in all groups, use `oneway.test(var.equal = F)` | |||
* else use `kruskal.test` | |||
You can specify the statistical test functions yourself with the *tests* argument. This argument can either be: | |||
* a function for automatic selection of appropriate statistical test functions, depending on the data | |||
* a named list of statistical test functions | |||
Please note that the statistical test functions **must** be given as *formulas* so as to capture the name of the test to display in the table. | |||
**purrr** style formulas are also accepted, as with the statistical functions. | |||
This also allows to specify optional arguments of such functions, and go around non-standard test functions (see **Statistical test functions**). | |||
### Automatic function | |||
The default value for the *tests* argument is `tests_auto`, provided in the package. | |||
You can also provide your own automatic function, which needs to | |||
* accept a variable and a grouping factor as its arguments, and | |||
* return a single-term formula containing a statistical test function. | |||
# Comparative tables | |||
This function will be used on every variable and every grouping factor to determine the appropriate test. | |||
You can add tests to a grouped descriptive desctable | |||
```{r} | |||
# Strictly equivalent to iris %>% group_by(Species) %>% desctable() %>% datatable() | |||
iris %>% | |||
group_by(Species) %>% | |||
desctable(tests = tests_auto) %>% | |||
datatable() | |||
``` | |||
<br> | |||
For reference, here is the body of the `tests_auto` function in the package: | |||
```{r, echo = F} | |||
print(tests_auto) | |||
group_by(Petal.Length > 5) %>% | |||
desc_table() %>% | |||
desc_tests() %>% | |||
desc_output("DT") | |||
``` | |||
<br> | |||
### Statistical test functions | |||
You can provide a named list of statistical functions, but here the mechanism is a bit different from the *stats* argument. | |||
The list must contain either `.auto` or `.default`. | |||
* `.auto` needs to be an automatic function, such as `tests_auto`. It will be used by default on all variables to select a test | |||
* `.default` needs to be a single-term formula containing a statistical test function that will be used on all variables | |||
You can also provide overrides to use specific tests for specific variables. | |||
This is done using list items named as the variable and containing a single-term formula function. | |||
By default, `desc_tests` will select the most appropriate statistical tests for the given table, but you can choose your own as easily. | |||
For example, to compare Sepal.Width using a Student's t test | |||
```{r} | |||
iris %>% | |||
group_by(Petal.Length > 5) %>% | |||
desctable(tests = list(.auto = tests_auto, | |||
Species = ~chisq.test)) %>% | |||
datatable() | |||
desc_table(mean, sd, median, IQR) %>% | |||
desc_tests(Sepal.Width = ~t.test) %>% | |||
desc_output("DT") | |||
``` | |||
<br> | |||
```{r} | |||
mtcars %>% | |||
dplyr::mutate(am = factor(am, labels = c("Automatic", "Manual"))) %>% | |||
group_by(am) %>% | |||
desctable(tests = list(.default = ~wilcox.test, | |||
mpg = ~t.test)) %>% | |||
datatable() | |||
``` | |||
Note that the name of the test **must** be prepended with a tilde (`~`) in all cases! | |||
Here's an example of **purrr** style function: | |||
You can also use `purrr::map`-like formulas to change tests options | |||
```{r} | |||
iris %>% | |||
group_by(Petal.Length > 5) %>% | |||
desctable(tests = list(.auto = tests_auto, | |||
Petal.Width = ~oneway.test(., var.equal = T))) | |||
desc_table(mean, sd, median, IQR) %>% | |||
desc_tests(Sepal.Width = ~t.test(., var.equal = T)) %>% | |||
desc_output("DT") | |||
``` | |||
<br> | |||
As with statistical functions, **any** statistical test function defined in R can be used. | |||
The conditions are that the function | |||
* accepts a formula (`variable ~ grouping_variable`) as a first positional argument (as is the case with most tests, like `t.test`), and | |||
* returns an object with a `p.value` element. | |||
Several convenience functions are provided: formula versions for `chisq.test` and `fisher.test` using generic S3 methods (thus the behavior of standard calls to `chisq.test` and `fisher.test` is not modified), and `ANOVA`, a partial application of `oneway.test` with parameter *var.equal* = T. | |||
See the [tips and tricks](tips.html) to go further. |
@@ -0,0 +1,355 @@ | |||
--- | |||
title: "desctable usage vignette (deprecated)" | |||
output: rmarkdown::html_vignette | |||
vignette: > | |||
%\VignetteIndexEntry{desctable usage vignette (deprecated)} | |||
%\VignetteEngine{knitr::rmarkdown} | |||
%\VignetteEncoding{UTF-8} | |||
--- | |||
```{r, echo = F, message = F, warning = F} | |||
library(desctable) | |||
options(DT.options = list(#scrollX = T, | |||
info = F, | |||
search = F, | |||
dom = "Brtip", | |||
fixedColumns = T)) | |||
knitr::opts_chunk$set(message = F, warning = F, screenshot.force = F) | |||
``` | |||
Desctable is a comprehensive descriptive and comparative tables generator for R. | |||
Every person doing data analysis has to create tables for descriptive summaries of data (a.k.a. Table.1), or comparative tables. | |||
Many packages, such as the aptly named **tableone**, address this issue. However, they often include hard-coded behaviors, have outputs not easily manipulable with standard R tools, or their syntax is out-of-style (e.g. the argument order makes them difficult to use with the pipe (`%>%`)). | |||
Enter **desctable**, a package built with the following objectives in mind: | |||
* generate descriptive and comparative statistics tables with nesting | |||
* keep the syntax as simple as possible | |||
* have good reasonable defaults | |||
* be entirely customizable, using standard R tools and functions | |||
* produce the simplest (as a data structure) output possible | |||
* provide helpers for different outputs | |||
* integrate with "modern" R usage, and the **tidyverse** set of tools | |||
* apply functional paradigms | |||
---- | |||
# Descriptive tables | |||
## Simple usage | |||
**desctable** uses and exports the pipe (`%>%`) operator (from packages **magrittr** and **dplyr** fame), though it is not mandatory to use it. | |||
The single interface to the package is its eponymous `desctable` function. | |||
When used on a data.frame, it returns a descriptive table: | |||
```{r} | |||
iris %>% | |||
desctable() | |||
desctable(mtcars) | |||
``` | |||
<br> | |||
As you can see with these two examples, `desctable` describes every variable, with individual levels for factors. It picks statistical functions depending on the type and distribution of the variables in the data, and applies those statistical functions only on the relevant variables. | |||
## Output | |||
The object produced by `desctable` is in fact a list of data.frames, with a "desctable" class. | |||
Methods for reduction to a simple dataframe (`as.data.frame`, automatically used for printing), conversion to markdown (`pander`), and interactive html output with **DT** (`datatable`) are provided: | |||
```{r} | |||
iris %>% | |||
desctable() %>% | |||
pander() | |||
mtcars %>% | |||
desctable() %>% | |||
datatable() | |||
``` | |||
<br> | |||
To use `pander` you need to load the package yourself. | |||
Calls to `pander` and `datatable` with "regular" dataframes will not be affected by the defaults used in the package, and you can modify these defaults for **desctable** objects. | |||
The `datatable` wrapper function for desctable objects comes with some default options and formatting such as freezing the row names and table header, export buttons, and rounding of values. Both `pander` and `datatable` wrappers take a *digits* argument to set the number of decimals to show. (`pander` uses the *digits*, *justify* and *missing* arguments of `pandoc.table`, whereas `datatable` calls `prettyNum` with the `digits` parameter, and removes `NA` values. You can set `digits = NULL` if you want the full table and format it yourself.) | |||
Subsequent outputs in this vignette will use **DT**. | |||
## Advanced usage | |||
`desctable` automatically chooses statistical functions if none is provided, using the following algorithm: | |||
* always show N | |||
* if there are factors, show % | |||
* if there are normally distributed variables, show Mean and SD | |||
* if there are non-normally distributed variables, show Median and IQR | |||
For each variable in the table, compute the relevant statistical functions in that list (non-applicable functions will safely return `NA`). | |||
You can specify the statistical functions yourself with the *stats* argument. This argument can either be: | |||
* a function for automatic selection of appropriate statistical functions, depending on the data | |||
* a named list of functions/formulas | |||
The functions/formulas leverage the **tidyverse** way of working with anonymous functions, i.e.: | |||
If a *function*, it is used as is. | |||
If a *formula*, e.g. '~ .x + 1' or `~ . + 1`, it is converted to a function. There are three ways to refer to the arguments: | |||
* For a single argument function, use '.' | |||
* For a two argument function, use '.x' and '.y' | |||
* For more arguments, use '..1', '..2', '..3' etc | |||
This syntax allows you to create very compact anonymous functions, and is the same as in the `map` family of functions from **purrr**. | |||
**Conditional formulas (`condition ~ if_T | if_F`) from previous versions are no longer supported!** | |||
### Automatic function | |||
The default value for the *stats* argument is `stats_auto`, provided in the package. | |||
Several other "automatic statistical functions" are defined in this package: `stats_auto`, `stats_default`, `stats_normal`, `stats_nonnormal`. | |||
You can also provide your own automatic function, which needs to | |||
* accept a dataframe as its argument (whether to use this dataframe or not in the function is your choice), and | |||
* return a named list of statistical functions to use, as defined in the subsequent paragraphs. | |||
```{r} | |||
# Strictly equivalent to iris %>% desctable() %>% datatable() | |||
iris %>% | |||
desctable(stats = stats_auto) %>% | |||
datatable() | |||
``` | |||
<br> | |||
For reference, here is the body of the `stats_auto` function in the package: | |||
```{r, echo = F} | |||
print(stats_auto) | |||
``` | |||
<br> | |||
### Statistical functions | |||
Statistical functions can be **any** function defined in R that you want to use, such as `length` or `mean`. | |||
The only condition is that they return a single numerical value. One exception is when they return a vector of length `1 + nlevels(x)` when applied to factors, as is needed for the `percent` function. | |||
As mentioned above, they need to be used inside a **named list**, such as | |||
```{r} | |||
mtcars %>% | |||
desctable(stats = list("N" = length, "Mean" = mean, "SD" = sd)) %>% | |||
datatable() | |||
``` | |||
<br> | |||
The names will be used as column headers in the resulting table, and the functions will be applied safely on the variables (errors return `NA`, and for factors the function will be used on individual levels). | |||
Several convenience functions are included in this package. | |||
* `percent`, which prints percentages of levels in a factor | |||
* `IQR`, which re-implements `stats::IQR` but works better with `NA` values | |||
* `is.normal`, which tests for normality using the following method: `length(na.omit(x)) > 30 & shapiro.test(x)$p.value > .1` | |||
Be aware that **all functions will be used on variables stripped of their `NA` values!** | |||
This is necessary for most statistical functions to be useful, and makes **N** (`length`) show only the number of observations in the dataset for each variable. | |||
### Labels | |||
It is often the case that variable names are not "pretty" enough to be used as-is in a table. | |||
Although you could still edit the variable labels in the table afterwards using sub-setting or string replacement functions, we provide a facility for this using the **labels** argument. | |||
The **labels** argument is a named character vector associating variable names and labels. | |||
You don't need to provide labels for all the variables, and extra labels will be silently discarded. This allows you to define a "global" labels vector and use it for multiple tables even after variable selections. | |||
```{r} | |||
mtlabels <- c(mpg = "Miles/(US) gallon", | |||
cyl = "Number of cylinders", | |||
disp = "Displacement (cu.in.)", | |||
hp = "Gross horsepower", | |||
drat = "Rear axle ratio", | |||
wt = "Weight (1000 lbs)", | |||
qsec = "¼ mile time", | |||
vs = "V/S", | |||
am = "Transmission", | |||
gear = "Number of forward gears", | |||
carb = "Number of carburetors") | |||
mtcars %>% | |||
dplyr::mutate(am = factor(am, labels = c("Automatic", "Manual"))) %>% | |||
desctable(labels = mtlabels) %>% | |||
datatable() | |||
``` | |||
<br> | |||
---- | |||
# Comparative tables | |||
## Simple usage | |||
Creating a comparative table (between groups defined by a factor) using `desctable` is as easy as creating a descriptive table. | |||
It leverages the `group_by` function from **dplyr**: | |||
```{r} | |||
iris %>% | |||
group_by(Species) %>% | |||
desctable() -> iris_by_Species | |||
iris_by_Species | |||
``` | |||
<br> | |||
The result is a table containing a descriptive sub-table for each level of the grouping factor (the statistical functions rules are applied to each sub-table independently), with the statistical tests performed, and their p values. | |||
When displayed as a flat dataframe, the grouping header appears in each variable name. | |||
You can also see the grouping headers by inspecting the resulting object, which is a nested list of dataframes, each dataframe being named after the grouping factor and its levels (with sample size for each). | |||
```{r} | |||
str(iris_by_Species) | |||
``` | |||
<br> | |||
You can specify groups based on any variable, not only factors: | |||
```{r} | |||
# With pander output | |||
mtcars %>% | |||
group_by(cyl) %>% | |||
desctable() %>% | |||
pander() | |||
``` | |||
<br> | |||
You can also specify groups based on an expression | |||
```{r} | |||
# With datatable output | |||
iris %>% | |||
group_by(Petal.Length > 5) %>% | |||
desctable() %>% | |||
datatable() | |||
``` | |||
<br> | |||
Multiple nested groups are also possible: | |||
```{r, message = F, warning = F} | |||
mtcars %>% | |||
dplyr::mutate(am = factor(am, labels = c("Automatic", "Manual"))) %>% | |||
group_by(vs, am, cyl) %>% | |||
desctable() %>% | |||
datatable() | |||
``` | |||
<br> | |||
In the case of nested groups (a.k.a. sub-group analysis), statistical tests are performed only between the groups of the deepest grouping level. | |||
Statistical tests are automatically selected depending on the data and the grouping factor. | |||
## Advanced usage | |||
`desctable` automatically chooses statistical functions if none is provided, using the following algorithm: | |||
* if the variable is a factor, use `fisher.test` | |||
* if the grouping factor has only one level, use the provided `no.test` (which does nothing) | |||
* if the grouping factor has two levels | |||
* and the variable presents homoskedasticity (p value for `var.test` > .1) and normality of distribution in both groups, use `t.test(var.equal = T)` | |||
* and the variable does not present homoskedasticity (p value for `var.test` < .1) but normality of distribution in both groups, use `t.test(var.equal = F)` | |||
* else use `wilcox.test` | |||
* if the grouping factor has more than two levels | |||
* and the variable presents homoskedasticity (p value for `bartlett.test` > .1) and normality of distribution in all groups, use `oneway.test(var.equal = T)` | |||
* and the variable does not present homoskedasticity (p value for `bartlett.test` < .1) but normality of distribution in all groups, use `oneway.test(var.equal = F)` | |||
* else use `kruskal.test` | |||
You can specify the statistical test functions yourself with the *tests* argument. This argument can either be: | |||
* a function for automatic selection of appropriate statistical test functions, depending on the data | |||
* a named list of statistical test functions | |||
Please note that the statistical test functions **must** be given as *formulas* so as to capture the name of the test to display in the table. | |||
**purrr** style formulas are also accepted, as with the statistical functions. | |||
This also allows to specify optional arguments of such functions, and go around non-standard test functions (see **Statistical test functions**). | |||
### Automatic function | |||
The default value for the *tests* argument is `tests_auto`, provided in the package. | |||
You can also provide your own automatic function, which needs to | |||
* accept a variable and a grouping factor as its arguments, and | |||
* return a single-term formula containing a statistical test function. | |||
This function will be used on every variable and every grouping factor to determine the appropriate test. | |||
```{r} | |||
# Strictly equivalent to iris %>% group_by(Species) %>% desctable() %>% datatable() | |||
iris %>% | |||
group_by(Species) %>% | |||
desctable(tests = tests_auto) %>% | |||
datatable() | |||
``` | |||
<br> | |||
For reference, here is the body of the `tests_auto` function in the package: | |||
```{r, echo = F} | |||
print(tests_auto) | |||
``` | |||
<br> | |||
### Statistical test functions | |||
You can provide a named list of statistical functions, but here the mechanism is a bit different from the *stats* argument. | |||
The list must contain either `.auto` or `.default`. | |||
* `.auto` needs to be an automatic function, such as `tests_auto`. It will be used by default on all variables to select a test | |||
* `.default` needs to be a single-term formula containing a statistical test function that will be used on all variables | |||
You can also provide overrides to use specific tests for specific variables. | |||
This is done using list items named as the variable and containing a single-term formula function. | |||
```{r} | |||
iris %>% | |||
group_by(Petal.Length > 5) %>% | |||
desctable(tests = list(.auto = tests_auto, | |||
Species = ~chisq.test)) %>% | |||
datatable() | |||
``` | |||
<br> | |||
```{r} | |||
mtcars %>% | |||
dplyr::mutate(am = factor(am, labels = c("Automatic", "Manual"))) %>% | |||
group_by(am) %>% | |||
desctable(tests = list(.default = ~wilcox.test, | |||
mpg = ~t.test)) %>% | |||
datatable() | |||
``` | |||
Here's an example of **purrr** style function: | |||
```{r} | |||
iris %>% | |||
group_by(Petal.Length > 5) %>% | |||
desctable(tests = list(.auto = tests_auto, | |||
Petal.Width = ~oneway.test(., var.equal = T))) | |||
``` | |||
<br> | |||
As with statistical functions, **any** statistical test function defined in R can be used. | |||
The conditions are that the function | |||
* accepts a formula (`variable ~ grouping_variable`) as a first positional argument (as is the case with most tests, like `t.test`), and | |||
* returns an object with a `p.value` element. | |||
Several convenience functions are provided: formula versions for `chisq.test` and `fisher.test` using generic S3 methods (thus the behavior of standard calls to `chisq.test` and `fisher.test` is not modified), and `ANOVA`, a partial application of `oneway.test` with parameter *var.equal* = T. |
@@ -0,0 +1,208 @@ | |||
--- | |||
title: "desctable tips" | |||
output: rmarkdown::html_vignette | |||
vignette: > | |||
%\VignetteIndexEntry{desctable tips} | |||
%\VignetteEngine{knitr::rmarkdown} | |||
%\VignetteEncoding{UTF-8} | |||
--- | |||
```{r, echo = F, message = F, warning = F} | |||
library(desctable) | |||
``` | |||
Here is a collection of tips and tricks to go further with *desctable*. | |||
# | |||
## | |||
### Label variables | |||
You can define labels for variables using the `.labels` argument in `desc_table` | |||
```{r} | |||
# Human-readable labels keyed by column name. `CARBURATOR` deliberately does
# not match the `carb` column.
labels <- c(
  mpg = "Miles/(US) gallon",
  cyl = "Number of cylinders",
  disp = "Displacement (cu.in.)",
  hp = "Gross horsepower",
  drat = "Rear axle ratio",
  wt = "Weight (1000 lbs)",
  qsec = "1/4 mile time",
  vs = "Engine",
  am = "Transmission",
  gear = "Number of forward gears",
  CARBURATOR = "Number of carburetors"
)

# Build the labelled table first, then render it with DT.
labelled_table <- desc_table(mtcars, .labels = labels)
desc_output(labelled_table, "DT")
``` | |||
As you can see with `CARBURATOR` instead of `carb`, not all variables need to have a label, and unused labels are discarded. | |||
### Default statistics | |||
`desc_table` chooses its own statistics this way: | |||
- always show `N = length` | |||
- show `"%" = percent` if there is at least one factor | |||
- show `min`, `max`, `Q1`, `Q3`, `median`, `mean`, `sd`, `IQR` if there is at least one numeric variable | |||
### Defining your own default statistics | |||
You can define your own automatic statistic function using the `.auto` argument in `desc_table`. | |||
This function should accept one argument, the table to choose statistics for (in the case of a grouped dataframe the subtables will be passed to the function). It should return a list of statistics. | |||
Here is the code of `stats_auto`, the default value of `.auto` | |||
```{r, eval = F} | |||
# Choose the statistics to compute for a table from the types of its columns.
#
# data: the table to choose statistics for.
#
# Returns a named list of statistics (functions or one-sided formulas):
#   - factor and numeric columns: N and %, followed by the numeric summaries
#   - factor columns only:        N and %
#   - numeric columns only:       the numeric summaries
#   - neither:                    NULL, because no branch matches
stats_auto <- function(data) {
  # One logical per column, collapsed to a single flag per type.
  has_numeric <- any(vapply(data, is.numeric, logical(1)))
  has_factor <- any(vapply(data, is.factor, logical(1)))

  numeric_stats <- list("Min" = min,
                        "Q1" = ~quantile(., .25),
                        "Med" = stats::median,
                        "Mean" = mean,
                        "Q3" = ~quantile(., .75),
                        "Max" = max,
                        "sd" = stats::sd,
                        "IQR" = IQR)

  # Both flags are length-one, so the scalar `&&` is the right operator here.
  if (has_factor && has_numeric) {
    c(list("N" = length,
           "%" = percent),
      numeric_stats)
  } else if (has_factor) {
    list("N" = length,
         "%" = percent)
  } else if (has_numeric) {
    numeric_stats
  }
}
``` | |||
### Reuse a list of defined statistics | |||
If you often reuse the same statistics for multiple tables and you don't want to repeat yourself, you can splice a list to `desc_table` using the `rlang::!!!` operator | |||
```{r} | |||
# Statistics shared by several tables. Every entry is named so the list can be
# spliced into `desc_table` with `!!!`.
stats <- list(N = length,
              Mean = mean,
              SD = sd)

mtcars %>%
  desc_table(!!!stats) %>%
  desc_output("DT")
``` | |||
When splicing, all stats need to be explicitly named | |||
```{r} | |||
# Only `N` is named here; `mean` and `sd` are spliced without a name.
stats2 <- list(N = length,
               mean,
               sd)

mtcars %>%
  desc_table(!!!stats2) %>%
  desc_output("DT")
``` | |||
You can also define a "dumb" automatic function | |||
```{r} | |||
# A "dumb" automatic function: it returns the same statistics for any table.
#
# data: the table to choose statistics for; it is accepted but never read.
#
# Returns a list holding `length` (named "N"), `mean` and `sd` (both unnamed).
default_stats <- function(data) {
  named_part <- list(N = length)
  unnamed_part <- list(mean, sd)
  c(named_part, unnamed_part)
}
``` | |||
### Default statistical tests | |||
`desc_table` chooses its own statistical tests this way: | |||
- if the variable is a factor, use `fisher.test` | |||
- if `fisher.test` fails, fallback on `chisq.test` | |||
- if the variable is numeric, use | |||
- `wilcox.test` if there are two groups | |||
- `kruskal.test` if there are more than two groups | |||
### Defining your own default statistical tests | |||
You can define your own automatic statistical test function using the `.auto` argument in `desc_tests`. | |||
This function should accept two arguments, the variable to compare and the grouping variable, and return a statistical test that accepts a `formula` argument and returns an object with a `p.value` element. | |||
Here is the code of `tests_auto`, the default value of `.auto` | |||
```{r, eval = F} | |||
# Choose a statistical test for one variable, given the grouping variable.
#
# var: the variable to compare.
# grp: the grouping variable; it is coerced to a factor.
#
# Returns a one-sided formula naming the test:
#   - `~no.test`      when there are fewer than two groups
#   - `~fisher.test`  for a factor, when `fisher.test(var ~ grp)` yields a
#                     numeric p-value
#   - `~chisq.test`   for a factor, when `fisher.test` errors or does not
#   - `~wilcox.test`  for anything else, with exactly two groups
#   - `~kruskal.test` for anything else, with more than two groups
tests_auto <- function(var, grp) {
  grp <- factor(grp)

  if (nlevels(grp) < 2) {
    ~no.test
  } else if (is.factor(var)) {
    # Probe `fisher.test`; an error counts as "not usable" and selects
    # `chisq.test` instead.
    fisher_ok <- tryCatch(is.numeric(fisher.test(var ~ grp)$p.value),
                          error = function(e) FALSE)
    if (fisher_ok) {
      ~fisher.test
    } else {
      ~chisq.test
    }
  } else if (nlevels(grp) == 2) {
    ~wilcox.test
  } else {
    ~kruskal.test
  }
}
``` | |||
You can also provide a default statistical test using the `.default` argument | |||
```{r} | |||
# Describe each transmission group by mean and sd, then compare the groups
# with a t-test on every variable.
cars_by_am <- group_by(mtcars, am)
am_summary <- desc_table(cars_by_am, mean, sd)
desc_output(desc_tests(am_summary, .default = ~t.test), "DT")
``` | |||
Note that as with named tests, it is necessary to prepend the test name with a tilde (`~`). | |||
You can still choose individual tests when you define either a `.auto` or a `.default` test | |||
```{r, warning = F} | |||
# Same comparison with more statistics: `t.test` stays the default test, while
# `carb` is compared with `wilcox.test`.
cars_by_am_full <- group_by(mtcars, am)
am_full_summary <- desc_table(cars_by_am_full, mean, sd, median, IQR)
am_full_tested <- desc_tests(am_full_summary,
                             .default = ~t.test,
                             carb = ~wilcox.test)
desc_output(am_full_tested, "DT")
``` | |||
Note that if a `.default` test is provided, `.auto` is ignored. | |||
### Output options | |||
You can set the number of significant digits to display with the `digits` argument. | |||
The p-values are truncated at 1E-digits, i.e. 10^(-digits). | |||
```{r} | |||
# Summarise and test iris by species, then render with ten significant digits.
iris_by_species <- group_by(iris, Species)
species_tested <- desc_tests(desc_table(iris_by_species, mean, sd))
desc_output(species_tested, "DT", digits = 10)
``` | |||
Any additional argument given to `desc_output` will be passed on to the output function. | |||
```{r} | |||
# `filter = "top"` is not an argument of `desc_output` itself; it is given as
# an additional argument alongside the "DT" target.
species_groups <- group_by(iris, Species)
species_summary <- desc_table(species_groups, mean, sd)
desc_output(species_summary, "DT", filter = "top")
``` |