@@ -1,6 +1,6 @@ | |||
Package: desctable | |||
Title: Produce Descriptive and Comparative Tables Easily | |||
Version: 0.1.9 | |||
Version: 0.2.0 | |||
Authors@R: c(person("Maxime", "Wack", email = "maximewack@free.fr", role = c("aut", "cre")), | |||
person("Adrien", "Boukobza", email = "hadrien_b@hotmail.fr", role = c("aut")), | |||
person("Yihui", "Xie", email = "xieyihui@gmail.com", role = c("ctb"))) | |||
@@ -19,11 +19,12 @@ BugReports: https://github.com/maximewack/desctable/issues | |||
Imports: | |||
dplyr, | |||
DT, | |||
htmltools | |||
htmltools, | |||
rlang | |||
Suggests: | |||
knitr, | |||
rmarkdown, | |||
purrr, | |||
survival | |||
RoxygenNote: 7.1.1 | |||
RoxygenNote: 7.1.2 | |||
VignetteBuilder: knitr |
@@ -7,8 +7,6 @@ S3method(desctable,default) | |||
S3method(desctable,grouped_df) | |||
S3method(pander,desctable) | |||
S3method(print,desctable) | |||
S3method(statify,default) | |||
S3method(statify,formula) | |||
export("%>%") | |||
export(ANOVA) | |||
export(IQR) | |||
@@ -1,3 +1,8 @@ | |||
Version 0.2.0 | |||
- Add support for purrr::map-like formulas for statistical and test functions | |||
- "conditional" formulas are no longer supported. For example, replace `~ is.normal ~ mean | median` with `~ if (is.normal(.)) mean(.) else median(.)` | |||
Version 0.1.9 | |||
- Fix in default options for datatable output to follow DT update | |||
@@ -13,6 +13,11 @@ statColumn <- function(stat, data) { | |||
# Apply one statified stat function to every variable in the data | |||
# Return a simple vector for the column | |||
# Statify checks types and output for the stat function. Returns a numeric vector or a character vector if needed. | |||
if (length(stat) == 3) | |||
warning("Conditional formulas are deprecated and will be removed in 1.0.0 | |||
purrr::map style formulas are used now. | |||
For example, `is.normal ~ mean | median` becomes `~ if (is.normal(.)) mean(.) else median(.)`") | |||
data %>% | |||
lapply(statify, stat) %>% | |||
unlist() | |||
@@ -21,7 +26,7 @@ statColumn <- function(stat, data) { | |||
#' Generate the table of all statistics for all variables | |||
#' | |||
#' If stats is a list of functions, use them. | |||
#' If stats is a list of functions or purrr::map like formulas, use them. | |||
#' If it is a single function, use it with the entire data as | |||
#' its argument to produce a list of statistical functions to use. | |||
#' | |||
@@ -106,18 +111,18 @@ varColumn <- function(data, labels = NULL) { | |||
#' @section Stats: | |||
#' The stats can be a function which takes a dataframe and returns a list of statistical functions to use. | |||
#' | |||
#' stats can also be a named list of statistical functions, or formulas. | |||
#' | |||
#' The names will be used as column names in the resulting table. If an element of the list is a function, it will be used as-is for the stats. If an element of the list is a formula, it can be used to conditionally use stats depending on the variable. | |||
#' stats can also be a named list of statistical functions, or purrr::map like formulas. | |||
#' | |||
#' The general form is \code{condition ~ T | F}, and can be nested, such as \code{is.factor ~ percent | (is.normal ~ mean | median)}, for example. | |||
#' The names will be used as column names in the resulting table. If an element of the list is a function, it will be used as-is for the stats. | |||
#' | |||
#' @section Tests: | |||
#' The tests can be a function which takes a variable and a grouping variable, and returns an appropriate statistical test to use in that case. | |||
#' | |||
#' tests can also be a named list of statistical test functions, associating the name of a variable in the data, and a test to use specifically for that variable. | |||
#' tests can also be a named list of statistical test functions, associating the name of a variable in the data and a test to use specifically for that variable. | |||
#' | |||
#' That test name must be expressed as a single-term formula (e.g. \code{~t.test}). You don't have to specify tests for all the variables: a default test for all other variables can be defined with the name \code{.default}, and an automatic test can be defined with the name \code{.auto}. | |||
#' That test name must be expressed as a single-term formula (e.g. \code{~t.test}), or a purrr::map like formula | |||
#' (e.g. \code{~t.test(., var.equal = T)}). You don't have to specify tests for all the variables: a default test for | |||
#' all other variables can be defined with the name \code{.default}, and an automatic test can be defined with the name \code{.auto}. | |||
#' | |||
#' If data is a grouped dataframe (using \code{group_by}), subtables are created and statistic tests are performed over each sub-group. | |||
#' | |||
@@ -142,10 +147,10 @@ varColumn <- function(data, labels = NULL) { | |||
#' # Does the same as stats_auto here | |||
#' iris %>% | |||
#' desctable(stats = list("N" = length, | |||
#' "%/Mean" = is.factor ~ percent | (is.normal ~ mean), | |||
#' "sd" = is.normal ~ sd, | |||
#' "Med" = is.normal ~ NA | median, | |||
#' "IQR" = is.normal ~ NA | IQR)) | |||
#' "Mean" = ~ if (is.normal(.)) mean(.), | |||
#' "sd" = ~ if (is.normal(.)) sd(.), | |||
#' "Med" = stats::median, | |||
#' "IQR" = ~ if(!is.factor(.)) IQR(.))) | |||
#' | |||
#' # With labels | |||
#' mtcars %>% desctable(labels = c(hp = "Horse Power", | |||
@@ -245,8 +250,7 @@ testColumn <- function(df, tests, grp) { | |||
ftests[names(ftests) %in% forced_tests][forced_tests] <- tests[forced_tests] | |||
# Compute the tests (made safe with testify) on the variable, using the grouping variable | |||
df %>% | |||
purrr::map2(ftests, testify, group) %>% | |||
mapply(testify, df, ftests, MoreArgs = list(group = group), SIMPLIFY = F) %>% | |||
Reduce(f = rbind) | |||
} | |||
@@ -79,7 +79,7 @@ pander.desctable <- function(x = NULL, | |||
#' @note | |||
#' You are recommended to escape the table content for security reasons (e.g. XSS attacks) when using this function in Shiny or any other dynamic web applications. | |||
#' @references | |||
#' See \url{https://rstudio.github.io/DT} for the full documentation. | |||
#' See \url{https://rstudio.github.io/DT/} for the full documentation. | |||
#' @examples | |||
#' library(DT) | |||
#' | |||
@@ -14,16 +14,15 @@ | |||
#' @export | |||
#' @return The results for the function applied on the vector, compatible with the format of the result table | |||
statify <- function(x, f) { | |||
UseMethod("statify", f) | |||
} | |||
#' @rdname statify | |||
#' @export | |||
statify.default <- function(x, f) { | |||
# Discard NA values | |||
x <- stats::na.omit(x) | |||
## Deprecate conditional formula | |||
if (length(f) == 3) | |||
f <- parse_formula(x, f) | |||
else | |||
f <- rlang::as_function(f) | |||
# Try f(x), silent warnings and fail with NA | |||
res <- tryCatch(f(x), | |||
warning = function(e) suppressWarnings(f(x)), | |||
@@ -51,16 +50,6 @@ statify.default <- function(x, f) { | |||
} | |||
#' @rdname statify | |||
#' @export | |||
statify.formula <- function(x, f) { | |||
# if expression quoted with ~, evaluate the expression | |||
if (length(f) == 2) eval(f[[2]]) | |||
# else parse the formula (cond ~ T | F) | |||
else statify.default(x, parse_formula(x, f)) | |||
} | |||
#' Functions to create a list of statistics to use in desctable | |||
#' | |||
#' These functions take a dataframe as argument and return a list of statistcs in the form accepted by desctable. | |||
@@ -81,10 +70,10 @@ statify.formula <- function(x, f) { | |||
stats_default <- function(data) { | |||
list("N" = length, | |||
"%" = percent, | |||
"Mean" = is.normal ~ mean, | |||
"sd" = is.normal ~ sd, | |||
"Mean" = ~if (is.normal(.)) mean(.), | |||
"sd" = ~if (is.normal(.)) sd(.), | |||
"Med" = stats::median, | |||
"IQR" = is.factor ~ NA | IQR) | |||
"IQR" = ~if (!is.factor(.)) IQR(.)) | |||
} | |||
@@ -104,7 +93,7 @@ stats_nonnormal <- function(data) { | |||
list("N" = length, | |||
"%" = percent, | |||
"Median" = stats::median, | |||
"IQR" = is.factor ~ NA | IQR) | |||
"IQR" = ~if (!is.factor(.)) IQR(.)) | |||
} | |||
@@ -134,10 +123,10 @@ stats_auto <- function(data) { | |||
else if (fact & !normal & !nonnormal) list("N" = length, | |||
"%" = percent) | |||
else if (!fact & normal & nonnormal) list("N" = length, | |||
"Mean" = is.normal ~ mean, | |||
"sd" = is.normal ~ sd, | |||
"Mean" = ~if (is.normal(.)) mean(.), | |||
"sd" = ~if (is.normal(.)) sd(.), | |||
"Med" = stats::median, | |||
"IQR" = is.factor ~ NA | IQR) | |||
"IQR" = ~if (!is.factor(.)) IQR(.)) | |||
else if (!fact & normal & !nonnormal) list("N" = length, | |||
"Mean" = mean, | |||
"sd" = stats::sd) | |||
@@ -14,8 +14,11 @@ testify <- function(x, f, group) { | |||
Reduce(f = paste0) %>% | |||
substring(2) -> fun | |||
# If eval(f[[2]]) throws an error, then we may be in an rlang-formula | |||
tryCatch(f <- eval(f[[2]]), | |||
error = function(e) {f <<- rlang::as_function(f)}) | |||
# Try the function | |||
f <- eval(f[[2]]) | |||
p <- tryCatch(f(x ~ group)$p.value[1], | |||
error = function(e) {message(e);NaN}) | |||
@@ -51,14 +54,14 @@ tests_auto <- function(var, grp) { | |||
if (nlevels(grp) == 2) { | |||
if (all_normal) { | |||
if (tryCatch(stats::var.test(var ~ grp)$p.value > .1, warning = function(e) F, error = function(e) F)) ~. %>% t.test(var.equal = T) | |||
else ~. %>% t.test(var.equal = F) | |||
if (tryCatch(stats::var.test(var ~ grp)$p.value > .1, warning = function(e) F, error = function(e) F)) ~t.test(., var.equal = T) | |||
else ~t.test(., var.equal = F) | |||
} | |||
else ~wilcox.test | |||
} else { | |||
if (all_normal) { | |||
if (tryCatch(stats::bartlett.test(var ~ grp)$p.value > .1, warning = function(e) F, error = function(e) F)) ~. %>% oneway.test(var.equal = T) | |||
else ~. %>% oneway.test(var.equal = F) | |||
if (tryCatch(stats::bartlett.test(var ~ grp)$p.value > .1, warning = function(e) F, error = function(e) F)) ~oneway.test(., var.equal = T) | |||
else ~oneway.test(., var.equal = F) | |||
} | |||
else ~kruskal.test | |||
} | |||
@@ -112,13 +112,13 @@ head_datatable <- function(head) { | |||
TRs <- list() | |||
while (is.list(head[[1]])) { | |||
TR <- purrr::map2(names(head), lapply(head, attr, "colspan"), ~htmltools::tags$th(.x, colspan = .y)) | |||
TR <- mapply(function(x, y) htmltools::tags$th(x, colspan = y), names(head), lapply(head, attr, "colspan"), SIMPLIFY = F) | |||
TRs <- c(TRs, list(TR)) | |||
head <- purrr::flatten(head) | |||
} | |||
c(TRs, list(purrr::map2(names(head), head, ~htmltools::tags$th(.x, colspan = .y)))) | |||
c(TRs, list(mapply(function(x, y) htmltools::tags$th(x, colspan = y), names(head), head, SIMPLIFY = F))) | |||
} | |||
@@ -190,10 +190,10 @@ header <- function(desctable, output = c("pander", "datatable", "dataframe")) { | |||
} | |||
#' Build a header list object | |||
#' build a header list object | |||
#' | |||
#' @param desctable A desctable | |||
#' @return A nested list of headers with colspans | |||
#' @param desctable a desctable | |||
#' @return a nested list of headers with colspans | |||
headerList <- function(desctable) { | |||
if (is.data.frame(desctable)) length(desctable) | |||
else { | |||
@@ -71,6 +71,7 @@ iris %>% | |||
desctable(mtcars) | |||
``` | |||
<br> | |||
As you can see with these two examples, `desctable` describes every variable, with individual levels for factors. It picks statistical functions depending on the type and distribution of the variables in the data, and applies those statistical functions only on the relevant variables. | |||
@@ -86,13 +87,17 @@ iris %>% | |||
``` | |||
<br> | |||
To use `pander` you need to load the package yourself. | |||
Calls to `pander` and `datatable` with "regular" dataframes will not be affected by the defaults used in the package, and you can modify these defaults for **desctable** objects. | |||
The `datatable` wrapper function for desctable objects comes with some default options and formatting such as freezing the row names and table header, export buttons, and rounding of values. Both `pander` and `datatable` wrapper take a *digits* argument to set the number of decimals to show. (`pander` uses the *digits*, *justify* and *missing* arguments of `pandoc.table`, whereas `datatable` calls `prettyNum` with the `digits` parameter, and removes `NA` values. You can set `digits = NULL` if you want the full table and format it yourself) | |||
Subsequent outputs in this README will use **pander**. | |||
## Advanced usage | |||
`desctable` chooses statistical functions for you using this algorithm: | |||
`desctable` automatically chooses statistical functions if none is provided, using the following algorithm: | |||
* always show N | |||
* if there are factors, show % | |||
@@ -101,26 +106,34 @@ The `datatable` wrapper function for desctable objects comes with some default o | |||
For each variable in the table, compute the relevant statistical functions in that list (non-applicable functions will safely return `NA`). | |||
How does it work, and how can you adapt this behavior to your needs? | |||
You can specify the statistical functions yourself with the *stats* argument. This argument can either be: | |||
* a function for automatic selection of appropriate statistical functions, depending on the data | |||
* a named list of functions/formulas | |||
The functions/formulas leverage the **tidyverse** way of working with anonymous functions, i.e.: | |||
`desctable` takes an optional *stats* argument. This argument can either be: | |||
If a *function*, is is used as is. | |||
If a *formula*, e.g. '~ .x + 1' or `~ . + 1`, it is converted to a function. There are three ways to refer to the arguments: | |||
* an automatic function to select appropriate statistical functions | |||
* or a named list of | |||
* statistical functions | |||
* formulas describing conditions to use a statistical function. | |||
* For a single argument function, use '.' | |||
* For a two argument function, use '.x' and '.y' | |||
* For more arguments, use '..1', '..2', '..3' etc | |||
This syntax allows you to create very compact anonymous functions, and is the same as in the `map` family of functions from **purrr**. | |||
**Conditional formulas (`condition ~ if_T | if F`) from previous versions are no longer supported!** | |||
### Automatic function | |||
This is the default, using the `stats_auto` function provided in the package. | |||
The default value for the *stats* argument is `stats_auto`, provided in the package. | |||
Several other "automatic statistical functions" are defined in this package: `stats_auto`, `stats_default`, `stats_normal`, `stats_nonnormal`. | |||
You can also provide your own automatic function, which needs to | |||
You can also provide your own automatic function, which needs to | |||
* accept a dataframe as its argument (whether to use this dataframe or not in the function is your choice), and | |||
* return a named list of statistical functions to use, as defined in the subsequent paragraphs. | |||
* accept a dataframe as its argument (whether to use this dataframe or not in the function is your choice), and | |||
* return a named list of statistical functions to use, as defined in the subsequent paragraphs. | |||
```{r} | |||
# Strictly equivalent to iris %>% desctable() %>% pander() | |||
@@ -128,14 +141,21 @@ iris %>% | |||
desctable(stats = stats_auto) %>% | |||
pander() | |||
``` | |||
<br> | |||
For reference, here is the body of the `stats_auto` function in the package: | |||
```{r, echo = F} | |||
print(stats_auto) | |||
``` | |||
<br> | |||
### Statistical functions | |||
Statistical functions can be any function defined in R that you want to use, such as `length` or `mean`. | |||
Statistical functions can be **any** function defined in R that you want to use, such as `length` or `mean`. | |||
The only condition is that they return a single numerical value. One exception is when they return a vector of length `1 + nlevels(x)` when applied to factors, as is needed for the `percent` function. | |||
As mentioned above, they need to be used inside a named list, such as | |||
As mentioned above, they need to be used inside a **named list**, such as | |||
```{r} | |||
mtcars %>% | |||
@@ -146,54 +166,22 @@ mtcars %>% | |||
The names will be used as column headers in the resulting table, and the functions will be applied safely on the variables (errors return `NA`, and for factors the function will be used on individual levels). | |||
Several convenience functions are included in this package. For statistical function we have: `percent`, which prints percentages of levels in a factor, and `IQR` which re-implements `stats::IQR` but works better with `NA` values. | |||
Several convenience functions are included in this package. | |||
Be aware that **all functions will be used on variables stripped of their `NA` values!** | |||
This is necessary for most statistical functions to be useful, and makes **N** (`length`) show only the number of observations in the dataset for each variable. | |||
### Conditional formulas | |||
* `percent`, which prints percentages of levels in a factor | |||
* `IQR`, which re-implements `stats::IQR` but works better with `NA` values | |||
* `is.normal`, which tests for normality using the following method: `length(na.omit(x)) > 30 & shapiro.test(x)$p.value > .1` | |||
The general form of these formulas is | |||
```{r, eval = F} | |||
predicate_function ~ stat_function_if_TRUE | stat_function_if_FALSE | |||
``` | |||
A predicate function is any function returning either `TRUE` or `FALSE` when applied on a vector, such as `is.factor`, `is.numeric`, and `is.logical`. | |||
**desctable** provides the `is.normal` function to test for normality (it is equivalent to `length(na.omit(x)) > 30 & shapiro.test(x)$p.value > .1`). | |||
The *FALSE* option can be omitted and `NA` will be produced if the condition in the predicate is not met. | |||
These statements can be nested using parentheses. | |||
For example: | |||
`is.factor ~ percent | (is.normal ~ mean)` | |||
will either use `percent` if the variable is a factor, or `mean` if and only if the variable is normally distributed. | |||
You can mix "bare" statistical functions and formulas in the list defining the statistics you want to use in your table. | |||
```{r} | |||
iris %>% | |||
desctable(stats = list("N" = length, | |||
"%/Mean" = is.factor ~ percent | (is.normal ~ mean), | |||
"Median" = is.normal ~ NA | median)) %>% | |||
pander() | |||
``` | |||
<br> | |||
For reference, here is the body of the `stats_auto` function in the package: | |||
```{r, echo = F} | |||
print(stats_auto) | |||
``` | |||
Be aware that **all functions will be used on variables stripped of their `NA` values!** | |||
This is necessary for most statistical functions to be useful, and makes **N** (`length`) show only the number of observations in the dataset for each variable. | |||
### Labels | |||
It is often the case that variable names are not "pretty" enough to be used as-is in a table. | |||
Although you could still edit the variable labels in the table afterwards using subsetting or string replacement functions, it is possible to mention a **labels** argument. | |||
Although you could still edit the variable labels in the table afterwards using sub-setting or string replacement functions, we provide a facility for this using the **labels** argument. | |||
The **labels** argument is a named character vector associating variable names and labels. | |||
You don't need to provide labels for all the variables, and extra labels will be silently discarded. This allows you to define a "global" labels vector and use it for every table even after variable selections. | |||
You don't need to provide labels for all the variables, and extra labels will be silently discarded. This allows you to define a "global" labels vector and use it for multiple tables even after variable selections. | |||
```{r} | |||
mtlabels <- c(mpg = "Miles/(US) gallon", | |||
@@ -223,7 +211,7 @@ mtcars %>% | |||
Creating a comparative table (between groups defined by a factor) using `desctable` is as easy as creating a descriptive table. | |||
It uses the well known `group_by` function from **dplyr**: | |||
It leverages the `group_by` function from **dplyr**: | |||
```{r} | |||
iris %>% | |||
@@ -232,16 +220,18 @@ iris %>% | |||
iris_by_Species | |||
``` | |||
<br> | |||
The result is a table containing a descriptive subtable for each level of the grouping factor (the statistical functions rules are applied to each subtable independently), with the statistical tests performed, and their p values. | |||
The result is a table containing a descriptive sub-table for each level of the grouping factor (the statistical functions rules are applied to each sub-table independently), with the statistical tests performed, and their p values. | |||
When displayed as a flat dataframe, the grouping header appears in each variable. | |||
When displayed as a flat dataframe, the grouping header appears in each variable name. | |||
You can also see the grouping headers by inspecting the resulting object, which is a deep list of dataframes, each dataframe named after the grouping factor and its levels (with sample size for each). | |||
You can also see the grouping headers by inspecting the resulting object, which is a nested list of dataframes, each dataframe being named after the grouping factor and its levels (with sample size for each). | |||
```{r} | |||
str(iris_by_Species) | |||
``` | |||
<br> | |||
You can specify groups based on any variable, not only factors: | |||
@@ -252,7 +242,9 @@ mtcars %>% | |||
desctable() %>% | |||
pander() | |||
``` | |||
Also with conditions: | |||
<br> | |||
You can also specify groups based on an expression | |||
```{r} | |||
iris %>% | |||
@@ -262,7 +254,7 @@ iris %>% | |||
``` | |||
<br> | |||
And even on multiple nested groups: | |||
Multiple nested groups are also possible: | |||
```{r, message = F, warning = F} | |||
mtcars %>% | |||
@@ -279,7 +271,7 @@ Statistical tests are automatically selected depending on the data and the group | |||
## Advanced usage | |||
`desctable` choses the statistical tests using the following algorithm: | |||
`desctable` automatically chooses statistical functions if none is provided, using the following algorithm: | |||
* if the variable is a factor, use `fisher.test` | |||
* if the grouping factor has only one level, use the provided `no.test` (which does nothing) | |||
@@ -292,26 +284,28 @@ Statistical tests are automatically selected depending on the data and the group | |||
* and the variable does not present homoskedasticity (p value for `bartlett.test` < .1) but normality of distribution in all groups, use `oneway.test(var.equal = F)` | |||
* else use `kruskal.test` | |||
But what if you want to pick a specific test for a specific variable, or change all the tests altogether? | |||
You can specify the statistical test functions yourself with the *tests* argument. This argument can either be: | |||
`desctable` takes an optional *tests* argument. This argument can either be | |||
* a function for automatic selection of appropriate statistical test functions, depending on the data | |||
* a named list of statistical test functions | |||
* an automatic function to select appropriate statistical test functions | |||
* or a named list of statistical test functions | |||
Please note that the statistical test functions **must** be given as *formulas* so as to capture the name of the test to display in the table. | |||
**purrr** style formulas are also accepted, as with the statistical functions. | |||
This also allows to specify optional arguments of such functions, and go around non-standard test functions (see **Statistical test functions**). | |||
### Automatic function | |||
This is the default, using the `tests_auto` function provided in the package. | |||
The default value for the *tests* argument is `tests_auto`, provided in the package. | |||
You can also provide your own automatic function, which needs to | |||
* accept a variable and a grouping factor as its arguments, and | |||
* return a single-term formula containing a statistical test function. | |||
* accept a variable and a grouping factor as its arguments, and | |||
* return a single-term formula containing a statistical test function. | |||
This function will be used on every variable and every grouping factor to determine the appropriate test. | |||
```{r} | |||
# Strictly equivalent to iris %>% group_by(Species) %>% desctable %>% pander | |||
# Strictly equivalent to iris %>% group_by(Species) %>% desctable() %>% pander() | |||
iris %>% | |||
group_by(Species) %>% | |||
desctable(tests = tests_auto) %>% | |||
@@ -319,14 +313,20 @@ iris %>% | |||
``` | |||
<br> | |||
### List of statistical test functions | |||
For reference, here is the body of the `tests_auto` function in the package: | |||
```{r, echo = F} | |||
print(tests_auto) | |||
``` | |||
<br> | |||
### Statistical test functions | |||
You can provide a named list of statistical functions, but here the mechanism is a bit different from the *stats* argument. | |||
The list must contain either `.auto` or `.default`. | |||
* `.auto` needs to be an automatic function, such as `tests_auto`. It will be used by default on all variables to select a test | |||
* `.default` needs to be a single-term formula containing a statistical test function that will be used on all variables | |||
* `.auto` needs to be an automatic function, such as `tests_auto`. It will be used by default on all variables to select a test | |||
* `.default` needs to be a single-term formula containing a statistical test function that will be used on all variables | |||
You can also provide overrides to use specific tests for specific variables. | |||
This is done using list items named as the variable and containing a single-term formula function. | |||
@@ -348,55 +348,22 @@ mtcars %>% | |||
mpg = ~t.test)) %>% | |||
pander() | |||
``` | |||
<br> | |||
You might wonder why the formula expression. That is needed to capture the test name, and to provide it in the resulting table. | |||
As with statistical functions, any statistical test function defined in R can be used. | |||
The conditions are that the function | |||
* accepts a formula (`variable ~ grouping_variable`) as a first positional argument (as is the case with most tests, like `t.test`), and | |||
* returns an object with a `p.value` element. | |||
Several convenience function are provided: formula versions for `chisq.test` and `fisher.test` using generic S3 methods (thus the behavior of standard calls to `chisq.test` and `fisher.test` are not modified), and `ANOVA`, a partial application of `oneway.test` with parameter *var.equal* = T. | |||
# Tips and tricks | |||
In the *stats* argument, you can not only feed function names, but even arbitrary function definitions, functional sequences (a feature provided with the pipe (`%>%`)), or partial applications (with the **purrr** package): | |||
Here's an example of **purrr** style function: | |||
```{r} | |||
mtcars %>% | |||
desctable(stats = list("N" = length, | |||
"Sum of squares" = function(x) sum(x^2), | |||
"Q1" = . %>% quantile(prob = .25), | |||
"Q3" = purrr::partial(quantile, probs = .75))) %>% | |||
pander() | |||
``` | |||
<br> | |||
In the *tests* arguments, you can also provide function definitions, functional sequences, and partial applications in the formulas: | |||
```{r} | |||
iris %>% | |||
group_by(Species) %>% | |||
group_by(Petal.Length > 5) %>% | |||
desctable(tests = list(.auto = tests_auto, | |||
Sepal.Width = ~function(f) oneway.test(f, var.equal = F), | |||
Petal.Length = ~. %>% oneway.test(var.equal = T), | |||
Sepal.Length = ~purrr::partial(oneway.test, var.equal = T))) %>% | |||
pander() | |||
Petal.Width = ~oneway.test(., var.equal = T))) | |||
``` | |||
<br> | |||
This allows you to modulate the behavior of `desctable` in every detail, such as using paired tests, or non *htest* tests. | |||
```{r} | |||
# This is a contrived example, which would be better solved with a dedicated function | |||
library(survival) | |||
As with statistical functions, **any** statistical test function defined in R can be used. | |||
bladder$surv <- Surv(bladder$stop, bladder$event) | |||
The conditions are that the function | |||
bladder %>% | |||
group_by(rx) %>% | |||
desctable(tests = list(.default = ~wilcox.test, | |||
surv = ~. %>% survdiff %>% .$chisq %>% pchisq(1, lower.tail = F) %>% list(p.value = .))) %>% | |||
pander() | |||
``` | |||
* accepts a formula (`variable ~ grouping_variable`) as a first positional argument (as is the case with most tests, like `t.test`), and | |||
* returns an object with a `p.value` element. | |||
Several convenience function are provided: formula versions for `chisq.test` and `fisher.test` using generic S3 methods (thus the behavior of standard calls to `chisq.test` and `fisher.test` are not modified), and `ANOVA`, a partial application of `oneway.test` with parameter *var.equal* = T. |
@@ -88,14 +88,20 @@ alternatively, you can specify numeric column indices or column names to | |||
indicate which columns to escape, e.g. \code{1:5} (the first 5 columns), | |||
\code{c(1, 3, 4)}, or \code{c(-1, -3)} (all columns except the first and | |||
third), or \code{c('Species', 'Sepal.Length')}; since the row names take | |||
the first column to display, you should add the numeric column indices | |||
by one when using \code{rownames}} | |||
\item{style}{the style name (\url{https://datatables.net/manual/styling/}); | |||
currently only \code{'default'}, \code{'bootstrap'}, and | |||
\code{'bootstrap4'} are supported. Note that DT doesn't contain the theme | |||
files so in order to display the style correctly, you have to link | |||
the necessary files in the header.} | |||
the first column to display, you should add the numeric column indices by | |||
one when using \code{rownames}} | |||
\item{style}{either \code{'auto'}, \code{'default'}, \code{'bootstrap'}, or | |||
\code{'bootstrap4'}. If \code{'auto'}, and a **bslib** theme is | |||
currently active, then bootstrap styling is used in a way that "just works" | |||
for the active theme. Otherwise, | |||
\href{https://datatables.net/manual/styling/classes}{DataTables | |||
\code{'default'} styling} is used. If set explicitly to \code{'bootstrap'} | |||
or \code{'bootstrap4'}, one must take care to ensure Bootstrap's HTML | |||
dependencies (as well as Bootswatch themes, if desired) are included on the | |||
page. Note, when set explicitly, it's the user's responsibility to ensure | |||
that only one unique `style` value is used on the same page, if multiple | |||
DT tables exist, as different styling resources may conflict with each other.} | |||
\item{width}{Width/Height in pixels (optional, defaults to automatic | |||
sizing)} | |||
@@ -110,7 +116,9 @@ it's containing element. If the table can't fit fully into it's container | |||
then vertical and/or horizontal scrolling of the table cells will occur.} | |||
\item{autoHideNavigation}{\code{TRUE} to automatically hide navigational UI | |||
when the number of total records is less than the page size.} | |||
(only display the table body) when the number of total records is less | |||
than the page size. Note, it only works on the client-side processing mode | |||
and the `pageLength` option should be provided explicitly.} | |||
\item{selection}{the row/column selection mode (single or multiple selection | |||
or disable selection) when a table widget is rendered in a Shiny app; | |||
@@ -181,5 +189,5 @@ datatable(data.frame(x = Sys.time())) | |||
### | |||
} | |||
\references{ | |||
See \url{https://rstudio.github.io/DT} for the full documentation. | |||
See \url{https://rstudio.github.io/DT/} for the full documentation. | |||
} |
@@ -42,20 +42,20 @@ labels must be given in the form c(unquoted_variable_name = "label") | |||
The stats can be a function which takes a dataframe and returns a list of statistical functions to use. | |||
stats can also be a named list of statistical functions, or formulas. | |||
stats can also be a named list of statistical functions, or purrr::map like formulas. | |||
The names will be used as column names in the resulting table. If an element of the list is a function, it will be used as-is for the stats. If an element of the list is a formula, it can be used to conditionally use stats depending on the variable. | |||
The general form is \code{condition ~ T | F}, and can be nested, such as \code{is.factor ~ percent | (is.normal ~ mean | median)}, for example. | |||
The names will be used as column names in the resulting table. If an element of the list is a function, it will be used as-is for the stats. | |||
} | |||
\section{Tests}{ | |||
The tests can be a function which takes a variable and a grouping variable, and returns an appropriate statistical test to use in that case. | |||
tests can also be a named list of statistical test functions, associating the name of a variable in the data, and a test to use specifically for that variable. | |||
tests can also be a named list of statistical test functions, associating the name of a variable in the data and a test to use specifically for that variable. | |||
That test name must be expressed as a single-term formula (e.g. \code{~t.test}). You don't have to specify tests for all the variables: a default test for all other variables can be defined with the name \code{.default}, and an automatic test can be defined with the name \code{.auto}. | |||
That test name must be expressed as a single-term formula (e.g. \code{~t.test}), or a purrr::map like formula | |||
(e.g. \code{~t.test(., var.equal = T)}). You don't have to specify tests for all the variables: a default test for | |||
all other variables can be defined with the name \code{.default}, and an automatic test can be defined with the name \code{.auto}. | |||
If data is a grouped dataframe (using \code{group_by}), subtables are created and statistic tests are performed over each sub-group. | |||
} | |||
@@ -72,10 +72,10 @@ iris \%>\% | |||
# Does the same as stats_auto here | |||
iris \%>\% | |||
desctable(stats = list("N" = length, | |||
"\%/Mean" = is.factor ~ percent | (is.normal ~ mean), | |||
"sd" = is.normal ~ sd, | |||
"Med" = is.normal ~ NA | median, | |||
"IQR" = is.normal ~ NA | IQR)) | |||
"Mean" = ~ if (is.normal(.)) mean(.), | |||
"sd" = ~ if (is.normal(.)) sd(.), | |||
"Med" = stats::median, | |||
"IQR" = ~ if(!is.factor(.)) IQR(.))) | |||
# With labels | |||
mtcars \%>\% desctable(labels = c(hp = "Horse Power", | |||
@@ -2,16 +2,16 @@ | |||
% Please edit documentation in R/utils.R | |||
\name{headerList} | |||
\alias{headerList} | |||
\title{Build a header list object} | |||
\title{build a header list object} | |||
\usage{ | |||
headerList(desctable) | |||
} | |||
\arguments{ | |||
\item{desctable}{A desctable} | |||
\item{desctable}{a desctable} | |||
} | |||
\value{ | |||
A nested list of headers with colspans | |||
a nested list of headers with colspans | |||
} | |||
\description{ | |||
Build a header list object | |||
build a header list object | |||
} |
@@ -1,28 +0,0 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/utils.R | |||
\name{parse_formula} | |||
\alias{parse_formula} | |||
\title{Parse a formula} | |||
\usage{ | |||
parse_formula(x, f) | |||
} | |||
\arguments{ | |||
\item{x}{The variable to test it on} | |||
\item{f}{A formula to parse} | |||
} | |||
\value{ | |||
A function to use as a stat/test | |||
} | |||
\description{ | |||
Parse a formula defining the conditions to pick a stat/test | |||
} | |||
\details{ | |||
Parse a formula defining the conditions to pick a stat/test | |||
and return the function to use. | |||
The formula is to be given in the form of | |||
conditional ~ T | F | |||
and conditions can be nested such as | |||
conditional1 ~ (conditional2 ~ T | F) | F | |||
The FALSE option can be omitted, and the TRUE can be replaced with NA | |||
} |
@@ -15,7 +15,7 @@ statTable(data, stats) | |||
A dataframe of all statistics for all variables | |||
} | |||
\description{ | |||
If stats is a list of functions, use them. | |||
If stats is a list of functions or purrr::map like formulas, use them. | |||
If it is a single function, use it with the entire data as | |||
its argument to produce a list of statistical functions to use. | |||
} |
@@ -2,15 +2,9 @@ | |||
% Please edit documentation in R/stats.R | |||
\name{statify} | |||
\alias{statify} | |||
\alias{statify.default} | |||
\alias{statify.formula} | |||
\title{Transform any function into a valid stat function for the table} | |||
\usage{ | |||
statify(x, f) | |||
\method{statify}{default}(x, f) | |||
\method{statify}{formula}(x, f) | |||
} | |||
\arguments{ | |||
\item{x}{A vector} | |||
@@ -52,6 +52,7 @@ iris %>% | |||
desctable(mtcars) | |||
``` | |||
<br> | |||
As you can see with these two examples, `desctable` describes every variable, with individual levels for factors. It picks statistical functions depending on the type and distribution of the variables in the data, and applies those statistical functions only on the relevant variables. | |||
@@ -70,13 +71,18 @@ mtcars %>% | |||
datatable() | |||
``` | |||
<br> | |||
To use `pander` you need to load the package yourself. | |||
Calls to `pander` and `datatable` with "regular" dataframes will not be affected by the defaults used in the package, and you can modify these defaults for **desctable** objects. | |||
Subsequent outputs in this vignette section will use **DT**. The `datatable` wrapper function for desctable objects comes with some default options and formatting such as freezing the row names and table header, export buttons, and rounding of values. Both `pander` and `datatable` wrapper take a *digits* argument to set the number of decimals to show. (`pander` uses the *digits*, *justify* and *missing* arguments of `pandoc.table`, whereas `datatable` calls `prettyNum` with the `digits` parameter, and removes `NA` values. You can set `digits = NULL` if you want the full table and format it yourself) | |||
The `datatable` wrapper function for desctable objects comes with some default options and formatting such as freezing the row names and table header, export buttons, and rounding of values. Both `pander` and `datatable` wrapper take a *digits* argument to set the number of decimals to show. (`pander` uses the *digits*, *justify* and *missing* arguments of `pandoc.table`, whereas `datatable` calls `prettyNum` with the `digits` parameter, and removes `NA` values. You can set `digits = NULL` if you want the full table and format it yourself) | |||
Subsequent outputs in this vignette will use **DT**. | |||
## Advanced usage | |||
`desctable` chooses statistical functions for you using this algorithm: | |||
`desctable` automatically chooses statistical functions if none is provided, using the following algorithm: | |||
* always show N | |||
* if there are factors, show % | |||
@@ -85,41 +91,56 @@ Subsequent outputs in this vignette section will use **DT**. The `datatable` wra | |||
For each variable in the table, compute the relevant statistical functions in that list (non-applicable functions will safely return `NA`). | |||
How does it work, and how can you adapt this behavior to your needs? | |||
You can specify the statistical functions yourself with the *stats* argument. This argument can either be: | |||
* a function for automatic selection of appropriate statistical functions, depending on the data | |||
* a named list of functions/formulas | |||
`desctable` takes an optional *stats* argument. This argument can either be: | |||
The functions/formulas leverage the **tidyverse** way of working with anonymous functions, i.e.: | |||
* an automatic function to select appropriate statistical functions | |||
* or a named list of | |||
* statistical functions | |||
* formulas describing conditions to use a statistical function. | |||
If a *function*, is is used as is. | |||
If a *formula*, e.g. '~ .x + 1' or `~ . + 1`, it is converted to a function. There are three ways to refer to the arguments: | |||
* For a single argument function, use '.' | |||
* For a two argument function, use '.x' and '.y' | |||
* For more arguments, use '..1', '..2', '..3' etc | |||
This syntax allows you to create very compact anonymous functions, and is the same as in the `map` family of functions from **purrr**. | |||
**Conditional formulas (`condition ~ if_T | if F`) from previous versions are no longer supported!** | |||
### Automatic function | |||
This is the default, using the `stats_auto` function provided in the package. | |||
The default value for the *stats* argument is `stats_auto`, provided in the package. | |||
Several other "automatic statistical functions" are defined in this package: `stats_auto`, `stats_default`, `stats_normal`, `stats_nonnormal`. | |||
You can also provide your own automatic function, which needs to | |||
You can also provide your own automatic function, which needs to | |||
* accept a dataframe as its argument (whether to use this dataframe or not in the function is your choice), and | |||
* return a named list of statistical functions to use, as defined in the subsequent paragraphs. | |||
* accept a dataframe as its argument (whether to use this dataframe or not in the function is your choice), and | |||
* return a named list of statistical functions to use, as defined in the subsequent paragraphs. | |||
```{r} | |||
# Strictly equivalent to iris %>% desctable %>% datatable | |||
# Strictly equivalent to iris %>% desctable() %>% datatable() | |||
iris %>% | |||
desctable(stats = stats_auto) %>% | |||
datatable() | |||
``` | |||
<br> | |||
For reference, here is the body of the `stats_auto` function in the package: | |||
```{r, echo = F} | |||
print(stats_auto) | |||
``` | |||
<br> | |||
### Statistical functions | |||
Statistical functions can be any function defined in R that you want to use, such as `length` or `mean`. | |||
Statistical functions can be **any** function defined in R that you want to use, such as `length` or `mean`. | |||
The only condition is that they return a single numerical value. One exception is when they return a vector of length `1 + nlevels(x)` when applied to factors, as is needed for the `percent` function. | |||
As mentioned above, they need to be used inside a named list, such as | |||
As mentioned above, they need to be used inside a **named list**, such as | |||
```{r} | |||
mtcars %>% | |||
@@ -130,54 +151,22 @@ mtcars %>% | |||
The names will be used as column headers in the resulting table, and the functions will be applied safely on the variables (errors return `NA`, and for factors the function will be used on individual levels). | |||
Several convenience functions are included in this package. For statistical function we have: `percent`, which prints percentages of levels in a factor, and `IQR` which re-implements `stats::IQR` but works better with `NA` values. | |||
Be aware that **all functions will be used on variables stripped of their `NA` values!** | |||
This is necessary for most statistical functions to be useful, and makes **N** (`length`) show only the number of observations in the dataset for each variable. | |||
### Conditional formulas | |||
Several convenience functions are included in this package. | |||
The general form of these formulas is | |||
* `percent`, which prints percentages of levels in a factor | |||
* `IQR`, which re-implements `stats::IQR` but works better with `NA` values | |||
* `is.normal`, which tests for normality using the following method: `length(na.omit(x)) > 30 & shapiro.test(x)$p.value > .1` | |||
```{r, eval = F} | |||
predicate_function ~ stat_function_if_TRUE | stat_function_if_FALSE | |||
``` | |||
A predicate function is any function returning either `TRUE` or `FALSE` when applied on a vector, such as `is.factor`, `is.numeric`, and `is.logical`. | |||
**desctable** provides the `is.normal` function to test for normality (it is equivalent to `length(na.omit(x)) > 30 & shapiro.test(x)$p.value > .1`). | |||
The *FALSE* option can be omitted and `NA` will be produced if the condition in the predicate is not met. | |||
These statements can be nested using parentheses. | |||
For example: | |||
`is.factor ~ percent | (is.normal ~ mean)` | |||
will either use `percent` if the variable is a factor, or `mean` if and only if the variable is normally distributed. | |||
You can mix "bare" statistical functions and formulas in the list defining the statistics you want to use in your table. | |||
```{r} | |||
iris %>% | |||
desctable(stats = list("N" = length, | |||
"%/Mean" = is.factor ~ percent | (is.normal ~ mean), | |||
"Median" = is.normal ~ NA | median)) %>% | |||
datatable() | |||
``` | |||
<br> | |||
For reference, here is the body of the `stats_auto` function in the package: | |||
```{r, echo = F} | |||
print(stats_auto) | |||
``` | |||
Be aware that **all functions will be used on variables stripped of their `NA` values!** | |||
This is necessary for most statistical functions to be useful, and makes **N** (`length`) show only the number of observations in the dataset for each variable. | |||
### Labels | |||
It is often the case that variable names are not "pretty" enough to be used as-is in a table. | |||
Although you could still edit the variable labels in the table afterwards using subsetting or string replacement functions, it is possible to mention a **labels** argument. | |||
Although you could still edit the variable labels in the table afterwards using sub-setting or string replacement functions, we provide a facility for this using the **labels** argument. | |||
The **labels** argument is a named character vector associating variable names and labels. | |||
You don't need to provide labels for all the variables, and extra labels will be silently discarded. This allows you to define a "global" labels vector and use it for every table even after variable selections. | |||
You don't need to provide labels for all the variables, and extra labels will be silently discarded. This allows you to define a "global" labels vector and use it for multiple tables even after variable selections. | |||
```{r} | |||
mtlabels <- c(mpg = "Miles/(US) gallon", | |||
@@ -207,7 +196,7 @@ mtcars %>% | |||
Creating a comparative table (between groups defined by a factor) using `desctable` is as easy as creating a descriptive table. | |||
It uses the well known `group_by` function from **dplyr**: | |||
It leverages the `group_by` function from **dplyr**: | |||
```{r} | |||
iris %>% | |||
@@ -216,16 +205,18 @@ iris %>% | |||
iris_by_Species | |||
``` | |||
<br> | |||
The result is a table containing a descriptive subtable for each level of the grouping factor (the statistical functions rules are applied to each subtable independently), with the statistical tests performed, and their p values. | |||
The result is a table containing a descriptive sub-table for each level of the grouping factor (the statistical functions rules are applied to each sub-table independently), with the statistical tests performed, and their p values. | |||
When displayed as a flat dataframe, the grouping header appears in each variable. | |||
When displayed as a flat dataframe, the grouping header appears in each variable name. | |||
You can also see the grouping headers by inspecting the resulting object, which is a deep list of dataframes, each dataframe named after the grouping factor and its levels (with sample size for each). | |||
You can also see the grouping headers by inspecting the resulting object, which is a nested list of dataframes, each dataframe being named after the grouping factor and its levels (with sample size for each). | |||
```{r} | |||
str(iris_by_Species) | |||
``` | |||
<br> | |||
You can specify groups based on any variable, not only factors: | |||
@@ -236,7 +227,9 @@ mtcars %>% | |||
desctable() %>% | |||
pander() | |||
``` | |||
Also with conditions: | |||
<br> | |||
You can also specify groups based on an expression | |||
```{r} | |||
# With datatable output | |||
@@ -247,7 +240,7 @@ iris %>% | |||
``` | |||
<br> | |||
And even on multiple nested groups: | |||
Multiple nested groups are also possible: | |||
```{r, message = F, warning = F} | |||
mtcars %>% | |||
@@ -264,7 +257,7 @@ Statistical tests are automatically selected depending on the data and the group | |||
## Advanced usage | |||
`desctable` choses the statistical tests using the following algorithm: | |||
`desctable` automatically chooses statistical functions if none is provided, using the following algorithm: | |||
* if the variable is a factor, use `fisher.test` | |||
* if the grouping factor has only one level, use the provided `no.test` (which does nothing) | |||
@@ -277,26 +270,28 @@ Statistical tests are automatically selected depending on the data and the group | |||
* and the variable does not present homoskedasticity (p value for `bartlett.test` < .1) but normality of distribution in all groups, use `oneway.test(var.equal = F)` | |||
* else use `kruskal.test` | |||
But what if you want to pick a specific test for a specific variable, or change all the tests altogether? | |||
You can specify the statistical test functions yourself with the *tests* argument. This argument can either be: | |||
`desctable` takes an optional *tests* argument. This argument can either be | |||
* a function for automatic selection of appropriate statistical test functions, depending on the data | |||
* a named list of statistical test functions | |||
* an automatic function to select appropriate statistical test functions | |||
* or a named list of statistical test functions | |||
Please note that the statistical test functions **must** be given as *formulas* so as to capture the name of the test to display in the table. | |||
**purrr** style formulas are also actepted, as with the statistical functions. | |||
This also allows to specify optional arguments of such functions, and go around non-standard test functions (see **Statistical test functions**). | |||
### Automatic function | |||
This is the default, using the `tests_auto` function provided in the package. | |||
The default value for the *tests* argument is `tests_auto`, provided in the package. | |||
You can also provide your own automatic function, which needs to | |||
* accept a variable and a grouping factor as its arguments, and | |||
* return a single-term formula containing a statistical test function. | |||
* accept a variable and a grouping factor as its arguments, and | |||
* return a single-term formula containing a statistical test function. | |||
This function will be used on every variable and every grouping factor to determine the appropriate test. | |||
```{r} | |||
# Strictly equivalent to iris %>% group_by(Species) %>% desctable %>% datatable | |||
# Strictly equivalent to iris %>% group_by(Species) %>% desctable() %>% datatable() | |||
iris %>% | |||
group_by(Species) %>% | |||
desctable(tests = tests_auto) %>% | |||
@@ -304,14 +299,20 @@ iris %>% | |||
``` | |||
<br> | |||
### List of statistical test functions | |||
For reference, here is the body of the `tests_auto` function in the package: | |||
```{r, echo = F} | |||
print(tests_auto) | |||
``` | |||
<br> | |||
### Statistical test functions | |||
You can provide a named list of statistical functions, but here the mechanism is a bit different from the *stats* argument. | |||
The list must contain either `.auto` or `.default`. | |||
* `.auto` needs to be an automatic function, such as `tests_auto`. It will be used by default on all variables to select a test | |||
* `.default` needs to be a single-term formula containing a statistical test function that will be used on all variables | |||
* `.auto` needs to be an automatic function, such as `tests_auto`. It will be used by default on all variables to select a test | |||
* `.default` needs to be a single-term formula containing a statistical test function that will be used on all variables | |||
You can also provide overrides to use specific tests for specific variables. | |||
This is done using list items named as the variable and containing a single-term formula function. | |||
@@ -333,55 +334,22 @@ mtcars %>% | |||
mpg = ~t.test)) %>% | |||
datatable() | |||
``` | |||
<br> | |||
You might wonder why the formula expression. That is needed to capture the test name, and to provide it in the resulting table. | |||
As with statistical functions, any statistical test function defined in R can be used. | |||
The conditions are that the function | |||
* accepts a formula (`variable ~ grouping_variable`) as a first positional argument (as is the case with most tests, like `t.test`), and | |||
* returns an object with a `p.value` element. | |||
Several convenience function are provided: formula versions for `chisq.test` and `fisher.test` using generic S3 methods (thus the behavior of standard calls to `chisq.test` and `fisher.test` are not modified), and `ANOVA`, a partial application of `oneway.test` with parameter *var.equal* = T. | |||
# Tips and tricks | |||
In the *stats* argument, you can not only feed function names, but even arbitrary function definitions, functional sequences (a feature provided with the pipe (`%>%`)), or partial applications (with the **purrr** package): | |||
Here's an example of **purrr** style function: | |||
```{r} | |||
mtcars %>% | |||
desctable(stats = list("N" = length, | |||
"Sum of squares" = function(x) sum(x^2), | |||
"Q1" = . %>% quantile(prob = .25), | |||
"Q3" = purrr::partial(quantile, probs = .75))) %>% | |||
datatable() | |||
``` | |||
<br> | |||
In the *tests* arguments, you can also provide function definitions, functional sequences, and partial applications in the formulas: | |||
```{r} | |||
iris %>% | |||
group_by(Species) %>% | |||
group_by(Petal.Length > 5) %>% | |||
desctable(tests = list(.auto = tests_auto, | |||
Sepal.Width = ~function(f) oneway.test(f, var.equal = F), | |||
Petal.Length = ~. %>% oneway.test(var.equal = T), | |||
Sepal.Length = ~purrr::partial(oneway.test, var.equal = T))) %>% | |||
datatable() | |||
Petal.Width = ~oneway.test(., var.equal = T))) | |||
``` | |||
<br> | |||
This allows you to modulate the behavior of `desctable` in every detail, such as using paired tests, or non *htest* tests. | |||
```{r} | |||
# This is a contrived example, which would be better solved with a dedicated function | |||
library(survival) | |||
As with statistical functions, **any** statistical test function defined in R can be used. | |||
bladder$surv <- Surv(bladder$stop, bladder$event) | |||
The conditions are that the function | |||
bladder %>% | |||
group_by(rx) %>% | |||
desctable(tests = list(.default = ~wilcox.test, | |||
surv = ~. %>% survdiff %>% .$chisq %>% pchisq(1, lower.tail = F) %>% list(p.value = .))) %>% | |||
datatable() | |||
``` | |||
* accepts a formula (`variable ~ grouping_variable`) as a first positional argument (as is the case with most tests, like `t.test`), and | |||
* returns an object with a `p.value` element. | |||
Several convenience function are provided: formula versions for `chisq.test` and `fisher.test` using generic S3 methods (thus the behavior of standard calls to `chisq.test` and `fisher.test` are not modified), and `ANOVA`, a partial application of `oneway.test` with parameter *var.equal* = T. |