@@ -0,0 +1,17 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/convenience_functions.R | |||
\name{ANOVA} | |||
\alias{ANOVA} | |||
\title{Wrapper for oneway.test(var.equal = T)} | |||
\usage{ | |||
ANOVA(formula) | |||
} | |||
\arguments{ | |||
\item{formula}{An anova formula (\code{variable ~ grouping variable})} | |||
} | |||
\description{ | |||
Wrapper for \code{\link{oneway.test}} called with \code{var.equal = TRUE} | |||
} | |||
\seealso{ | |||
\code{\link{oneway.test}} | |||
} |
@@ -0,0 +1,17 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/convenience_functions.R | |||
\name{IQR} | |||
\alias{IQR} | |||
\title{Return the inter-quartile range} | |||
\usage{ | |||
IQR(x) | |||
} | |||
\arguments{ | |||
\item{x}{A vector} | |||
} | |||
\value{ | |||
The IQR | |||
} | |||
\description{ | |||
Safe version of IQR for statify | |||
} |
@@ -0,0 +1,19 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/output.R | |||
\name{as.data.frame.desctable} | |||
\alias{as.data.frame.desctable} | |||
\title{As.data.frame method for desctable} | |||
\usage{ | |||
\method{as.data.frame}{desctable}(x, ...) | |||
} | |||
\arguments{ | |||
\item{x}{A desctable} | |||
\item{...}{Additional as.data.frame parameters} | |||
} | |||
\value{ | |||
A flat dataframe | |||
} | |||
\description{ | |||
As.data.frame method for desctable | |||
} |
@@ -0,0 +1,171 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/convenience_functions.R | |||
\name{chisq.test} | |||
\alias{chisq.test} | |||
\alias{chisq.test.default} | |||
\alias{chisq.test.formula} | |||
\title{Pearson's Chi-squared Test for Count Data} | |||
\source{ | |||
The code for Monte Carlo simulation is a C translation of the Fortran algorithm of Patefield (1981). | |||
} | |||
\usage{ | |||
chisq.test(x, y, correct, p, rescale.p, simulate.p.value, B) | |||
\method{chisq.test}{default}(x, y = NULL, correct = TRUE, | |||
p = rep(1/length(x), length(x)), rescale.p = FALSE, | |||
simulate.p.value = FALSE, B = 2000) | |||
\method{chisq.test}{formula}(x, y = NULL, correct = T, | |||
p = rep(1/length(x), length(x)), rescale.p = F, simulate.p.value = F, | |||
B = 2000) | |||
} | |||
\arguments{ | |||
\item{x}{a numeric vector, or matrix, or formula of the form \code{lhs ~ rhs} where \code{lhs} and \code{rhs} are factors. \code{x} and \code{y} can also both be factors.} | |||
\item{y}{a numeric vector; ignored if \code{x} is a matrix or a formula. If \code{x} is a factor, \code{y} should be a factor of the same length.} | |||
\item{correct}{a logical indicating whether to apply continuity | |||
correction when computing the test statistic for 2 by 2 tables: one | |||
half is subtracted from all \eqn{|O - E|} differences; however, the | |||
correction will not be bigger than the differences themselves. No correction | |||
is done if \code{simulate.p.value = TRUE}.} | |||
\item{p}{a vector of probabilities of the same length as \code{x}. | |||
An error is given if any entry of \code{p} is negative.} | |||
\item{rescale.p}{a logical scalar; if TRUE then \code{p} is rescaled | |||
(if necessary) to sum to 1. If \code{rescale.p} is FALSE, and | |||
\code{p} does not sum to 1, an error is given.} | |||
\item{simulate.p.value}{a logical indicating whether to compute | |||
p-values by Monte Carlo simulation.} | |||
\item{B}{an integer specifying the number of replicates used in the | |||
Monte Carlo test.} | |||
} | |||
\value{ | |||
A list with class \code{"htest"} containing the following components: | |||
statistic: the value of the chi-squared test statistic. | |||
parameter: the degrees of freedom of the approximate chi-squared | |||
distribution of the test statistic, \code{NA} if the p-value is | |||
computed by Monte Carlo simulation. | |||
p.value: the p-value for the test. | |||
method: a character string indicating the type of test performed, and | |||
whether Monte Carlo simulation or continuity correction was | |||
used. | |||
data.name: a character string giving the name(s) of the data. | |||
observed: the observed counts. | |||
expected: the expected counts under the null hypothesis. | |||
residuals: the Pearson residuals, | |||
\code{(observed - expected) / sqrt(expected)}. | |||
stdres: standardized residuals, \code{(observed - expected) / sqrt(V)}, | |||
where \code{V} is the residual cell variance (Agresti, 2007, | |||
section 2.4.5 for the case where \code{x} is a matrix, | |||
\code{n * p * (1 - p)} otherwise). | |||
} | |||
\description{ | |||
\code{chisq.test} performs chi-squared contingency table tests and goodness-of-fit tests, with an added method for formulas. | |||
} | |||
\details{ | |||
If \code{x} is a matrix with one row or column, or if \code{x} is a vector | |||
and \code{y} is not given, then a \emph{goodness-of-fit test} is performed | |||
(\code{x} is treated as a one-dimensional contingency table). The | |||
entries of \code{x} must be non-negative integers. In this case, the | |||
hypothesis tested is whether the population probabilities equal | |||
those in \code{p}, or are all equal if \code{p} is not given. | |||
If \code{x} is a matrix with at least two rows and columns, it is taken | |||
as a two-dimensional contingency table: the entries of \code{x} must be | |||
non-negative integers. Otherwise, \code{x} and \code{y} must be vectors or | |||
factors of the same length; cases with missing values are removed, | |||
the objects are coerced to factors, and the contingency table is | |||
computed from these. Then Pearson's chi-squared test is performed | |||
of the null hypothesis that the joint distribution of the cell | |||
counts in a 2-dimensional contingency table is the product of the | |||
row and column marginals. | |||
If \code{simulate.p.value} is \code{FALSE}, the p-value is computed from the | |||
asymptotic chi-squared distribution of the test statistic; | |||
continuity correction is only used in the 2-by-2 case (if | |||
\code{correct} is \code{TRUE}, the default). Otherwise the p-value is | |||
computed for a Monte Carlo test (Hope, 1968) with \code{B} replicates. | |||
In the contingency table case simulation is done by random | |||
sampling from the set of all contingency tables with given | |||
marginals, and works only if the marginals are strictly positive. | |||
Continuity correction is never used, and the statistic is quoted | |||
without it. Note that this is not the usual sampling situation | |||
assumed for the chi-squared test but rather that for Fisher's | |||
exact test. | |||
In the goodness-of-fit case simulation is done by random sampling | |||
from the discrete distribution specified by \code{p}, each sample being | |||
of size \code{n = sum(x)}. This simulation is done in R and may be | |||
slow. | |||
} | |||
\examples{ | |||
\dontrun{ | |||
## From Agresti(2007) p.39 | |||
M <- as.table(rbind(c(762, 327, 468), c(484, 239, 477))) | |||
dimnames(M) <- list(gender = c("F", "M"), | |||
party = c("Democrat","Independent", "Republican")) | |||
(Xsq <- chisq.test(M)) # Prints test summary | |||
Xsq$observed # observed counts (same as M) | |||
Xsq$expected # expected counts under the null | |||
Xsq$residuals # Pearson residuals | |||
Xsq$stdres # standardized residuals | |||
## Effect of simulating p-values | |||
x <- matrix(c(12, 5, 7, 7), ncol = 2) | |||
chisq.test(x)$p.value # 0.4233 | |||
chisq.test(x, simulate.p.value = TRUE, B = 10000)$p.value | |||
# around 0.29! | |||
## Testing for population probabilities | |||
## Case A. Tabulated data | |||
x <- c(A = 20, B = 15, C = 25) | |||
chisq.test(x) | |||
chisq.test(as.table(x)) # the same | |||
x <- c(89,37,30,28,2) | |||
p <- c(40,20,20,15,5) | |||
try( | |||
chisq.test(x, p = p) # gives an error | |||
) | |||
chisq.test(x, p = p, rescale.p = TRUE) | |||
# works | |||
p <- c(0.40,0.20,0.20,0.19,0.01) | |||
# Expected count in category 5 | |||
# is 1.86 < 5 ==> chi square approx. | |||
chisq.test(x, p = p) # maybe doubtful, but is ok! | |||
chisq.test(x, p = p, simulate.p.value = TRUE) | |||
## Case B. Raw data | |||
x <- trunc(5 * runif(100)) | |||
chisq.test(table(x)) # NOT 'chisq.test(x)'! | |||
### | |||
} | |||
} | |||
\references{ | |||
Hope, A. C. A. (1968) A simplified Monte Carlo significance test | |||
procedure. \emph{J. Roy. Statist. Soc. B} \bold{30}, 582-598. | |||
Patefield, W. M. (1981) Algorithm AS159. An efficient method of | |||
generating r x c tables with given row and column totals. | |||
\emph{Applied Statistics} \bold{30}, 91-97. | |||
Agresti, A. (2007) \emph{An Introduction to Categorical Data Analysis, | |||
2nd ed.}, New York: John Wiley & Sons. Page 38. | |||
} | |||
\seealso{ | |||
For goodness-of-fit testing, notably of continuous distributions, \code{\link{ks.test}}. | |||
} |
@@ -0,0 +1,155 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/output.R | |||
\name{datatable} | |||
\alias{datatable} | |||
\alias{datatable.default} | |||
\alias{datatable.desctable} | |||
\title{Create an HTML table widget using the DataTables library} | |||
\usage{ | |||
datatable(data, ...) | |||
\method{datatable}{default}(data, options = list(), class = "display", | |||
callback = DT::JS("return table;"), caption = NULL, filter = c("none", | |||
"bottom", "top"), escape = TRUE, style = "default", width = NULL, | |||
height = NULL, elementId = NULL, | |||
fillContainer = getOption("DT.fillContainer", NULL), | |||
autoHideNavigation = getOption("DT.autoHideNavigation", NULL), | |||
selection = c("multiple", "single", "none"), extensions = list(), | |||
plugins = NULL, ...) | |||
\method{datatable}{desctable}(data, options = list(paging = F, info = F, | |||
search = F, dom = "Brtip", fixedColumns = T, fixedHeader = T, buttons = | |||
c("copy", "excel")), class = "display", | |||
callback = DT::JS("return table;"), caption = NULL, filter = c("none", | |||
"bottom", "top"), escape = FALSE, style = "default", width = NULL, | |||
height = NULL, elementId = NULL, | |||
fillContainer = getOption("DT.fillContainer", NULL), | |||
autoHideNavigation = getOption("DT.autoHideNavigation", NULL), | |||
selection = c("multiple", "single", "none"), extensions = c("FixedHeader", | |||
"FixedColumns", "Buttons"), plugins = NULL, rownames = F, digits = 2, | |||
...) | |||
} | |||
\arguments{ | |||
\item{data}{a data object (either a matrix or a data frame)} | |||
\item{...}{arguments passed to \code{format}.} | |||
\item{options}{a list of initialization options (see | |||
\url{http://datatables.net/reference/option/}); the character options | |||
wrapped in \code{\link[htmlwidgets]{JS}()} will be treated as literal | |||
JavaScript code instead of normal character strings; you can also set | |||
options globally via \code{\link{options}(DT.options = list(...))}, and | |||
global options will be merged into this \code{options} argument if set} | |||
\item{class}{the CSS class(es) of the table; see | |||
\url{http://datatables.net/manual/styling/classes}} | |||
\item{callback}{the body of a JavaScript callback function with the argument | |||
\code{table} to be applied to the DataTables instance (i.e. \code{table})} | |||
\item{caption}{the table caption; a character vector or a tag object | |||
generated from \code{htmltools::tags$caption()}} | |||
\item{filter}{whether/where to use column filters; \code{none}: no filters; | |||
\code{bottom/top}: put column filters at the bottom/top of the table; range | |||
sliders are used to filter numeric/date/time columns, select lists are used | |||
for factor columns, and text input boxes are used for character columns; if | |||
you want more control over the styles of filters, you can provide a list to | |||
this argument of the form \code{list(position = 'top', clear = TRUE, plain | |||
= FALSE)}, where \code{clear} indicates whether you want the clear buttons | |||
in the input boxes, and \code{plain} means if you want to use Bootstrap | |||
form styles or plain text input styles for the text input boxes} | |||
\item{escape}{whether to escape HTML entities in the table: \code{TRUE} means | |||
to escape the whole table, and \code{FALSE} means not to escape it; | |||
alternatively, you can specify numeric column indices or column names to | |||
indicate which columns to escape, e.g. \code{1:5} (the first 5 columns), | |||
\code{c(1, 3, 4)}, or \code{c(-1, -3)} (all columns except the first and | |||
third), or \code{c('Species', 'Sepal.Length')}} | |||
\item{style}{the style name (\url{http://datatables.net/manual/styling/}); | |||
currently only \code{'default'} and \code{'bootstrap'} are supported} | |||
\item{width}{Width/Height in pixels (optional, defaults to automatic | |||
sizing)} | |||
\item{height}{Width/Height in pixels (optional, defaults to automatic | |||
sizing)} | |||
\item{elementId}{An id for the widget (a random string by default).} | |||
\item{fillContainer}{\code{TRUE} to configure the table to automatically fill | |||
its containing element. If the table can't fit fully into its container | |||
then vertical and/or horizontal scrolling of the table cells will occur.} | |||
\item{autoHideNavigation}{\code{TRUE} to automatically hide navigational UI | |||
when the number of total records is less than the page size.} | |||
\item{selection}{the row/column selection mode (single or multiple selection | |||
or disable selection) when a table widget is rendered in a Shiny app; | |||
alternatively, you can use a list of the form \code{list(mode = 'multiple', | |||
selected = c(1, 3, 8), target = 'row')} to pre-select rows; the element | |||
\code{target} in the list can be \code{'column'} to enable column | |||
selection, or \code{'row+column'} to make it possible to select both rows | |||
and columns (click on the footer to select columns), or \code{'cell'} to | |||
select cells} | |||
\item{extensions}{a character vector of the names of the DataTables | |||
extensions (\url{https://datatables.net/extensions/index})} | |||
\item{plugins}{a character vector of the names of DataTables plug-ins | |||
(\url{https://rstudio.github.io/DT/plugins.html})} | |||
\item{rownames}{\code{TRUE} (show row names) or \code{FALSE} (hide row names) | |||
or a character vector of row names; by default, the row names are displayed | |||
in the first column of the table if exist (not \code{NULL})} | |||
\item{digits}{the desired number of digits after the decimal | |||
point (\code{format = "f"}) or \emph{significant} digits | |||
(\code{format = "g"}, \code{= "e"} or \code{= "fg"}). | |||
Default: 2 for integer, 4 for real numbers. If less than 0, | |||
the C default of 6 digits is used. If specified as more than 50, 50 | |||
will be used with a warning unless \code{format = "f"} where it is | |||
limited to typically 324. (Not more than 15--21 digits need be | |||
accurate, depending on the OS and compiler used. This limit is | |||
just a precaution against segfaults in the underlying C runtime.) | |||
} | |||
} | |||
\description{ | |||
This function creates an HTML widget to display rectangular data (a matrix or data frame) using the JavaScript library DataTables, with a method for \code{desctable} objects. | |||
} | |||
\note{ | |||
You are recommended to escape the table content for security reasons (e.g. XSS attacks) when using this function in Shiny or any other dynamic web applications. | |||
} | |||
\examples{ | |||
library(DT) | |||
# see the package vignette for examples and the link to website | |||
vignette('DT', package = 'DT') | |||
# some boring edge cases for testing purposes | |||
m = matrix(nrow = 0, ncol = 5, dimnames = list(NULL, letters[1:5])) | |||
datatable(m) # zero rows | |||
datatable(as.data.frame(m)) | |||
m = matrix(1, dimnames = list(NULL, 'a')) | |||
datatable(m) # one row and one column | |||
datatable(as.data.frame(m)) | |||
m = data.frame(a = 1, b = 2, c = 3) | |||
datatable(m) | |||
datatable(as.matrix(m)) | |||
# dates | |||
datatable(data.frame( | |||
date = seq(as.Date("2015-01-01"), by = "day", length.out = 5), x = 1:5 | |||
)) | |||
datatable(data.frame(x = Sys.Date())) | |||
datatable(data.frame(x = Sys.time())) | |||
### | |||
} | |||
\references{ | |||
See \url{http://rstudio.github.io/DT} for the full documentation. | |||
} |
@@ -0,0 +1,111 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/build.R | |||
\name{desctable} | |||
\alias{desctable} | |||
\alias{desctable.default} | |||
\alias{desctable.grouped_df} | |||
\title{Generate a statistics table} | |||
\usage{ | |||
desctable(data, stats, tests, labels) | |||
\method{desctable}{default}(data, stats = stats_auto, tests, labels = NULL) | |||
\method{desctable}{grouped_df}(data, stats = stats_auto, tests = tests_auto, | |||
labels = NULL) | |||
} | |||
\arguments{ | |||
\item{data}{The dataframe to analyze} | |||
\item{stats}{A list of named statistics to apply to each element of the dataframe, or a function returning a list of named statistics} | |||
\item{tests}{A list of statistical tests to use when calling desctable with a grouped_df} | |||
\item{labels}{A named character vector of labels to use instead of variable names} | |||
} | |||
\value{ | |||
A desctable object, which prints to a table of statistics for all variables | |||
} | |||
\description{ | |||
Generate a statistics table with the chosen statistical functions, and tests if given a \code{"grouped"} dataframe. | |||
} | |||
\section{Labels}{ | |||
labels is an optional named character vector used to make the table prettier. | |||
If given, the variable names for which there is a label will be replaced by their corresponding label. | |||
Not all variables need to have a label, and labels for non-existing variables are ignored. | |||
labels must be given in the form c(unquoted_variable_name = "label") | |||
} | |||
\section{Stats}{ | |||
The stats can be a function which takes a dataframe and returns a list of statistical functions to use. | |||
stats can also be a named list of statistical functions, or formulas. | |||
The names will be used as column names in the resulting table. If an element of the list is a function, it will be used as-is for the stats. If an element of the list is a formula, it can be used to conditionally use stats depending on the variable. | |||
The general form is \code{condition ~ T | F}, and can be nested, such as \code{is.factor ~ percent | (is.normal ~ mean | median)}, for example. | |||
} | |||
\section{Tests}{ | |||
The tests can be a function which takes a variable and a grouping variable, and returns an appropriate statistical test to use in that case. | |||
tests can also be a named list of statistical test functions, associating the name of a variable in the data, and a test to use specifically for that variable. | |||
That test name must be expressed as a single-term formula (e.g. \code{~t.test}). You don't have to specify tests for all the variables: a default test for all other variables can be defined with the name \code{.default}, and an automatic test can be defined with the name \code{.auto}. | |||
If data is a grouped dataframe (using \code{group_by}), subtables are created and statistical tests are performed over each sub-group. | |||
} | |||
\section{Output}{ | |||
The output is a desctable object, which is a list of named dataframes that can be further manipulated. Methods for printing it and for using it with \pkg{pander} and \pkg{DT} are provided. Printing reduces the object to a dataframe. | |||
} | |||
\examples{ | |||
iris \%>\% | |||
desctable | |||
# Does the same as stats_auto here | |||
iris \%>\% | |||
desctable(stats = list("N" = length, | |||
"\%/Mean" = is.factor ~ percent | (is.normal ~ mean), | |||
"sd" = is.normal ~ sd, | |||
"Med" = is.normal ~ NA | median, | |||
"IQR" = is.normal ~ NA | IQR)) | |||
# With labels | |||
mtcars \%>\% desctable(labels = c(hp = "Horse Power", | |||
cyl = "Cylinders", | |||
mpg = "Miles per gallon")) | |||
# With grouping on a factor | |||
iris \%>\% | |||
group_by(Species) \%>\% | |||
desctable(stats = stats_default) | |||
# With nested grouping, on arbitrary variables | |||
mtcars \%>\% | |||
group_by(vs, cyl) \%>\% | |||
desctable | |||
# With grouping on a condition, and choice of tests | |||
iris \%>\% | |||
group_by(Petal.Length > 5) \%>\% | |||
desctable(tests = list(.auto = tests_auto, Species = ~chisq.test)) | |||
} | |||
\seealso{ | |||
\code{\link{stats_auto}} | |||
\code{\link{tests_auto}} | |||
\code{\link{print.desctable}} | |||
\code{\link{pander.desctable}} | |||
\code{\link{datatable.desctable}} | |||
} |
@@ -0,0 +1,212 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/convenience_functions.R | |||
\name{fisher.test} | |||
\alias{fisher.test} | |||
\alias{fisher.test.default} | |||
\alias{fisher.test.formula} | |||
\title{Fisher's Exact Test for Count Data} | |||
\usage{ | |||
fisher.test(x, y, workspace, hybrid, control, or, alternative, conf.int, | |||
conf.level, simulate.p.value, B) | |||
\method{fisher.test}{default}(x, ...) | |||
\method{fisher.test}{formula}(x, y = NULL, workspace = 2e+05, hybrid = F, | |||
control = list(), or = 1, alternative = "two.sided", conf.int = T, | |||
conf.level = 0.95, simulate.p.value = F, B = 2000) | |||
} | |||
\arguments{ | |||
\item{x}{either a two-dimensional contingency table in matrix form, a factor object, or a formula of the form \code{lhs ~ rhs} where \code{lhs} and \code{rhs} are factors.} | |||
\item{y}{a factor object; ignored if \code{x} is a matrix or a formula.} | |||
\item{workspace}{an integer specifying the size of the workspace | |||
used in the network algorithm. In units of 4 bytes. Only used for | |||
non-simulated p-values larger than \eqn{2 \times 2}{2 by 2} tables. | |||
Since \R version 3.5.0, this also increases the internal stack size | |||
which allows larger problems to be solved, however sometimes needing | |||
hours. In such cases, \code{simulate.p.value = TRUE} may be more | |||
reasonable.} | |||
\item{hybrid}{a logical. Only used for larger than \eqn{2 \times 2}{2 by 2} | |||
tables, in which cases it indicates whether the exact probabilities | |||
(default) or a hybrid approximation thereof should be computed.} | |||
\item{control}{a list with named components for low level algorithm | |||
control. At present the only one used is \code{"mult"}, a positive | |||
integer \eqn{\ge 2} with default 30 used only for larger than | |||
\eqn{2 \times 2}{2 by 2} tables. This says how many times as much | |||
space should be allocated to paths as to keys: see file | |||
\file{fexact.c} in the sources of this package.} | |||
\item{or}{the hypothesized odds ratio. Only used in the | |||
\eqn{2 \times 2}{2 by 2} case.} | |||
\item{alternative}{indicates the alternative hypothesis and must be | |||
one of \code{"two.sided"}, \code{"greater"} or \code{"less"}. | |||
You can specify just the initial letter. Only used in the | |||
\eqn{2 \times 2}{2 by 2} case.} | |||
\item{conf.int}{logical indicating if a confidence interval for the | |||
odds ratio in a \eqn{2 \times 2}{2 by 2} table should be | |||
computed (and returned).} | |||
\item{conf.level}{confidence level for the returned confidence | |||
interval. Only used in the \eqn{2 \times 2}{2 by 2} case and if | |||
\code{conf.int = TRUE}.} | |||
\item{simulate.p.value}{a logical indicating whether to compute | |||
p-values by Monte Carlo simulation, in larger than \eqn{2 \times | |||
2}{2 by 2} tables.} | |||
\item{B}{an integer specifying the number of replicates used in the | |||
Monte Carlo test.} | |||
\item{...}{additional params to feed to original fisher.test} | |||
} | |||
\value{ | |||
A list with class \code{"htest"} containing the following components: | |||
p.value: the p-value of the test. | |||
conf.int: a confidence interval for the odds ratio. Only present in | |||
the 2 by 2 case and if argument \code{conf.int = TRUE}. | |||
estimate: an estimate of the odds ratio. Note that the \emph{conditional} | |||
Maximum Likelihood Estimate (MLE) rather than the | |||
unconditional MLE (the sample odds ratio) is used. Only | |||
present in the 2 by 2 case. | |||
null.value: the odds ratio under the null, \code{or}. Only present in the 2 | |||
by 2 case. | |||
alternative: a character string describing the alternative hypothesis. | |||
method: the character string \code{"Fisher's Exact Test for Count Data"}. | |||
data.name: a character string giving the names of the data. | |||
} | |||
\description{ | |||
Performs Fisher's exact test for testing the null of independence | |||
of rows and columns in a contingency table with fixed marginals, or with a formula expression. | |||
} | |||
\details{ | |||
If \code{x} is a matrix, it is taken as a two-dimensional contingency | |||
table, and hence its entries should be nonnegative integers. | |||
Otherwise, both \code{x} and \code{y} must be vectors of the same length. | |||
Incomplete cases are removed, the vectors are coerced into factor | |||
objects, and the contingency table is computed from these. | |||
For 2 by 2 cases, p-values are obtained directly using the | |||
(central or non-central) hypergeometric distribution. Otherwise, | |||
computations are based on a C version of the FORTRAN subroutine | |||
FEXACT which implements the network developed by Mehta and Patel | |||
(1986) and improved by Clarkson, Fan and Joe (1993). The FORTRAN | |||
code can be obtained from \url{http://www.netlib.org/toms/643}. | |||
Note this fails (with an error message) when the entries of the | |||
table are too large. (It transposes the table if necessary so it | |||
has no more rows than columns. One constraint is that the product | |||
of the row marginals be less than 2^31 - 1.) | |||
For 2 by 2 tables, the null of conditional independence is | |||
equivalent to the hypothesis that the odds ratio equals one. | |||
\sQuote{Exact} inference can be based on observing that in general, given | |||
all marginal totals fixed, the first element of the contingency | |||
table has a non-central hypergeometric distribution with | |||
non-centrality parameter given by the odds ratio (Fisher, 1935). | |||
The alternative for a one-sided test is based on the odds ratio, | |||
so \code{alternative = "greater"} is a test of the odds ratio being | |||
bigger than \code{or}. | |||
Two-sided tests are based on the probabilities of the tables, and | |||
take as \sQuote{more extreme} all tables with probabilities less than or | |||
equal to that of the observed table, the p-value being the sum of | |||
such probabilities. | |||
For larger than 2 by 2 tables and \code{hybrid = TRUE}, asymptotic | |||
chi-squared probabilities are only used if the \sQuote{Cochran | |||
conditions} are satisfied, that is if no cell has count zero, and | |||
more than 80\% of the cells have counts at least 5: otherwise the | |||
exact calculation is used. | |||
Simulation is done conditional on the row and column marginals, | |||
and works only if the marginals are strictly positive. (A C | |||
translation of the algorithm of Patefield (1981) is used.) | |||
} | |||
\examples{ | |||
\dontrun{ | |||
## Agresti (1990, p. 61f; 2002, p. 91) Fisher's Tea Drinker | |||
## A British woman claimed to be able to distinguish whether milk or | |||
## tea was added to the cup first. To test, she was given 8 cups of | |||
## tea, in four of which milk was added first. The null hypothesis | |||
## is that there is no association between the true order of pouring | |||
## and the woman's guess, the alternative that there is a positive | |||
## association (that the odds ratio is greater than 1). | |||
TeaTasting <- | |||
matrix(c(3, 1, 1, 3), | |||
nrow = 2, | |||
dimnames = list(Guess = c("Milk", "Tea"), | |||
Truth = c("Milk", "Tea"))) | |||
fisher.test(TeaTasting, alternative = "greater") | |||
## => p = 0.2429, association could not be established | |||
## Fisher (1962, 1970), Criminal convictions of like-sex twins | |||
Convictions <- | |||
matrix(c(2, 10, 15, 3), | |||
nrow = 2, | |||
dimnames = | |||
list(c("Dizygotic", "Monozygotic"), | |||
c("Convicted", "Not convicted"))) | |||
Convictions | |||
fisher.test(Convictions, alternative = "less") | |||
fisher.test(Convictions, conf.int = FALSE) | |||
fisher.test(Convictions, conf.level = 0.95)$conf.int | |||
fisher.test(Convictions, conf.level = 0.99)$conf.int | |||
## A r x c table Agresti (2002, p. 57) Job Satisfaction | |||
Job <- matrix(c(1,2,1,0, 3,3,6,1, 10,10,14,9, 6,7,12,11), 4, 4, | |||
dimnames = list(income = c("< 15k", "15-25k", "25-40k", "> 40k"), | |||
satisfaction = c("VeryD", "LittleD", "ModerateS", "VeryS"))) | |||
fisher.test(Job) | |||
fisher.test(Job, simulate.p.value = TRUE, B = 1e5) | |||
### | |||
} | |||
} | |||
\references{ | |||
Agresti, A. (1990) \emph{Categorical data analysis}. New York: Wiley. | |||
Pages 59-66. | |||
Agresti, A. (2002) \emph{Categorical data analysis}. Second edition. | |||
New York: Wiley. Pages 91-101. | |||
Fisher, R. A. (1935) The logic of inductive inference. \emph{Journal | |||
of the Royal Statistical Society Series A} \bold{98}, 39-54. | |||
Fisher, R. A. (1962) Confidence limits for a cross-product ratio. | |||
\emph{Australian Journal of Statistics} \bold{4}, 41. | |||
Fisher, R. A. (1970) \emph{Statistical Methods for Research Workers.} | |||
Oliver & Boyd. | |||
Mehta, C. R. and Patel, N. R. (1986) Algorithm 643. FEXACT: A | |||
Fortran subroutine for Fisher's exact test on unordered r*c | |||
contingency tables. \emph{ACM Transactions on Mathematical Software}, | |||
\bold{12}, 154-161. | |||
Clarkson, D. B., Fan, Y. and Joe, H. (1993) A Remark on Algorithm | |||
643: FEXACT: An Algorithm for Performing Fisher's Exact Test in r | |||
x c Contingency Tables. \emph{ACM Transactions on Mathematical | |||
Software}, \bold{19}, 484-488. | |||
Patefield, W. M. (1981) Algorithm AS159. An efficient method of | |||
generating r x c tables with given row and column totals. | |||
\emph{Applied Statistics} \bold{30}, 91-97. | |||
} | |||
\seealso{ | |||
\code{\link{chisq.test}} | |||
\code{fisher.exact} in package \pkg{exact2x2} for alternative | |||
interpretations of two-sided tests and confidence intervals for 2 | |||
by 2 tables. | |||
} |
@@ -0,0 +1,17 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/utils.R | |||
\name{flatten_desctable} | |||
\alias{flatten_desctable} | |||
\title{Flatten a desctable to a dataframe recursively} | |||
\usage{ | |||
flatten_desctable(desctable) | |||
} | |||
\arguments{ | |||
\item{desctable}{A desctable object} | |||
} | |||
\value{ | |||
A flat dataframe | |||
} | |||
\description{ | |||
Flatten a desctable to a dataframe recursively | |||
} |
@@ -0,0 +1,12 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/imports.R | |||
\name{group_by} | |||
\alias{group_by} | |||
\title{Group a tbl by one or more variables.} | |||
\usage{ | |||
group_by(.data, ..., add = FALSE) | |||
} | |||
\description{ | |||
Group a tbl by one or more variables. | |||
} | |||
\keyword{internal} |
@@ -0,0 +1,17 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/utils.R | |||
\name{head_dataframe} | |||
\alias{head_dataframe} | |||
\title{Build the header for dataframe} | |||
\usage{ | |||
head_dataframe(head) | |||
} | |||
\arguments{ | |||
\item{head}{A headerList object} | |||
} | |||
\value{ | |||
A names vector | |||
} | |||
\description{ | |||
Build the header for dataframe | |||
} |
@@ -0,0 +1,17 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/utils.R | |||
\name{head_datatable} | |||
\alias{head_datatable} | |||
\title{Build the header for datatable} | |||
\usage{ | |||
head_datatable(head) | |||
} | |||
\arguments{ | |||
\item{head}{A headerList object} | |||
} | |||
\value{ | |||
An htmltools$tags object containing the header | |||
} | |||
\description{ | |||
Build the header for datatable | |||
} |
@@ -0,0 +1,17 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/utils.R | |||
\name{head_pander} | |||
\alias{head_pander} | |||
\title{Build the header for pander} | |||
\usage{ | |||
head_pander(head) | |||
} | |||
\arguments{ | |||
\item{head}{A headerList object} | |||
} | |||
\value{ | |||
A names vector | |||
} | |||
\description{ | |||
Build the header for pander | |||
} |
@@ -0,0 +1,20 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/utils.R | |||
\name{header} | |||
\alias{header} | |||
\title{Build header} | |||
\usage{ | |||
header(desctable, output = c("pander", "datatable", "dataframe")) | |||
} | |||
\arguments{ | |||
\item{desctable}{A desctable object} | |||
\item{output}{An output format for the header} | |||
} | |||
\value{ | |||
A header object in the output format | |||
} | |||
\description{ | |||
Take a desctable object and create a suitable header for the mentioned output. | |||
Output can be one of "pander", "datatable", or "dataframe". | |||
} |
@@ -0,0 +1,17 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/utils.R | |||
\name{headerList} | |||
\alias{headerList} | |||
\title{Build a header list object} | |||
\usage{ | |||
headerList(desctable) | |||
} | |||
\arguments{ | |||
\item{desctable}{A desctable} | |||
} | |||
\value{ | |||
A nested list of headers with colspans | |||
} | |||
\description{ | |||
Build a header list object | |||
} |
@@ -0,0 +1,21 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/utils.R | |||
\name{insert} | |||
\alias{insert} | |||
\title{Insert a vector y inside another vector x at position} | |||
\usage{ | |||
insert(x, y, position) | |||
} | |||
\arguments{ | |||
\item{x}{A vector} | |||
\item{y}{A vector or list of vectors} | |||
\item{position}{The position / vector of positions to insert vector(s) y in vector x} | |||
} | |||
\value{ | |||
The combined vector | |||
} | |||
\description{ | |||
Insert a vector y inside another vector x at position | |||
} |
@@ -0,0 +1,18 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/convenience_functions.R | |||
\name{is.normal} | |||
\alias{is.normal} | |||
\title{Test if distribution is normal} | |||
\usage{ | |||
is.normal(x) | |||
} | |||
\arguments{ | |||
\item{x}{A numerical vector} | |||
} | |||
\value{ | |||
A boolean | |||
} | |||
\description{ | |||
Test if distribution is normal. | |||
The condition for normality is length > 30 and non-significant Shapiro-Wilk test with p > .1 | |||
} |
@@ -0,0 +1,14 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/convenience_functions.R | |||
\name{no.test} | |||
\alias{no.test} | |||
\title{No test} | |||
\usage{ | |||
no.test(formula) | |||
} | |||
\arguments{ | |||
\item{formula}{A formula} | |||
} | |||
\description{ | |||
An empty test | |||
} |
@@ -0,0 +1,35 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/output.R | |||
\name{pander.desctable} | |||
\alias{pander.desctable} | |||
\title{Pander method for desctable} | |||
\usage{ | |||
pander.desctable(x = NULL, digits = 2, justify = "left", missing = "", | |||
keep.line.breaks = T, split.tables = Inf, emphasize.rownames = F, ...) | |||
} | |||
\arguments{ | |||
\item{x}{A desctable} | |||
\item{digits}{passed to \code{format}. Can be a vector specifying values for each column (has to be the same length as number of columns).} | |||
\item{justify}{defines alignment in cells passed to \code{format}. Can be \code{left}, \code{right} or \code{centre}, the latter of which can also be spelled \code{center}. Defaults to \code{left}. Can be abbreviated to a string consisting of the letters \code{l}, \code{c} and \code{r} (e.g. 'lcr' instead of c('left', 'centre', 'right')).} | |||
\item{missing}{string to replace missing values} | |||
\item{keep.line.breaks}{(default: \code{TRUE}) whether to keep or remove line breaks from cells in a table} | |||
\item{split.tables}{where to split wide tables to separate tables. The default value here (\code{Inf}) disables this feature; set it to a number of characters (e.g. \code{80}, the conventional number of characters used in a line on a VT100 terminal) to split wide tables.} | |||
\item{emphasize.rownames}{boolean (default: \code{FALSE}) if row names should be highlighted} | |||
\item{...}{unsupported extra arguments directly placed into \code{/dev/null}} | |||
} | |||
\description{ | |||
Pander method to output a desctable | |||
} | |||
\details{ | |||
Uses \code{pandoc.table}, with some default parameters (\code{digits = 2}, \code{justify = "left"}, \code{missing = ""}, \code{keep.line.breaks = T}, \code{split.tables = Inf}, and \code{emphasize.rownames = F}), that you can override if needed. | |||
} | |||
\seealso{ | |||
\code{\link{pandoc.table}} | |||
} |
@@ -0,0 +1,28 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/utils.R | |||
\name{parse_formula} | |||
\alias{parse_formula} | |||
\title{Parse a formula} | |||
\usage{ | |||
parse_formula(x, f) | |||
} | |||
\arguments{ | |||
\item{x}{The variable to test it on} | |||
\item{f}{A formula to parse} | |||
} | |||
\value{ | |||
A function to use as a stat/test | |||
} | |||
\description{ | |||
Parse a formula defining the conditions to pick a stat/test | |||
} | |||
\details{ | |||
Parse a formula defining the conditions to pick a stat/test | |||
and return the function to use. | |||
The formula is to be given in the form of | |||
conditional ~ T | F | |||
and conditions can be nested such as | |||
conditional1 ~ (conditional2 ~ T | F) | F | |||
The FALSE option can be omitted, and the TRUE can be replaced with NA | |||
} |
@@ -0,0 +1,18 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/convenience_functions.R | |||
\name{percent} | |||
\alias{percent} | |||
\title{Return the percentages for the levels of a factor} | |||
\usage{ | |||
percent(x) | |||
} | |||
\arguments{ | |||
\item{x}{A factor} | |||
} | |||
\value{ | |||
A nlevels(x) + 1 length vector of percentages | |||
} | |||
\description{ | |||
Return a compatible vector of length nlevels(x) + 1 | |||
to print the percentages of each level of a factor | |||
} |
@@ -0,0 +1,12 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/imports.R | |||
\name{\%>\%} | |||
\alias{\%>\%} | |||
\title{Pipe operator} | |||
\usage{ | |||
lhs \%>\% rhs | |||
} | |||
\description{ | |||
Pipe operator | |||
} | |||
\keyword{internal} |
@@ -0,0 +1,19 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/output.R | |||
\name{print.desctable} | |||
\alias{print.desctable} | |||
\title{Print method for desctable} | |||
\usage{ | |||
\method{print}{desctable}(x, ...) | |||
} | |||
\arguments{ | |||
\item{x}{A desctable} | |||
\item{...}{Additional print parameters} | |||
} | |||
\value{ | |||
A flat dataframe | |||
} | |||
\description{ | |||
Print method for desctable | |||
} |
@@ -0,0 +1,19 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/build.R | |||
\name{statColumn} | |||
\alias{statColumn} | |||
\title{Generate one statistic for all variables} | |||
\usage{ | |||
statColumn(stat, data) | |||
} | |||
\arguments{ | |||
\item{stat}{The statistic to use} | |||
\item{data}{The dataframe to apply the statistic to} | |||
} | |||
\value{ | |||
A vector for one statistic column | |||
} | |||
\description{ | |||
Generate one statistic for all variables | |||
} |
@@ -0,0 +1,19 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/build.R | |||
\name{statTable} | |||
\alias{statTable} | |||
\title{Generate the table of all statistics for all variables} | |||
\usage{ | |||
statTable(data, stats) | |||
} | |||
\arguments{ | |||
\item{data}{The dataframe to apply the statistic to} | |||
\item{stats}{A list of named statistics to use} | |||
} | |||
\value{ | |||
A dataframe of all statistics for all variables | |||
} | |||
\description{ | |||
Generate the table of all statistics for all variables | |||
} |
@@ -0,0 +1,34 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/stats.R | |||
\name{statify} | |||
\alias{statify} | |||
\alias{statify.default} | |||
\alias{statify.formula} | |||
\title{Transform any function into a valid stat function for the table} | |||
\usage{ | |||
statify(x, f) | |||
\method{statify}{default}(x, f) | |||
\method{statify}{formula}(x, f) | |||
} | |||
\arguments{ | |||
\item{x}{A vector} | |||
\item{f}{The function to try to apply, or a formula combining two functions} | |||
} | |||
\value{ | |||
The results for the function applied on the vector, compatible with the format of the result table | |||
} | |||
\description{ | |||
Transform a function into a valid stat function for the table | |||
} | |||
\details{ | |||
NA values are removed from the data. | |||
Applying the function on a numerical vector should return one value. | |||
Applying the function on a factor should return nlevels + 1 values, or one value per factor level. | |||
See \code{parse_formula} for the usage of formulas. | |||
} |
@@ -0,0 +1,37 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/stats.R | |||
\name{stats_default} | |||
\alias{stats_default} | |||
\alias{stats_normal} | |||
\alias{stats_nonnormal} | |||
\alias{stats_auto} | |||
\title{Functions to create a list of statistics to use in desctable} | |||
\usage{ | |||
stats_default(data) | |||
stats_normal(data) | |||
stats_nonnormal(data) | |||
stats_auto(data) | |||
} | |||
\arguments{ | |||
\item{data}{The dataframe to apply the statistic to} | |||
} | |||
\value{ | |||
A list of statistics to use, potentially assessed from the dataframe | |||
} | |||
\description{ | |||
These functions take a dataframe as argument and return a list of statistics in the form accepted by desctable. | |||
} | |||
\details{ | |||
Already defined are | |||
\enumerate{ | |||
\item stats_default with length, \%, mean, sd, med and IQR | |||
\item stats_normal with length, \%, mean and sd | |||
\item stats_nonnormal with length, \%, median and IQR | |||
\item stats_auto, which picks stats depending on the data | |||
} | |||
You can define your own automatic functions, as long as they take a dataframe as argument and return a list of functions or formulas defining conditions to use a stat function. | |||
} |
@@ -0,0 +1,20 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/build.R | |||
\name{subNames} | |||
\alias{subNames} | |||
\title{Create the subtables names} | |||
\usage{ | |||
subNames(grp, df) | |||
} | |||
\arguments{ | |||
\item{grp}{Grouping factor} | |||
\item{df}{Dataframe containing the grouping factor} | |||
} | |||
\value{ | |||
A character vector with the names for the subtables | |||
} | |||
\description{ | |||
Create the subtables names, as | |||
factor: level (n=sub-group length) | |||
} |
@@ -0,0 +1,23 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/build.R | |||
\name{subTable} | |||
\alias{subTable} | |||
\title{Create a subtable in a grouped desctable} | |||
\usage{ | |||
subTable(df, stats, tests, grps) | |||
} | |||
\arguments{ | |||
\item{df}{Dataframe to use} | |||
\item{stats}{Stats list/function to use} | |||
\item{tests}{Tests list/function to use} | |||
\item{grps}{List of symbols for grouping factors} | |||
} | |||
\value{ | |||
A nested list of statTables and testColumns | |||
} | |||
\description{ | |||
Create a subtable in a grouped desctable | |||
} |
@@ -0,0 +1,21 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/build.R | |||
\name{testColumn} | |||
\alias{testColumn} | |||
\title{Create the pvalues column} | |||
\usage{ | |||
testColumn(df, tests, grp) | |||
} | |||
\arguments{ | |||
\item{df}{Dataframe to use for the tests} | |||
\item{tests}{Test function or list of functions} | |||
\item{grp}{Grouping factor} | |||
} | |||
\value{ | |||
A numeric vector of pvalues | |||
} | |||
\description{ | |||
Create the pvalues column | |||
} |
@@ -0,0 +1,23 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/tests.R | |||
\name{testify} | |||
\alias{testify} | |||
\title{Transform any test function into a valid test function for the table} | |||
\usage{ | |||
testify(x, f, group) | |||
} | |||
\arguments{ | |||
\item{x}{A vector} | |||
\item{f}{The function to try to apply, or a formula combining two functions} | |||
\item{group}{Grouping factor} | |||
} | |||
\value{ | |||
The results for the function applied on the vector, compatible with the format of the result table | |||
} | |||
\description{ | |||
Transform a function into a valid test function for the table. | |||
Applying the function on a numerical vector should return one value. | |||
Applying the function on a factor should return nlevels + 1 values, or one value per factor level. | |||
} |
@@ -0,0 +1,22 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/tests.R | |||
\name{tests_auto} | |||
\alias{tests_auto} | |||
\title{Functions to choose a statistical test} | |||
\usage{ | |||
tests_auto(var, grp) | |||
} | |||
\arguments{ | |||
\item{var}{The variable to test} | |||
\item{grp}{The variable for the groups} | |||
} | |||
\value{ | |||
A statistical test function | |||
} | |||
\description{ | |||
These functions take a variable and a grouping variable as arguments, and return a statistical test to use, expressed as a single-term formula. | |||
} | |||
\details{ | |||
Currently, only \code{tests_auto} is defined, and picks between t test, wilcoxon, anova, kruskal-wallis and fisher depending on the number of groups, the type of the variable, the normality and homoskedasticity of the distributions. | |||
} |
@@ -0,0 +1,25 @@ | |||
% Generated by roxygen2: do not edit by hand | |||
% Please edit documentation in R/build.R | |||
\name{varColumn} | |||
\alias{varColumn} | |||
\title{Generate the variable column to display as row names} | |||
\usage{ | |||
varColumn(data, labels = NULL) | |||
} | |||
\arguments{ | |||
\item{data}{The dataframe to get the names from} | |||
\item{labels}{The optional named character vector containing the key-value pairs var = "Label"} | |||
} | |||
\value{ | |||
A dataframe with one variable named "Variables", a character vector of variable names/labels and levels | |||
} | |||
\description{ | |||
Generates the variable column. | |||
Replaces the variable names by their label if given in the named character vector labels, and inserts levels for factors. | |||
} | |||
\details{ | |||
labels is an optional named character vector used to make the table prettier. | |||
If given, the variable names for which there is a label will be replaced by their corresponding label. | |||
Not all variables need to have a label, and labels for non-existing variables are ignored. | |||
} |