Browse Source

Added documentation back for code coverage

tags/0.1.3
Maxime Wack 5 years ago
parent
commit
9f46d904f2
32 changed files with 1227 additions and 0 deletions
  1. +17
    -0
      man/ANOVA.Rd
  2. +17
    -0
      man/IQR.Rd
  3. +19
    -0
      man/as.data.frame.desctable.Rd
  4. +171
    -0
      man/chisq.test.Rd
  5. +155
    -0
      man/datatable.Rd
  6. +111
    -0
      man/desctable.Rd
  7. +212
    -0
      man/fisher.test.Rd
  8. +17
    -0
      man/flatten_desctable.Rd
  9. +12
    -0
      man/group_by.Rd
  10. +17
    -0
      man/head_dataframe.Rd
  11. +17
    -0
      man/head_datatable.Rd
  12. +17
    -0
      man/head_pander.Rd
  13. +20
    -0
      man/header.Rd
  14. +17
    -0
      man/headerList.Rd
  15. +21
    -0
      man/insert.Rd
  16. +18
    -0
      man/is.normal.Rd
  17. +14
    -0
      man/no.test.Rd
  18. +35
    -0
      man/pander.desctable.Rd
  19. +28
    -0
      man/parse_formula.Rd
  20. +18
    -0
      man/percent.Rd
  21. +12
    -0
      man/pipe.Rd
  22. +19
    -0
      man/print.desctable.Rd
  23. +19
    -0
      man/statColumn.Rd
  24. +19
    -0
      man/statTable.Rd
  25. +34
    -0
      man/statify.Rd
  26. +37
    -0
      man/stats_default.Rd
  27. +20
    -0
      man/subNames.Rd
  28. +23
    -0
      man/subTable.Rd
  29. +21
    -0
      man/testColumn.Rd
  30. +23
    -0
      man/testify.Rd
  31. +22
    -0
      man/tests_auto.Rd
  32. +25
    -0
      man/varColumn.Rd

+ 17
- 0
man/ANOVA.Rd View File

@@ -0,0 +1,17 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/convenience_functions.R
\name{ANOVA}
\alias{ANOVA}
\title{Wrapper for oneway.test(var.equal = T)}
\usage{
ANOVA(formula)
}
\arguments{
\item{formula}{An anova formula (\code{variable ~ grouping variable})}
}
\description{
Wrapper for oneway.test(var.equal = T)
}
\seealso{
\code{\link{oneway.test}}
}

+ 17
- 0
man/IQR.Rd View File

@@ -0,0 +1,17 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/convenience_functions.R
\name{IQR}
\alias{IQR}
\title{Return the inter-quartile range}
\usage{
IQR(x)
}
\arguments{
\item{x}{A vector}
}
\value{
The IQR
}
\description{
Safe version of IQR for statify
}

+ 19
- 0
man/as.data.frame.desctable.Rd View File

@@ -0,0 +1,19 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/output.R
\name{as.data.frame.desctable}
\alias{as.data.frame.desctable}
\title{As.data.frame method for desctable}
\usage{
\method{as.data.frame}{desctable}(x, ...)
}
\arguments{
\item{x}{A desctable}

\item{...}{Additional as.data.frame parameters}
}
\value{
A flat dataframe
}
\description{
As.data.frame method for desctable
}

+ 171
- 0
man/chisq.test.Rd View File

@@ -0,0 +1,171 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/convenience_functions.R
\name{chisq.test}
\alias{chisq.test}
\alias{chisq.test.default}
\alias{chisq.test.formula}
\title{Pearson's Chi-squared Test for Count Data}
\source{
The code for Monte Carlo simulation is a C translation of the Fortran algorithm of Patefield (1981).
}
\usage{
chisq.test(x, y, correct, p, rescale.p, simulate.p.value, B)

\method{chisq.test}{default}(x, y = NULL, correct = TRUE,
p = rep(1/length(x), length(x)), rescale.p = FALSE,
simulate.p.value = FALSE, B = 2000)

\method{chisq.test}{formula}(x, y = NULL, correct = T,
p = rep(1/length(x), length(x)), rescale.p = F, simulate.p.value = F,
B = 2000)
}
\arguments{
\item{x}{a numeric vector, or matrix, or formula of the form \code{lhs ~ rhs} where \code{lhs} and \code{rhs} are factors. \code{x} and \code{y} can also both be factors.}

\item{y}{a numeric vector; ignored if \code{x} is a matrix or a formula. If \code{x} is a factor, \code{y} should be a factor of the same length.}

\item{correct}{a logical indicating whether to apply continuity
correction when computing the test statistic for 2 by 2 tables: one
half is subtracted from all \eqn{|O - E|} differences; however, the
correction will not be bigger than the differences themselves. No correction
is done if \code{simulate.p.value = TRUE}.}

\item{p}{a vector of probabilities of the same length as \code{x}.
An error is given if any entry of \code{p} is negative.}

\item{rescale.p}{a logical scalar; if TRUE then \code{p} is rescaled
(if necessary) to sum to 1. If \code{rescale.p} is FALSE, and
\code{p} does not sum to 1, an error is given.}

\item{simulate.p.value}{a logical indicating whether to compute
p-values by Monte Carlo simulation.}

\item{B}{an integer specifying the number of replicates used in the
Monte Carlo test.}
}
\value{
A list with class \code{"htest"} containing the following components:
statistic: the value of the chi-squared test statistic.

parameter: the degrees of freedom of the approximate chi-squared
distribution of the test statistic, \code{NA} if the p-value is
computed by Monte Carlo simulation.

p.value: the p-value for the test.

method: a character string indicating the type of test performed, and
whether Monte Carlo simulation or continuity correction was
used.

data.name: a character string giving the name(s) of the data.

observed: the observed counts.

expected: the expected counts under the null hypothesis.

residuals: the Pearson residuals, \code{(observed - expected) /
sqrt(expected)}.

stdres: standardized residuals, \code{(observed - expected) / sqrt(V)},
where \code{V} is the residual cell variance (Agresti, 2007,
section 2.4.5 for the case where \code{x} is a matrix,
\code{n * p * (1 - p)} otherwise).
}
\description{
\code{chisq.test} performs chi-squared contingency table tests and goodness-of-fit tests, with an added method for formulas.
}
\details{
If \code{x} is a matrix with one row or column, or if \code{x} is a vector
and \code{y} is not given, then a \emph{goodness-of-fit test} is performed
(\code{x} is treated as a one-dimensional contingency table). The
entries of \code{x} must be non-negative integers. In this case, the
hypothesis tested is whether the population probabilities equal
those in \code{p}, or are all equal if \code{p} is not given.

If \code{x} is a matrix with at least two rows and columns, it is taken
as a two-dimensional contingency table: the entries of \code{x} must be
non-negative integers. Otherwise, \code{x} and \code{y} must be vectors or
factors of the same length; cases with missing values are removed,
the objects are coerced to factors, and the contingency table is
computed from these. Then Pearson's chi-squared test is performed
of the null hypothesis that the joint distribution of the cell
counts in a 2-dimensional contingency table is the product of the
row and column marginals.

If \code{simulate.p.value} is \code{FALSE}, the p-value is computed from the
asymptotic chi-squared distribution of the test statistic;
continuity correction is only used in the 2-by-2 case (if
\code{correct} is \code{TRUE}, the default). Otherwise the p-value is
computed for a Monte Carlo test (Hope, 1968) with \code{B} replicates.

In the contingency table case simulation is done by random
sampling from the set of all contingency tables with given
marginals, and works only if the marginals are strictly positive.
Continuity correction is never used, and the statistic is quoted
without it. Note that this is not the usual sampling situation
assumed for the chi-squared test but rather that for Fisher's
exact test.

In the goodness-of-fit case simulation is done by random sampling
from the discrete distribution specified by \code{p}, each sample being
of size \code{n = sum(x)}. This simulation is done in R and may be
slow.
}
\examples{
\dontrun{
## From Agresti(2007) p.39
M <- as.table(rbind(c(762, 327, 468), c(484, 239, 477)))
dimnames(M) <- list(gender = c("F", "M"),
party = c("Democrat","Independent", "Republican"))
(Xsq <- chisq.test(M)) # Prints test summary
Xsq$observed # observed counts (same as M)
Xsq$expected # expected counts under the null
Xsq$residuals # Pearson residuals
Xsq$stdres # standardized residuals


## Effect of simulating p-values
x <- matrix(c(12, 5, 7, 7), ncol = 2)
chisq.test(x)$p.value # 0.4233
chisq.test(x, simulate.p.value = TRUE, B = 10000)$p.value
# around 0.29!

## Testing for population probabilities
## Case A. Tabulated data
x <- c(A = 20, B = 15, C = 25)
chisq.test(x)
chisq.test(as.table(x)) # the same
x <- c(89,37,30,28,2)
p <- c(40,20,20,15,5)
try(
chisq.test(x, p = p) # gives an error
)
chisq.test(x, p = p, rescale.p = TRUE)
# works
p <- c(0.40,0.20,0.20,0.19,0.01)
# Expected count in category 5
# is 1.86 < 5 ==> chi square approx.
chisq.test(x, p = p) # maybe doubtful, but is ok!
chisq.test(x, p = p, simulate.p.value = TRUE)

## Case B. Raw data
x <- trunc(5 * runif(100))
chisq.test(table(x)) # NOT 'chisq.test(x)'!

###
}
}
\references{
Hope, A. C. A. (1968) A simplified Monte Carlo significance test
procedure. \emph{J. Roy. Statist. Soc. B} \bold{30}, 582--598.

Patefield, W. M. (1981) Algorithm AS159. An efficient method of
generating r x c tables with given row and column totals.
\emph{Applied Statistics} \bold{30}, 91--97.

Agresti, A. (2007) \emph{An Introduction to Categorical Data Analysis,
2nd ed.}, New York: John Wiley & Sons. Page 38.
}
\seealso{
For goodness-of-fit testing, notably of continuous distributions, \code{\link{ks.test}}.
}

+ 155
- 0
man/datatable.Rd View File

@@ -0,0 +1,155 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/output.R
\name{datatable}
\alias{datatable}
\alias{datatable.default}
\alias{datatable.desctable}
\title{Create an HTML table widget using the DataTables library}
\usage{
datatable(data, ...)

\method{datatable}{default}(data, options = list(), class = "display",
callback = DT::JS("return table;"), caption = NULL, filter = c("none",
"bottom", "top"), escape = TRUE, style = "default", width = NULL,
height = NULL, elementId = NULL,
fillContainer = getOption("DT.fillContainer", NULL),
autoHideNavigation = getOption("DT.autoHideNavigation", NULL),
selection = c("multiple", "single", "none"), extensions = list(),
plugins = NULL, ...)

\method{datatable}{desctable}(data, options = list(paging = F, info = F,
search = F, dom = "Brtip", fixedColumns = T, fixedHeader = T, buttons =
c("copy", "excel")), class = "display",
callback = DT::JS("return table;"), caption = NULL, filter = c("none",
"bottom", "top"), escape = FALSE, style = "default", width = NULL,
height = NULL, elementId = NULL,
fillContainer = getOption("DT.fillContainer", NULL),
autoHideNavigation = getOption("DT.autoHideNavigation", NULL),
selection = c("multiple", "single", "none"), extensions = c("FixedHeader",
"FixedColumns", "Buttons"), plugins = NULL, rownames = F, digits = 2,
...)
}
\arguments{
\item{data}{a data object (either a matrix or a data frame)}

\item{...}{arguments passed to \code{format}.}

\item{options}{a list of initialization options (see
\url{http://datatables.net/reference/option/}); the character options
wrapped in \code{\link[htmlwidgets]{JS}()} will be treated as literal
JavaScript code instead of normal character strings; you can also set
options globally via \code{\link{options}(DT.options = list(...))}, and
global options will be merged into this \code{options} argument if set}

\item{class}{the CSS class(es) of the table; see
\url{http://datatables.net/manual/styling/classes}}

\item{callback}{the body of a JavaScript callback function with the argument
\code{table} to be applied to the DataTables instance (i.e. \code{table})}

\item{caption}{the table caption; a character vector or a tag object
generated from \code{htmltools::tags$caption()}}

\item{filter}{whether/where to use column filters; \code{none}: no filters;
\code{bottom/top}: put column filters at the bottom/top of the table; range
sliders are used to filter numeric/date/time columns, select lists are used
for factor columns, and text input boxes are used for character columns; if
you want more control over the styles of filters, you can provide a list to
this argument of the form \code{list(position = 'top', clear = TRUE, plain
= FALSE)}, where \code{clear} indicates whether you want the clear buttons
in the input boxes, and \code{plain} means if you want to use Bootstrap
form styles or plain text input styles for the text input boxes}

\item{escape}{whether to escape HTML entities in the table: \code{TRUE} means
to escape the whole table, and \code{FALSE} means not to escape it;
alternatively, you can specify numeric column indices or column names to
indicate which columns to escape, e.g. \code{1:5} (the first 5 columns),
\code{c(1, 3, 4)}, or \code{c(-1, -3)} (all columns except the first and
third), or \code{c('Species', 'Sepal.Length')}}

\item{style}{the style name (\url{http://datatables.net/manual/styling/});
currently only \code{'default'} and \code{'bootstrap'} are supported}

\item{width}{Width/Height in pixels (optional, defaults to automatic
sizing)}

\item{height}{Width/Height in pixels (optional, defaults to automatic
sizing)}

\item{elementId}{An id for the widget (a random string by default).}

\item{fillContainer}{\code{TRUE} to configure the table to automatically fill
its containing element. If the table can't fit fully into its container
then vertical and/or horizontal scrolling of the table cells will occur.}

\item{autoHideNavigation}{\code{TRUE} to automatically hide navigational UI
when the number of total records is less than the page size.}

\item{selection}{the row/column selection mode (single or multiple selection
or disable selection) when a table widget is rendered in a Shiny app;
alternatively, you can use a list of the form \code{list(mode = 'multiple',
selected = c(1, 3, 8), target = 'row')} to pre-select rows; the element
\code{target} in the list can be \code{'column'} to enable column
selection, or \code{'row+column'} to make it possible to select both rows
and columns (click on the footer to select columns), or \code{'cell'} to
select cells}

\item{extensions}{a character vector of the names of the DataTables
extensions (\url{https://datatables.net/extensions/index})}

\item{plugins}{a character vector of the names of DataTables plug-ins
(\url{https://rstudio.github.io/DT/plugins.html})}

\item{rownames}{\code{TRUE} (show row names) or \code{FALSE} (hide row names)
or a character vector of row names; by default, the row names are displayed
in the first column of the table if exist (not \code{NULL})}

\item{digits}{the desired number of digits after the decimal
point (\code{format = "f"}) or \emph{significant} digits
(\code{format = "g"}, \code{= "e"} or \code{= "fg"}).

Default: 2 for integer, 4 for real numbers. If less than 0,
the C default of 6 digits is used. If specified as more than 50, 50
will be used with a warning unless \code{format = "f"} where it is
limited to typically 324. (Not more than 15--21 digits need be
accurate, depending on the OS and compiler used. This limit is
just a precaution against segfaults in the underlying C runtime.)
}
}
\description{
This function creates an HTML widget to display rectangular data (a matrix or data frame) using the JavaScript library DataTables, with a method for \code{desctable} objects.
}
\note{
You are recommended to escape the table content for security reasons (e.g. XSS attacks) when using this function in Shiny or any other dynamic web applications.
}
\examples{
library(DT)

# see the package vignette for examples and the link to website
vignette('DT', package = 'DT')

# some boring edge cases for testing purposes
m = matrix(nrow = 0, ncol = 5, dimnames = list(NULL, letters[1:5]))
datatable(m) # zero rows
datatable(as.data.frame(m))

m = matrix(1, dimnames = list(NULL, 'a'))
datatable(m) # one row and one column
datatable(as.data.frame(m))

m = data.frame(a = 1, b = 2, c = 3)
datatable(m)
datatable(as.matrix(m))

# dates
datatable(data.frame(
date = seq(as.Date("2015-01-01"), by = "day", length.out = 5), x = 1:5
))
datatable(data.frame(x = Sys.Date()))
datatable(data.frame(x = Sys.time()))

###
}
\references{
See \url{http://rstudio.github.io/DT} for the full documentation.
}

+ 111
- 0
man/desctable.Rd View File

@@ -0,0 +1,111 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/build.R
\name{desctable}
\alias{desctable}
\alias{desctable.default}
\alias{desctable.grouped_df}
\title{Generate a statistics table}
\usage{
desctable(data, stats, tests, labels)

\method{desctable}{default}(data, stats = stats_auto, tests, labels = NULL)

\method{desctable}{grouped_df}(data, stats = stats_auto, tests = tests_auto,
labels = NULL)
}
\arguments{
\item{data}{The dataframe to analyze}

\item{stats}{A list of named statistics to apply to each element of the dataframe, or a function returning a list of named statistics}

\item{tests}{A list of statistical tests to use when calling desctable with a grouped_df}

\item{labels}{A named character vector of labels to use instead of variable names}
}
\value{
A desctable object, which prints to a table of statistics for all variables
}
\description{
Generate a statistics table with the chosen statistical functions, and tests if given a \code{"grouped"} dataframe.
}
\section{Labels}{

labels is an optional named character vector used to make the table prettier.

If given, the variable names for which there is a label will be replaced by their corresponding label.

Not all variables need to have a label, and labels for non-existing variables are ignored.

labels must be given in the form c(unquoted_variable_name = "label")
}

\section{Stats}{

The stats can be a function which takes a dataframe and returns a list of statistical functions to use.

stats can also be a named list of statistical functions, or formulas.

The names will be used as column names in the resulting table. If an element of the list is a function, it will be used as-is for the stats. If an element of the list is a formula, it can be used to conditionally use stats depending on the variable.

The general form is \code{condition ~ T | F}, and can be nested, such as \code{is.factor ~ percent | (is.normal ~ mean | median)}, for example.
}

\section{Tests}{

The tests can be a function which takes a variable and a grouping variable, and returns an appropriate statistical test to use in that case.

tests can also be a named list of statistical test functions, associating the name of a variable in the data, and a test to use specifically for that variable.

That test name must be expressed as a single-term formula (e.g. \code{~t.test}). You don't have to specify tests for all the variables: a default test for all other variables can be defined with the name \code{.default}, and an automatic test can be defined with the name \code{.auto}.

If data is a grouped dataframe (using \code{group_by}), subtables are created and statistical tests are performed over each sub-group.
}

\section{Output}{

The output is a desctable object, which is a list of named dataframes that can be further manipulated. Methods for printing the object and for rendering it with \pkg{pander} and \pkg{DT} are provided. Printing reduces the object to a dataframe.
}

\examples{
iris \%>\%
desctable

# Does the same as stats_auto here
iris \%>\%
desctable(stats = list("N" = length,
"\%/Mean" = is.factor ~ percent | (is.normal ~ mean),
"sd" = is.normal ~ sd,
"Med" = is.normal ~ NA | median,
"IQR" = is.normal ~ NA | IQR))

# With labels
mtcars \%>\% desctable(labels = c(hp = "Horse Power",
cyl = "Cylinders",
mpg = "Miles per gallon"))

# With grouping on a factor
iris \%>\%
group_by(Species) \%>\%
desctable(stats = stats_default)

# With nested grouping, on arbitrary variables
mtcars \%>\%
group_by(vs, cyl) \%>\%
desctable

# With grouping on a condition, and choice of tests
iris \%>\%
group_by(Petal.Length > 5) \%>\%
desctable(tests = list(.auto = tests_auto, Species = ~chisq.test))
}
\seealso{
\code{\link{stats_auto}}

\code{\link{tests_auto}}

\code{\link{print.desctable}}

\code{\link{pander.desctable}}

\code{\link{datatable.desctable}}
}

+ 212
- 0
man/fisher.test.Rd View File

@@ -0,0 +1,212 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/convenience_functions.R
\name{fisher.test}
\alias{fisher.test}
\alias{fisher.test.default}
\alias{fisher.test.formula}
\title{Fisher's Exact Test for Count Data}
\usage{
fisher.test(x, y, workspace, hybrid, control, or, alternative, conf.int,
conf.level, simulate.p.value, B)

\method{fisher.test}{default}(x, ...)

\method{fisher.test}{formula}(x, y = NULL, workspace = 2e+05, hybrid = F,
control = list(), or = 1, alternative = "two.sided", conf.int = T,
conf.level = 0.95, simulate.p.value = F, B = 2000)
}
\arguments{
\item{x}{either a two-dimensional contingency table in matrix form, a factor object, or a formula of the form \code{lhs ~ rhs} where \code{lhs} and \code{rhs} are factors.}

\item{y}{a factor object; ignored if \code{x} is a matrix or a formula.}

\item{workspace}{an integer specifying the size of the workspace
used in the network algorithm. In units of 4 bytes. Only used for
non-simulated p-values larger than \eqn{2 \times 2}{2 by 2} tables.
Since \R version 3.5.0, this also increases the internal stack size
which allows larger problems to be solved, however sometimes needing
hours. In such cases, \code{simulate.p.value = TRUE} may be more
reasonable.}

\item{hybrid}{a logical. Only used for larger than \eqn{2 \times 2}{2 by 2}
tables, in which cases it indicates whether the exact probabilities
(default) or a hybrid approximation thereof should be computed.}

\item{control}{a list with named components for low level algorithm
control. At present the only one used is \code{"mult"}, a positive
integer \eqn{\ge 2} with default 30 used only for larger than
\eqn{2 \times 2}{2 by 2} tables. This says how many times as much
space should be allocated to paths as to keys: see file
\file{fexact.c} in the sources of this package.}

\item{or}{the hypothesized odds ratio. Only used in the
\eqn{2 \times 2}{2 by 2} case.}

\item{alternative}{indicates the alternative hypothesis and must be
one of \code{"two.sided"}, \code{"greater"} or \code{"less"}.
You can specify just the initial letter. Only used in the
\eqn{2 \times 2}{2 by 2} case.}

\item{conf.int}{logical indicating if a confidence interval for the
odds ratio in a \eqn{2 \times 2}{2 by 2} table should be
computed (and returned).}

\item{conf.level}{confidence level for the returned confidence
interval. Only used in the \eqn{2 \times 2}{2 by 2} case and if
\code{conf.int = TRUE}.}

\item{simulate.p.value}{a logical indicating whether to compute
p-values by Monte Carlo simulation, in larger than \eqn{2 \times
2}{2 by 2} tables.}

\item{B}{an integer specifying the number of replicates used in the
Monte Carlo test.}

\item{...}{additional params to feed to original fisher.test}
}
\value{
A list with class \code{"htest"} containing the following components:

p.value: the p-value of the test.

conf.int: a confidence interval for the odds ratio. Only present in
the 2 by 2 case and if argument \code{conf.int = TRUE}.

estimate: an estimate of the odds ratio. Note that the \emph{conditional}
Maximum Likelihood Estimate (MLE) rather than the
unconditional MLE (the sample odds ratio) is used. Only
present in the 2 by 2 case.

null.value: the odds ratio under the null, \code{or}. Only present in the 2
by 2 case.

alternative: a character string describing the alternative hypothesis.

method: the character string \code{"Fisher's Exact Test for Count Data"}.

data.name: a character string giving the names of the data.
}
\description{
Performs Fisher's exact test for testing the null of independence
of rows and columns in a contingency table with fixed marginals, or with a formula expression.
}
\details{
If \code{x} is a matrix, it is taken as a two-dimensional contingency
table, and hence its entries should be nonnegative integers.
Otherwise, both \code{x} and \code{y} must be vectors of the same length.
Incomplete cases are removed, the vectors are coerced into factor
objects, and the contingency table is computed from these.

For 2 by 2 cases, p-values are obtained directly using the
(central or non-central) hypergeometric distribution. Otherwise,
computations are based on a C version of the FORTRAN subroutine
FEXACT which implements the network developed by Mehta and Patel
(1986) and improved by Clarkson, Fan and Joe (1993). The FORTRAN
code can be obtained from \url{http://www.netlib.org/toms/643}.
Note this fails (with an error message) when the entries of the
table are too large. (It transposes the table if necessary so it
has no more rows than columns. One constraint is that the product
of the row marginals be less than 2^31 - 1.)

For 2 by 2 tables, the null of conditional independence is
equivalent to the hypothesis that the odds ratio equals one.
\dQuote{Exact} inference can be based on observing that in general, given
all marginal totals fixed, the first element of the contingency
table has a non-central hypergeometric distribution with
non-centrality parameter given by the odds ratio (Fisher, 1935).
The alternative for a one-sided test is based on the odds ratio,
so \code{alternative = "greater"} is a test of the odds ratio being
bigger than \code{or}.

Two-sided tests are based on the probabilities of the tables, and
take as \dQuote{more extreme} all tables with probabilities less than or
equal to that of the observed table, the p-value being the sum of
such probabilities.

For larger than 2 by 2 tables and \code{hybrid = TRUE}, asymptotic
chi-squared probabilities are only used if the \sQuote{Cochran
conditions} are satisfied, that is if no cell has count zero, and
more than 80\% of the cells have counts at least 5: otherwise the
exact calculation is used.

Simulation is done conditional on the row and column marginals,
and works only if the marginals are strictly positive. (A C
translation of the algorithm of Patefield (1981) is used.)
}
\examples{
\dontrun{
## Agresti (1990, p. 61f; 2002, p. 91) Fisher's Tea Drinker
## A British woman claimed to be able to distinguish whether milk or
## tea was added to the cup first. To test, she was given 8 cups of
## tea, in four of which milk was added first. The null hypothesis
## is that there is no association between the true order of pouring
## and the woman's guess, the alternative that there is a positive
## association (that the odds ratio is greater than 1).
TeaTasting <-
matrix(c(3, 1, 1, 3),
nrow = 2,
dimnames = list(Guess = c("Milk", "Tea"),
Truth = c("Milk", "Tea")))
fisher.test(TeaTasting, alternative = "greater")
## => p = 0.2429, association could not be established

## Fisher (1962, 1970), Criminal convictions of like-sex twins
Convictions <-
matrix(c(2, 10, 15, 3),
nrow = 2,
dimnames =
list(c("Dizygotic", "Monozygotic"),
c("Convicted", "Not convicted")))
Convictions
fisher.test(Convictions, alternative = "less")
fisher.test(Convictions, conf.int = FALSE)
fisher.test(Convictions, conf.level = 0.95)$conf.int
fisher.test(Convictions, conf.level = 0.99)$conf.int

## A r x c table Agresti (2002, p. 57) Job Satisfaction
Job <- matrix(c(1,2,1,0, 3,3,6,1, 10,10,14,9, 6,7,12,11), 4, 4,
dimnames = list(income = c("< 15k", "15-25k", "25-40k", "> 40k"),
satisfaction = c("VeryD", "LittleD", "ModerateS", "VeryS")))
fisher.test(Job)
fisher.test(Job, simulate.p.value = TRUE, B = 1e5)

###
}
}
\references{
Agresti, A. (1990) \emph{Categorical data analysis}. New York: Wiley.
Pages 59-66.

Agresti, A. (2002) \emph{Categorical data analysis}. Second edition.
New York: Wiley. Pages 91-101.

Fisher, R. A. (1935) The logic of inductive inference. \emph{Journal
of the Royal Statistical Society Series A} \bold{98}, 39--54.

Fisher, R. A. (1962) Confidence limits for a cross-product ratio.
\emph{Australian Journal of Statistics} \bold{4}, 41.

Fisher, R. A. (1970) \emph{Statistical Methods for Research Workers.}
Oliver & Boyd.

Mehta, C. R. and Patel, N. R. (1986) Algorithm 643. FEXACT: A
Fortran subroutine for Fisher's exact test on unordered r*c
contingency tables. \emph{ACM Transactions on Mathematical Software},
\bold{12}, 154--161.

Clarkson, D. B., Fan, Y. and Joe, H. (1993) A Remark on Algorithm
643: FEXACT: An Algorithm for Performing Fisher's Exact Test in r
x c Contingency Tables. \emph{ACM Transactions on Mathematical
Software}, \bold{19}, 484--488.

Patefield, W. M. (1981) Algorithm AS159. An efficient method of
generating r x c tables with given row and column totals.
\emph{Applied Statistics} \bold{30}, 91--97.
}
\seealso{
\code{\link{chisq.test}}

\code{fisher.exact} in package \pkg{exact2x2} for alternative
interpretations of two-sided tests and confidence intervals for 2
by 2 tables.
}

+ 17
- 0
man/flatten_desctable.Rd View File

@@ -0,0 +1,17 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{flatten_desctable}
\alias{flatten_desctable}
\title{Flatten a desctable to a dataframe recursively}
\usage{
flatten_desctable(desctable)
}
\arguments{
\item{desctable}{A desctable object}
}
\value{
A flat dataframe
}
\description{
Flatten a desctable to a dataframe recursively
}

+ 12
- 0
man/group_by.Rd View File

@@ -0,0 +1,12 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/imports.R
\name{group_by}
\alias{group_by}
\title{Group a tbl by one or more variables.}
\usage{
group_by(.data, ..., add = FALSE)
}
\description{
Group a tbl by one or more variables.
}
\keyword{internal}

+ 17
- 0
man/head_dataframe.Rd View File

@@ -0,0 +1,17 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{head_dataframe}
\alias{head_dataframe}
\title{Build the header for dataframe}
\usage{
head_dataframe(head)
}
\arguments{
\item{head}{A headerList object}
}
\value{
A names vector
}
\description{
Build the header for dataframe
}

+ 17
- 0
man/head_datatable.Rd View File

@@ -0,0 +1,17 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{head_datatable}
\alias{head_datatable}
\title{Build the header for datatable}
\usage{
head_datatable(head)
}
\arguments{
\item{head}{A headerList object}
}
\value{
An htmltools$tags object containing the header
}
\description{
Build the header for datatable
}

+ 17
- 0
man/head_pander.Rd View File

@@ -0,0 +1,17 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{head_pander}
\alias{head_pander}
\title{Build the header for pander}
\usage{
head_pander(head)
}
\arguments{
\item{head}{A headerList object}
}
\value{
A names vector
}
\description{
Build the header for pander
}

+ 20
- 0
man/header.Rd View File

@@ -0,0 +1,20 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{header}
\alias{header}
\title{Build header}
\usage{
header(desctable, output = c("pander", "datatable", "dataframe"))
}
\arguments{
\item{desctable}{A desctable object}

\item{output}{An output format for the header}
}
\value{
A header object in the output format
}
\description{
Take a desctable object and create a suitable header for the mentioned output.
Output can be one of "pander", "datatable", or "dataframe".
}

+ 17
- 0
man/headerList.Rd View File

@@ -0,0 +1,17 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{headerList}
\alias{headerList}
\title{Build a header list object}
\usage{
headerList(desctable)
}
\arguments{
\item{desctable}{A desctable}
}
\value{
A nested list of headers with colspans
}
\description{
Build a header list object
}

+ 21
- 0
man/insert.Rd View File

@@ -0,0 +1,21 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{insert}
\alias{insert}
\title{Insert a vector y inside another vector x at position}
\usage{
insert(x, y, position)
}
\arguments{
\item{x}{A vector}

\item{y}{A vector or list of vectors}

\item{position}{The position / vector of positions to insert vector(s) y in vector x}
}
\value{
The combined vector
}
\description{
Insert a vector y inside another vector x at position
}

+ 18
- 0
man/is.normal.Rd View File

@@ -0,0 +1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/convenience_functions.R
\name{is.normal}
\alias{is.normal}
\title{Test if distribution is normal}
\usage{
is.normal(x)
}
\arguments{
\item{x}{A numerical vector}
}
\value{
A boolean
}
\description{
Test if distribution is normal.
The condition for normality is length > 30 and a non-significant Shapiro-Wilk test with p > .1
}

+ 14
- 0
man/no.test.Rd View File

@@ -0,0 +1,14 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/convenience_functions.R
\name{no.test}
\alias{no.test}
\title{No test}
\usage{
no.test(formula)
}
\arguments{
\item{formula}{A formula}
}
\description{
An empty test
}

+ 35
- 0
man/pander.desctable.Rd View File

@@ -0,0 +1,35 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/output.R
\name{pander.desctable}
\alias{pander.desctable}
\title{Pander method for desctable}
\usage{
pander.desctable(x = NULL, digits = 2, justify = "left", missing = "",
keep.line.breaks = T, split.tables = Inf, emphasize.rownames = F, ...)
}
\arguments{
\item{x}{A desctable}

\item{digits}{passed to \code{format}. Can be a vector specifying values for each column (has to be the same length as number of columns).}

\item{justify}{defines alignment in cells passed to \code{format}. Can be \code{left}, \code{right} or \code{centre}, which latter can be also spelled as \code{center}. Defaults to \code{left}. Can be abbreviated to a string consisting of the letters \code{l}, \code{c} and \code{r} (e.g. 'lcr' instead of c('left', 'centre', 'right')).}

\item{missing}{string to replace missing values}

\item{keep.line.breaks}{(default: \code{TRUE}) if to keep or remove line breaks from cells in a table}

\item{split.tables}{where to split wide tables to separate tables. The default value (\code{Inf}) disables this feature; feel free to change it to a number of characters (e.g. to \code{80}, the conventional number of characters used in a line) if you want wide tables to be split.}

\item{emphasize.rownames}{boolean (default: \code{FALSE}) if row names should be highlighted}

\item{...}{unsupported extra arguments directly placed into \code{/dev/null}}
}
\description{
Pander method to output a desctable
}
\details{
Uses \code{pandoc.table}, with some default parameters (\code{digits = 2}, \code{justify = "left"}, \code{missing = ""}, \code{keep.line.breaks = T}, \code{split.tables = Inf}, and \code{emphasize.rownames = F}), that you can override if needed.
}
\seealso{
\code{\link{pandoc.table}}
}

+ 28
- 0
man/parse_formula.Rd View File

@@ -0,0 +1,28 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/utils.R
\name{parse_formula}
\alias{parse_formula}
\title{Parse a formula}
\usage{
parse_formula(x, f)
}
\arguments{
\item{x}{The variable to test it on}

\item{f}{A formula to parse}
}
\value{
A function to use as a stat/test
}
\description{
Parse a formula defining the conditions to pick a stat/test
}
\details{
Parse a formula defining the conditions to pick a stat/test
and return the function to use.
The formula is to be given in the form of
conditional ~ T | F
and conditions can be nested such as
conditional1 ~ (conditional2 ~ T | F) | F
The FALSE option can be omitted, and the TRUE can be replaced with NA
}

+ 18
- 0
man/percent.Rd View File

@@ -0,0 +1,18 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/convenience_functions.R
\name{percent}
\alias{percent}
\title{Return the percentages for the levels of a factor}
\usage{
percent(x)
}
\arguments{
\item{x}{A factor}
}
\value{
A nlevels(x) + 1 length vector of percentages
}
\description{
Return a compatible vector of length nlevels(x) + 1
to print the percentages of each level of a factor
}

+ 12
- 0
man/pipe.Rd View File

@@ -0,0 +1,12 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/imports.R
\name{\%>\%}
\alias{\%>\%}
\title{Pipe operator}
\usage{
lhs \%>\% rhs
}
\description{
Pipe operator
}
\keyword{internal}

+ 19
- 0
man/print.desctable.Rd View File

@@ -0,0 +1,19 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/output.R
\name{print.desctable}
\alias{print.desctable}
\title{Print method for desctable}
\usage{
\method{print}{desctable}(x, ...)
}
\arguments{
\item{x}{A desctable}

\item{...}{Additional print parameters}
}
\value{
A flat dataframe
}
\description{
Print method for desctable
}

+ 19
- 0
man/statColumn.Rd View File

@@ -0,0 +1,19 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/build.R
\name{statColumn}
\alias{statColumn}
\title{Generate one statistic for all variables}
\usage{
statColumn(stat, data)
}
\arguments{
\item{stat}{The statistic to use}

\item{data}{The dataframe to apply the statistic to}
}
\value{
A vector for one statistic column
}
\description{
Generate one statistic for all variables
}

+ 19
- 0
man/statTable.Rd View File

@@ -0,0 +1,19 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/build.R
\name{statTable}
\alias{statTable}
\title{Generate the table of all statistics for all variables}
\usage{
statTable(data, stats)
}
\arguments{
\item{data}{The dataframe to apply the statistic to}

\item{stats}{A list of named statistics to use}
}
\value{
A dataframe of all statistics for all variables
}
\description{
Generate the table of all statistics for all variables
}

+ 34
- 0
man/statify.Rd View File

@@ -0,0 +1,34 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/stats.R
\name{statify}
\alias{statify}
\alias{statify.default}
\alias{statify.formula}
\title{Transform any function into a valid stat function for the table}
\usage{
statify(x, f)

\method{statify}{default}(x, f)

\method{statify}{formula}(x, f)
}
\arguments{
\item{x}{A vector}

\item{f}{The function to try to apply, or a formula combining two functions}
}
\value{
The results for the function applied on the vector, compatible with the format of the result table
}
\description{
Transform a function into a valid stat function for the table
}
\details{
NA values are removed from the data

Applying the function on a numerical vector should return one value

Applying the function on a factor should return nlevels + 1 values, or one value per factor level

See \code{parse_formula} for the usage for formulas.
}

+ 37
- 0
man/stats_default.Rd View File

@@ -0,0 +1,37 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/stats.R
\name{stats_default}
\alias{stats_default}
\alias{stats_normal}
\alias{stats_nonnormal}
\alias{stats_auto}
\title{Functions to create a list of statistics to use in desctable}
\usage{
stats_default(data)

stats_normal(data)

stats_nonnormal(data)

stats_auto(data)
}
\arguments{
\item{data}{The dataframe to apply the statistic to}
}
\value{
A list of statistics to use, potentially assessed from the dataframe
}
\description{
These functions take a dataframe as argument and return a list of statistics in the form accepted by desctable.
}
\details{
Already defined are
\enumerate{
\item stats_default with length, \%, mean, sd, med and IQR
\item stats_normal with length, \%, mean and sd
\item stats_nonnormal with length, \%, median and IQR
\item stats_auto, which picks stats depending on the data
}

You can define your own automatic functions, as long as they take a dataframe as argument and return a list of functions or formulas defining conditions to use a stat function.
}

+ 20
- 0
man/subNames.Rd View File

@@ -0,0 +1,20 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/build.R
\name{subNames}
\alias{subNames}
\title{Create the subtables names}
\usage{
subNames(grp, df)
}
\arguments{
\item{grp}{Grouping factor}

\item{df}{Dataframe containing the grouping factor}
}
\value{
A character vector with the names for the subtables
}
\description{
Create the subtables names, as
factor: level (n=sub-group length)
}

+ 23
- 0
man/subTable.Rd View File

@@ -0,0 +1,23 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/build.R
\name{subTable}
\alias{subTable}
\title{Create a subtable in a grouped desctable}
\usage{
subTable(df, stats, tests, grps)
}
\arguments{
\item{df}{Dataframe to use}

\item{stats}{Stats list/function to use}

\item{tests}{Tests list/function to use}

\item{grps}{List of symbols for grouping factors}
}
\value{
A nested list of statTables and testColumns
}
\description{
Create a subtable in a grouped desctable
}

+ 21
- 0
man/testColumn.Rd View File

@@ -0,0 +1,21 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/build.R
\name{testColumn}
\alias{testColumn}
\title{Create the pvalues column}
\usage{
testColumn(df, tests, grp)
}
\arguments{
\item{df}{Dataframe to use for the tests}

\item{tests}{Test function or list of functions}

\item{grp}{Grouping factor}
}
\value{
A numeric vector of pvalues
}
\description{
Create the pvalues column
}

+ 23
- 0
man/testify.Rd View File

@@ -0,0 +1,23 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tests.R
\name{testify}
\alias{testify}
\title{Transform any test function into a valid test function for the table}
\usage{
testify(x, f, group)
}
\arguments{
\item{x}{A vector}

\item{f}{The function to try to apply, or a formula combining two functions}

\item{group}{Grouping factor}
}
\value{
The results for the function applied on the vector, compatible with the format of the result table
}
\description{
Transform a function into a valid test function for the table.
Applying the function on a numerical vector should return one value.
Applying the function on a factor should return nlevels + 1 values, or one value per factor level.
}

+ 22
- 0
man/tests_auto.Rd View File

@@ -0,0 +1,22 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tests.R
\name{tests_auto}
\alias{tests_auto}
\title{Functions to choose a statistical test}
\usage{
tests_auto(var, grp)
}
\arguments{
\item{var}{The variable to test}

\item{grp}{The variable for the groups}
}
\value{
A statistical test function
}
\description{
These functions take a variable and a grouping variable as arguments, and return a statistical test to use, expressed as a single-term formula.
}
\details{
Currently, only \code{tests_auto} is defined, and picks between t test, wilcoxon, anova, kruskal-wallis and fisher depending on the number of groups, the type of the variable, the normality and homoskedasticity of the distributions.
}

+ 25
- 0
man/varColumn.Rd View File

@@ -0,0 +1,25 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/build.R
\name{varColumn}
\alias{varColumn}
\title{Generate the variable column to display as row names}
\usage{
varColumn(data, labels = NULL)
}
\arguments{
\item{data}{The dataframe to get the names from}

\item{labels}{The optional named character vector containing the keypairs var = "Label"}
}
\value{
A dataframe with one variable named "Variables", a character vector of variable names/labels and levels
}
\description{
Generates the variable column.
Replaces the variable names by their label if given in the named character vector labels, and inserts levels for factors.
}
\details{
labels is an optional named character vector used to make the table prettier.
If given, the variable names for which there is a label will be replaced by their corresponding label.
Not all variables need to have a label, and labels for non-existing variables are ignored.
}

Loading…
Cancel
Save