You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

607 lines
20KB

  1. #' @importFrom pander pander
  2. pander::pander
  3. #' Generate a statistics table
  4. #'
  5. #' Generate a statistics table with the chosen statistical functions, and tests if given a \code{"grouped"} dataframe.
  6. #'
  7. #' @section Labels:
  8. #' labels is an option named character vector used to make the table prettier.
  9. #'
  10. #' If given, the variable names for which there is a label will be replaced by their corresponding label.
  11. #'
  12. #' Not all variables need to have a label, and labels for non-existing variables are ignored.
  13. #'
  14. #' labels must be given in the form c(unquoted_variable_name = "label")
  15. #'
  16. #' @section Stats:
  17. #' The stats can be a function which takes a dataframe and returns a list of statistical functions to use.
  18. #'
  19. #' stats can also be a named list of statistical functions, or purrr::map like formulas.
  20. #'
  21. #' The names will be used as column names in the resulting table. If an element of the list is a function, it will be used as-is for the stats.
  22. #'
  23. #' @section Tests:
  24. #' The tests can be a function which takes a variable and a grouping variable, and returns an appropriate statistical test to use in that case.
  25. #'
  26. #' tests can also be a named list of statistical test functions, associating the name of a variable in the data and a test to use specifically for that variable.
  27. #'
  28. #' That test name must be expressed as a single-term formula (e.g. \code{~t.test}), or a purrr::map like formula
  29. #' (e.g. \code{~t.test(., var.equal = T)}). You don't have to specify tests for all the variables: a default test for
  30. #' all other variables can be defined with the name \code{.default}, and an automatic test can be defined with the name \code{.auto}.
  31. #'
  32. #' If data is a grouped dataframe (using \code{group_by}), subtables are created and statistic tests are performed over each sub-group.
  33. #'
  34. #' @section Output:
  35. #' The output is a desctable object, which is a list of named dataframes that can be further manipulated. Methods for printing, using in \pkg{pander} and \pkg{DT} are present. Printing reduces the object to a dataframe.
  36. #'
  37. #' @param data The dataframe to analyze
  38. #' @param stats A list of named statistics to apply to each element of the dataframe, or a function returning a list of named statistics
  39. #' @param tests A list of statistical tests to use when calling desctable with a grouped_df
  40. #' @param labels A named character vector of labels to use instead of variable names
  41. #' @return A desctable object, which prints to a table of statistics for all variables
  42. #' @seealso \code{\link{stats_auto}}
  43. #' @seealso \code{\link{tests_auto}}
  44. #' @seealso \code{\link{print.desctable}}
  45. #' @seealso \code{\link{pander.desctable}}
  46. #' @seealso \code{\link{datatable.desctable}}
  47. #' @export
  48. #' @keywords deprecated
  49. #' @examples
  50. #' iris %>%
  51. #' desctable()
  52. #'
  53. #' # Does the same as stats_auto here
  54. #' iris %>%
  55. #' desctable(stats = list("N" = length,
  56. #' "Mean" = ~ if (is.normal(.)) mean(.),
  57. #' "sd" = ~ if (is.normal(.)) sd(.),
  58. #' "Med" = stats::median,
  59. #' "IQR" = ~ if(!is.factor(.)) IQR(.)))
  60. #'
  61. #' # With labels
  62. #' mtcars %>% desctable(labels = c(hp = "Horse Power",
  63. #' cyl = "Cylinders",
  64. #' mpg = "Miles per gallon"))
  65. #'
  66. #' # With grouping on a factor
  67. #' iris %>%
  68. #' group_by(Species) %>%
  69. #' desctable(stats = stats_default)
  70. #'
  71. #' # With nested grouping, on arbitrary variables
  72. #' mtcars %>%
  73. #' group_by(vs, cyl) %>%
  74. #' desctable()
  75. #'
  76. #' # With grouping on a condition, and choice of tests
  77. #' iris %>%
  78. #' group_by(Petal.Length > 5) %>%
  79. #' desctable(tests = list(.auto = tests_auto, Species = ~chisq.test))
  80. desctable <- function(data, stats, tests, labels) {
  81. warning("desctable is deprecated and will be removed in 1.0.0.
  82. Please use the `desc_*` family of functions (`desc_table`, `desc_tests`, `desc_output`)")
  83. UseMethod("desctable", data)
  84. }
  85. #' @rdname desctable
  86. #' @export
  87. desctable.default <- function(data, stats = stats_auto, tests, labels = NULL) {
  88. # Assemble the Variables and the statTable in a single desctable object
  89. list(Variables = varColumn(data, labels),
  90. stats = statTable(data, stats)) %>%
  91. set_desctable_class()
  92. }
  93. #' @rdname desctable
  94. #' @export
  95. desctable.grouped_df <- function(data, stats = stats_auto, tests = tests_auto, labels = NULL) {
  96. # Get groups then ungroup dataframe
  97. grps <- dplyr::groups(data)
  98. data <- dplyr::ungroup(data)
  99. # Assemble the Variables (excluding the grouping ones) and the subTables recursively in a single desctable object
  100. c(Variables = list(varColumn(data[!names(data) %in% (grps %>% lapply(as.character) %>% unlist())], labels)),
  101. subTable(data, stats, tests, grps)) %>%
  102. set_desctable_class()
  103. }
  104. #' Create the subtables names
  105. #'
  106. #' Create the subtables names, as
  107. #' factor: level (n=sub-group length)
  108. #'
  109. #' @param grp Grouping factor
  110. #' @param df Dataframe containing the grouping factor
  111. #' @return A character vector with the names for the subtables
  112. #' @keywords deprecated internal
  113. subNames <- function(grp, df) {
  114. paste0(as.character(grp),
  115. ": ",
  116. eval(grp, df) %>% factor() %>% levels(),
  117. " (n=",
  118. summary(eval(grp, df) %>% factor() %>% stats::na.omit(), maxsum = Inf),
  119. ")")
  120. }
  121. #' Create a subtable in a grouped desctable
  122. #'
  123. #' @param df Dataframe to use
  124. #' @param stats Stats list/function to use
  125. #' @param tests Tests list/function to use
  126. #' @param grps List of symbols for grouping factors
  127. #' @return A nested list of statTables and testColumns
  128. #' @keywords deprecated internal
  129. subTable <- function(df, stats, tests, grps) {
  130. # Final group, compute tests
  131. if (length(grps) == 1) {
  132. group <- factor(eval(grps[[1]], df))
  133. # Create the subtable stats
  134. df[!names(df) %in% as.character(grps[[1]])] %>%
  135. by(group, statTable, stats) %>%
  136. # Name the subtables with info about group and group size
  137. stats::setNames(subNames(grps[[1]], df)) -> stats
  138. # Create the subtable tests
  139. pvalues <- testColumn(df, tests, grps[[1]])
  140. c(stats, tests = list(pvalues))
  141. } else {
  142. group <- eval(grps[[1]], df)
  143. # Go through the next grouping levels and build the subtables
  144. df[!names(df) %in% as.character(grps[[1]])] %>%
  145. by(group, subTable, stats, tests, grps[-1]) %>%
  146. # Name the subtables with info about group and group size
  147. stats::setNames(subNames(grps[[1]], df))
  148. }
  149. }
  150. #' Print method for desctable
  151. #'
  152. #' @param x A desctable
  153. #' @param ... Additional print parameters
  154. #' @return A flat dataframe
  155. #' @export
  156. #' @keywords deprecated
  157. print.desctable <- function(x, ...) {
  158. print(as.data.frame(x))
  159. }
  160. #' As.data.frame method for desctable
  161. #'
  162. #' @param x A desctable
  163. #' @param ... Additional as.data.frame parameters
  164. #' @return A flat dataframe
  165. #' @export
  166. #' @keywords deprecated
  167. as.data.frame.desctable <- function(x, ...) {
  168. # Discard "markdown" formatting of variable names
  169. x$Variables$Variables <- gsub("\\*\\*(.*?)\\*\\*", "\\1", x$Variables$Variables)
  170. x$Variables$Variables <- gsub("\\*(.*?)\\*", "\\1", x$Variables$Variables)
  171. # Create a dataframe header
  172. header <- header(x, "dataframe")
  173. # Make a standard dataframe
  174. x %>%
  175. flatten_desctable() %>%
  176. data.frame(check.names = F, ...) %>%
  177. stats::setNames(header)
  178. }
  179. #' Pander method for desctable
  180. #'
  181. #' Pander method to output a desctable
  182. #'
  183. #' Uses \code{pandoc.table}, with some default parameters (\code{digits = 2}, \code{justify = "left"}, \code{missing = ""}, \code{keep.line.breaks = T}, \code{split.tables = Inf}, and \code{emphasize.rownames = F}), that you can override if needed.
  184. #'
  185. #' @param x A desctable
  186. #' @inheritParams pander::pandoc.table
  187. #' @seealso \code{\link{pandoc.table}}
  188. #' @export
  189. #' @keywords deprecated
  190. pander.desctable <- function(x = NULL,
  191. digits = 2,
  192. justify = "left",
  193. missing = "",
  194. keep.line.breaks = T,
  195. split.tables = Inf,
  196. emphasize.rownames = F,
  197. ...) {
  198. if (is.null(digits)) digits <- pander::panderOptions("digits")
  199. # Discard "markdown" and insert 4 NbSp before factor levels
  200. x$Variables$Variables <- gsub("\\*\\*(.*?)\\*\\*: \\*(.*?)\\*", "&nbsp;&nbsp;&nbsp;&nbsp;\\2", x$Variables$Variables)
  201. # Create a pander header
  202. header <- header(x, "pander")
  203. # Make a dataframe and push it to pandoc
  204. x %>%
  205. flatten_desctable %>%
  206. data.frame(check.names = F, stringsAsFactors = F) %>%
  207. stats::setNames(header) %>%
  208. pander::pandoc.table(justify = justify,
  209. digits = digits,
  210. missing = missing,
  211. keep.line.breaks = keep.line.breaks,
  212. split.tables = split.tables,
  213. emphasize.rownames = emphasize.rownames,
  214. ...)
  215. }
  216. #' Create an HTML table widget using the DataTables library
  217. #'
  218. #' This function creates an HTML widget to display rectangular data (a matrix or data frame) using the JavaScript library DataTables, with a method for \code{desctable} objects.
  219. #'
  220. #' @note
  221. #' You are recommended to escape the table content for security reasons (e.g. XSS attacks) when using this function in Shiny or any other dynamic web applications.
  222. #' @references
  223. #' See \url{https://rstudio.github.io/DT/} for the full documentation.
  224. #' @examples
  225. #' library(DT)
  226. #'
  227. #' # see the package vignette for examples and the link to website
  228. #' vignette('DT', package = 'DT')
  229. #'
  230. #' # some boring edge cases for testing purposes
  231. #' m = matrix(nrow = 0, ncol = 5, dimnames = list(NULL, letters[1:5]))
  232. #' datatable(m) # zero rows
  233. #' datatable(as.data.frame(m))
  234. #'
  235. #' m = matrix(1, dimnames = list(NULL, 'a'))
  236. #' datatable(m) # one row and one column
  237. #' datatable(as.data.frame(m))
  238. #'
  239. #' m = data.frame(a = 1, b = 2, c = 3)
  240. #' datatable(m)
  241. #' datatable(as.matrix(m))
  242. #'
  243. #' # dates
  244. #' datatable(data.frame(
  245. #' date = seq(as.Date("2015-01-01"), by = "day", length.out = 5), x = 1:5
  246. #' ))
  247. #' datatable(data.frame(x = Sys.Date()))
  248. #' datatable(data.frame(x = Sys.time()))
  249. #'
  250. #' ###
  251. #' @inheritParams DT::datatable
  252. #' @export
  253. #' @keywords deprecated
  254. datatable <- function(data, ...) {
  255. UseMethod("datatable", data)
  256. }
  257. #' @rdname datatable
  258. #' @export
  259. datatable.default <- function(data,
  260. options = list(),
  261. class = "display",
  262. callback = DT::JS("return table;"),
  263. caption = NULL,
  264. filter = c("none", "bottom", "top"),
  265. escape = TRUE,
  266. style = "default",
  267. width = NULL,
  268. height = NULL,
  269. elementId = NULL,
  270. fillContainer = getOption("DT.fillContainer", NULL),
  271. autoHideNavigation = getOption("DT.autoHideNavigation", NULL),
  272. selection = c("multiple", "single", "none"),
  273. extensions = list(),
  274. plugins = NULL, ...) {
  275. DT::datatable(data, options = options, class = class, callback = callback, caption = caption, filter = filter, escape = escape, style = style, width = width, height = height, elementId = elementId, fillContainer = fillContainer, autoHideNavigation = autoHideNavigation, selection = selection, extensions = extensions, plugins = plugins, ...)
  276. }
  277. #' @rdname datatable
  278. #' @inheritParams base::prettyNum
  279. #' @export
  280. datatable.desctable <- function(data,
  281. options = list(paging = F,
  282. info = F,
  283. search = list(),
  284. dom = "Brtip",
  285. fixedColumns = T,
  286. fixedHeader = T,
  287. buttons = c("copy", "excel")),
  288. class = "display",
  289. callback = DT::JS("return table;"),
  290. caption = NULL,
  291. filter = c("none", "bottom", "top"),
  292. escape = FALSE,
  293. style = "default",
  294. width = NULL,
  295. height = NULL,
  296. elementId = NULL,
  297. fillContainer = getOption("DT.fillContainer", NULL),
  298. autoHideNavigation = getOption("DT.autoHideNavigation", NULL),
  299. selection = c("multiple", "single", "none"),
  300. extensions = c("FixedHeader", "FixedColumns", "Buttons"),
  301. plugins = NULL,
  302. rownames = F,
  303. digits = 2, ...) {
  304. # Discard "markdown" and insert 4 NbSp before factor levels
  305. data$Variables$Variables <- gsub("\\*\\*(.*?)\\*\\*: \\*(.*?)\\*", "&nbsp;&nbsp;&nbsp;&nbsp;\\2", data$Variables$Variables)
  306. data$Variables$Variables <- gsub("\\*\\*(.*?)\\*\\*", "<b>\\1</b>", data$Variables$Variables)
  307. # Create a datatable header
  308. header <- header(data, "datatable")
  309. # Flatten desctable
  310. flat <- flatten_desctable(data)
  311. # Replace NAs and apply digits arg
  312. if (!is.null(digits))
  313. {
  314. flat %>%
  315. lapply(prettyNum, digits = digits) %>%
  316. lapply(gsub, pattern = "^NA$", replacement = "") -> flat
  317. }
  318. # Make a dataframe and push it to datatable, with its custom header
  319. flat %>%
  320. data.frame(check.names = F, stringsAsFactors = F) %>%
  321. DT::datatable(container = header,
  322. options = options,
  323. extensions = extensions,
  324. escape = escape,
  325. class = class,
  326. callback = callback,
  327. caption = caption,
  328. filter = filter,
  329. style = style,
  330. width = width,
  331. height = height,
  332. elementId = elementId,
  333. fillContainer = fillContainer,
  334. autoHideNavigation = autoHideNavigation,
  335. selection = selection,
  336. plugins = plugins,
  337. rownames = rownames, ...)
  338. }
  339. #' Set the "desctable" class to the passed object
  340. #'
  341. #' @param x Object to set the "desctable" class to
  342. #' @return The object with the class "desctable"
  343. #' @keywords deprecated internal
  344. set_desctable_class <- function(x) {
  345. class(x) <- "desctable"
  346. x
  347. }
  348. #' Parse a formula
  349. #'
  350. #' Parse a formula defining the conditions to pick a stat/test
  351. #'
  352. #' Parse a formula defining the conditions to pick a stat/test
  353. #' and return the function to use.
  354. #' The formula is to be given in the form of
  355. #' conditional ~ T | F
  356. #' and conditions can be nested such as
  357. #' conditional1 ~ (conditional2 ~ T | F) | F
  358. #' The FALSE option can be omitted, and the TRUE can be replaced with NA
  359. #'
  360. #' @param x The variable to test it on
  361. #' @param f A formula to parse
  362. #' @return A function to use as a stat/test
  363. #' @keywords deprecated internal
  364. parse_formula <- function(x, f) {
  365. parse_f <- function(x) {
  366. if (length(x) == 1) as.character(x)
  367. else {
  368. if (as.character(x[[1]]) == "~") {
  369. paste0("if (", parse_f(x[[2]]), "(x)) ",
  370. "{",
  371. parse_f(x[[3]]),
  372. "}")
  373. } else if (as.character(x[[1]]) == "|") {
  374. paste0(parse_f(x[[2]]),
  375. "} else ",
  376. "{",
  377. parse_f(x[[3]]))
  378. } else if (as.character(x[[1]]) == "(") {
  379. parse_f(x[[2]])
  380. }
  381. }
  382. }
  383. eval(parse(text = parse_f(f)))
  384. }
  385. #' Build the header for pander
  386. #'
  387. #' @param head A headerList object
  388. #' @return A names vector
  389. #' @keywords deprecated internal
  390. head_pander <- function(head) {
  391. if (is.integer(head[[1]])) {
  392. head %>%
  393. names %>%
  394. lapply(function(x){c(x, rep("", head[[x]] - 1))}) %>%
  395. unlist()
  396. } else {
  397. paste(head %>%
  398. names() %>%
  399. lapply(function(x){c(x, rep("", attr(head[[x]], "colspan") - 1))}) %>%
  400. unlist(),
  401. head %>%
  402. lapply(head_pander) %>%
  403. unlist(),
  404. sep = "<br/>")
  405. }
  406. }
  407. #' Build the header for datatable
  408. #'
  409. #' @param head A headerList object
  410. #' @return An htmltools$tags object containing the header
  411. #' @keywords deprecated internal
  412. head_datatable <- function(head) {
  413. TRs <- list()
  414. while (is.list(head[[1]])) {
  415. TR <- mapply(function(x, y) htmltools::tags$th(x, colspan = y), names(head), lapply(head, attr, "colspan"), SIMPLIFY = F)
  416. TRs <- c(TRs, list(TR))
  417. head <- purrr::flatten(head)
  418. }
  419. c(TRs, list(mapply(function(x, y) htmltools::tags$th(x, colspan = y), names(head), head, SIMPLIFY = F)))
  420. }
  421. #' Build the header for dataframe
  422. #'
  423. #' @param head A headerList object
  424. #' @return A names vector
  425. #' @keywords deprecated internal
  426. head_dataframe <- function(head) {
  427. if (is.integer(head[[1]])) {
  428. head %>%
  429. names() %>%
  430. lapply(function(x){rep(x, head[[x]])}) %>%
  431. unlist()
  432. } else {
  433. paste(head %>%
  434. names() %>%
  435. lapply(function(x){rep(x, attr(head[[x]], "colspan"))}) %>%
  436. unlist(),
  437. head %>%
  438. lapply(head_pander) %>%
  439. unlist(),
  440. sep = " / ")
  441. }
  442. }
  443. #' Build header
  444. #'
  445. #' Take a desctable object and create a suitable header for the mentionned output.
  446. #' Output can be one of "pander", "datatable", or "dataframe".
  447. #'
  448. #' @param desctable A desctable object
  449. #' @param output An output format for the header
  450. #' @return A header object in the output format
  451. #' @keywords deprecated internal
  452. header <- function(desctable, output = c("pander", "datatable", "dataframe")) {
  453. desctable[-1] %>%
  454. flatten_desctable() %>%
  455. data.frame(check.names = F) %>%
  456. names() -> nm
  457. desctable <- desctable[-1]
  458. if (length(desctable) == 1) {
  459. if (output == "datatable") {
  460. c("\u00A0", nm) %>%
  461. lapply(htmltools::tags$th) %>%
  462. htmltools::tags$tr() %>%
  463. htmltools::tags$thead() %>%
  464. htmltools::tags$table(class = "display")
  465. } else c("\u00A0", nm)
  466. } else {
  467. head <- headerList(desctable)
  468. if (output == "pander") {
  469. c("\u00A0", head_pander(head) %>%
  470. paste(nm, sep = "<br/>"))
  471. } else if (output == "datatable") {
  472. head <- c(head_datatable(head), list(nm %>% lapply(htmltools::tags$th)))
  473. head[[1]] <- c(list(htmltools::tags$th(rowspan = length(head))), head[[1]])
  474. head %>%
  475. lapply(htmltools::tags$tr) %>%
  476. htmltools::tags$thead() %>%
  477. htmltools::tags$table(class = "display")
  478. } else if (output == "dataframe") {
  479. c("\u00A0", head_dataframe(head) %>% paste(nm, sep = " / "))
  480. }
  481. }
  482. }
  483. #' build a header list object
  484. #'
  485. #' @param desctable a desctable
  486. #' @return a nested list of headers with colspans
  487. #' @keywords deprecated internal
  488. headerList <- function(desctable) {
  489. if (is.data.frame(desctable)) length(desctable)
  490. else {
  491. rec <- lapply(desctable, headerList)
  492. if (is.integer(rec[[1]])) attr(rec, "colspan") <- rec %>% unlist() %>% sum()
  493. else attr(rec, "colspan") <- rec %>% lapply(attr, "colspan") %>% unlist() %>% sum()
  494. rec
  495. }
  496. }
  497. #' Flatten a desctable to a dataframe recursively
  498. #'
  499. #' @param desctable A desctable object
  500. #' @return A flat dataframe
  501. #' @keywords deprecated internal
  502. flatten_desctable <- function(desctable) {
  503. if (is.data.frame(desctable)) desctable
  504. else {
  505. desctable %>%
  506. lapply(flatten_desctable) %>%
  507. Reduce(f = cbind)
  508. }
  509. }
  510. #' Define a list of default statistics
  511. #'
  512. #' @param data A dataframe
  513. #' @return A list of statistical functions
  514. #' @export
  515. #' @keywords deprecated
  516. stats_default <- function(data) {
  517. list("N" = length,
  518. "%" = percent,
  519. "Mean" = ~if (is.normal(.)) mean(.),
  520. "sd" = ~if (is.normal(.)) sd(.),
  521. "Med" = stats::median,
  522. "IQR" = ~if (!is.factor(.)) IQR(.))
  523. }
  524. #' @rdname stats_default
  525. #' @export
  526. stats_normal <- function(data) {
  527. list("N" = length,
  528. "%" = percent,
  529. "Mean" = mean,
  530. "sd" = stats::sd)
  531. }
  532. #' @rdname stats_default
  533. #' @export
  534. stats_nonnormal <- function(data) {
  535. list("N" = length,
  536. "%" = percent,
  537. "Median" = stats::median,
  538. "IQR" = ~if (!is.factor(.)) IQR(.))
  539. }