You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

149 lines
4.6KB

  1. #' Transform any function into a valid stat function for the table
  2. #'
  3. #' Transform a function into a valid stat function for the table
  4. #'
  5. #' NA values are removed from the data
  6. #'
  7. #' Applying the function on a numerical vector should return one value
  8. #'
  9. #' Applying the function on a factor should return nlevels + 1 value, or one value per factor level
  10. #'
  11. #' See \code{parse_formula} for the usage for formulaes.
  12. #' @param f The function to try to apply, or a formula combining two functions
  13. #' @param x A vector
  14. #' @export
  15. #' @return The results for the function applied on the vector, compatible with the format of the result table
  16. statify <- function(x, f) {
  17. UseMethod("statify", f)
  18. }
  19. #' @rdname statify
  20. #' @export
  21. statify.default <- function(x, f) {
  22. # Discard NA values
  23. x <- stats::na.omit(x)
  24. # Try f(x), silent warnings and fail with NA
  25. res <- tryCatch(f(x),
  26. warning = function(e) suppressWarnings(f(x)),
  27. error = function(e) NA)
  28. # If x is a factor and f(x) behaves as expected (nlevel + total value), return f(x), or apply f(x) on each level, or fail with n+1 NA
  29. if (is.factor(x)) {
  30. if (length(res) == nlevels(x) + 1) res
  31. else if (length(res) == 1) {
  32. c(res, lapply(levels(x), function(lvl) {
  33. tryCatch(f(x[x == lvl]),
  34. warning = function(e) suppressWarnings(f(x[x == lvl])),
  35. error = function(e) NA)
  36. }) %>% unlist)
  37. }
  38. else rep(NA, nlevels(x) + 1)
  39. # If it is a numeric, return f(x) if it behaves as expected (ONE value), or fail with NA
  40. } else {
  41. if (length(res) == 1) {
  42. if (is.numeric(res) | is.na(res)) res
  43. else as.character(res)
  44. }
  45. else NA
  46. }
  47. }
  48. #' @rdname statify
  49. #' @export
  50. statify.formula <- function(x, f) {
  51. # if expression quoted with ~, evaluate the expression
  52. if (length(f) == 2) eval(f[[2]])
  53. # else parse the formula (cond ~ T | F)
  54. else statify.default(x, parse_formula(x, f))
  55. }
  56. #' Functions to create a list of statistics to use in desctable
  57. #'
  58. #' These functions take a dataframe as argument and return a list of statistcs in the form accepted by desctable.
  59. #'
  60. #' Already defined are
  61. #' \enumerate{
  62. #' \item stats_default with length, \%, mean, sd, med and IQR
  63. #' \item stats_normal with length, \%, mean and sd
  64. #' \item stats_nonnormal with length, %, median and IQR
  65. #' \item stats_auto, which picks stats depending of the data
  66. #' }
  67. #'
  68. #' You can define your own automatic functions, as long as they take a dataframe as argument and return a list of functions or formulas defining conditions to use a stat function.
  69. #'
  70. #' @param data The dataframe to apply the statistic to
  71. #' @return A list of statistics to use, potentially assessed from the dataframe
  72. #' @export
  73. stats_default <- function(data) {
  74. list("N" = length,
  75. "%" = percent,
  76. "Mean" = is.normal ~ mean,
  77. "sd" = is.normal ~ sd,
  78. "Med" = stats::median,
  79. "IQR" = is.factor ~ NA | IQR)
  80. }
  81. #' @rdname stats_default
  82. #' @export
  83. stats_normal <- function(data) {
  84. list("N" = length,
  85. "%" = percent,
  86. "Mean" = mean,
  87. "sd" = stats::sd)
  88. }
  89. #' @rdname stats_default
  90. #' @export
  91. stats_nonnormal <- function(data) {
  92. list("N" = length,
  93. "%" = percent,
  94. "Median" = stats::median,
  95. "IQR" = is.factor ~ NA | IQR)
  96. }
  97. #' @rdname stats_default
  98. #' @export
  99. stats_auto <- function(data) {
  100. data %>%
  101. Filter(f = is.numeric) %>%
  102. lapply(is.normal) %>%
  103. unlist() -> shapiro
  104. if (length(shapiro) == 0) {
  105. normal <- F
  106. nonnormal <- F
  107. } else {
  108. normal <- any(shapiro)
  109. nonnormal <- any(!shapiro)
  110. }
  111. data %>%
  112. lapply(is.factor) %>%
  113. unlist() %>%
  114. any() -> fact
  115. if (fact & normal & !nonnormal) stats_normal(data)
  116. else if (fact & !normal & nonnormal) stats_nonnormal(data)
  117. else if (fact & !normal & !nonnormal) list("N" = length,
  118. "%" = percent)
  119. else if (!fact & normal & nonnormal) list("N" = length,
  120. "Mean" = is.normal ~ mean,
  121. "sd" = is.normal ~ sd,
  122. "Med" = stats::median,
  123. "IQR" = is.factor ~ NA | IQR)
  124. else if (!fact & normal & !nonnormal) list("N" = length,
  125. "Mean" = mean,
  126. "sd" = stats::sd)
  127. else if (!fact & !normal & nonnormal) list("N" = length,
  128. "Med" = stats::median,
  129. "IQR" = IQR)
  130. else stats_default(data)
  131. }