You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

309 lines
14KB

  1. #' Clear the default metadata tables
  2. #'
  3. #' Clear the default metadata tables
  4. #'
  5. #' Drop the birn, custom_meta, i2b2 and icd10_icd9 tables
  6. #' Delete all schemes and table_access
  7. #' Insert the 'No scheme' scheme
  8. #'
  9. #' @param host The host to connect to
  10. #' @param admin The admin account for the PostgreSQL database
  11. #' @param pass the password for the admin account
  12. #' @export
  13. clear_default_metadata <- function(host = "", admin = "", pass = "")
  14. {
  15. metadata <- RPostgreSQL::dbConnect(RPostgreSQL::PostgreSQL(), host = host, dbname = "i2b2metadata", user = admin, password = pass)
  16. # Drop default tables
  17. RPostgreSQL::dbGetQuery(metadata, "DROP TABLE birn;")
  18. RPostgreSQL::dbGetQuery(metadata, "DROP TABLE custom_meta;")
  19. RPostgreSQL::dbGetQuery(metadata, "DROP TABLE i2b2;")
  20. RPostgreSQL::dbGetQuery(metadata, "DROP TABLE icd10_icd9;")
  21. # Empty schemes and table_acess
  22. c("schemes", "table_access") %>%
  23. purrr::walk(~clear_table("i2b2metadata", .x, host, admin, pass))
  24. # Insert the 'empty' scheme
  25. RPostgreSQL::dbGetQuery(metadata, "INSERT INTO schemes VALUES ('', 'None', 'No scheme');")
  26. RPostgreSQL::dbDisconnect(metadata)
  27. }
  28. #' Delete an ontology from metadata
  29. #'
  30. #' Delete an existing ontology from metadata
  31. #'
  32. #' Delete the corresponding table
  33. #' Delete the scheme in schemes table
  34. #' Delete the entry in table_acess
  35. #'
  36. #' @param scheme The scheme to use for this ontology
  37. #' @param host The host to connect to
  38. #' @param admin The admin account for the PostgreSQL database
  39. #' @param pass the password for the admin account
  40. #' @export
  41. delete_ont<- function(scheme, host = "", admin = "", pass = "")
  42. {
  43. metadata <- RPostgreSQL::dbConnect(RPostgreSQL::PostgreSQL(), host = host, dbname = "i2b2metadata", user = admin, password = pass)
  44. RPostgreSQL::dbGetQuery(metadata, stringr::str_c("DROP TABLE ", scheme, ";"))
  45. RPostgreSQL::dbGetQuery(metadata, stringr::str_c("DELETE FROM table_access WHERE (c_table_cd = '", scheme, "');"))
  46. RPostgreSQL::dbGetQuery(metadata, stringr::str_c("DELETE FROM schemes WHERE (c_name = '", scheme, "');"))
  47. RPostgreSQL::dbDisconnect(metadata)
  48. }
  49. #' Add an ontology to metadata
  50. #'
  51. #' Add an empty ontology space
  52. #'
  53. #' Add a new empty table to metadata, with indexes
  54. #' Add a new scheme to the schemes table
  55. #' Add the corresponding table_access entry for the new table
  56. #'
  57. #' @param host The host to connect to
  58. #' @param admin The admin account for the PostgreSQL database
  59. #' @param pass the password for the admin account
  60. #' @param name The name of the new ontology
  61. #' @param scheme The scheme to use for this ontology
  62. #' @export
  63. add_ont <- function(name, scheme, host = "", admin = "", pass = "")
  64. {
  65. metadata <- RPostgreSQL::dbConnect(RPostgreSQL::PostgreSQL(), host = host, dbname = "i2b2metadata", user = admin, password = pass)
  66. # Insert the new scheme
  67. RPostgreSQL::dbGetQuery(metadata, stringr::str_c("INSERT INTO schemes VALUES ('", scheme, ":', '", scheme, "');"))
  68. # Insert the table_acess entry
  69. RPostgreSQL::dbGetQuery(metadata, stringr::str_c("INSERT INTO table_access VALUES ('", scheme, "', '", scheme, "', 'N', 0, '\\", name, "\\', '", name, "', 'N', 'FA', NULL, NULL, NULL, 'concept_cd', 'concept_dimension', 'concept_path', 'T', 'LIKE', '\\", name, "\\', NULL, NULL, NULL, NULL, NULL, NULL);"))
  70. # Create the new table
  71. RPostgreSQL::dbGetQuery(metadata, stringr::str_c("CREATE TABLE ", scheme, " (
  72. C_HLEVEL INT NOT NULL,
  73. C_FULLNAME VARCHAR(700) NOT NULL,
  74. C_NAME VARCHAR(2000) NOT NULL,
  75. C_SYNONYM_CD CHAR(1) NOT NULL,
  76. C_VISUALATTRIBUTES CHAR(3) NOT NULL,
  77. C_TOTALNUM INT NULL,
  78. C_BASECODE VARCHAR(50) NULL,
  79. C_METADATAXML TEXT NULL,
  80. C_FACTTABLECOLUMN VARCHAR(50) NOT NULL,
  81. C_TABLENAME VARCHAR(50) NOT NULL,
  82. C_COLUMNNAME VARCHAR(50) NOT NULL,
  83. C_COLUMNDATATYPE VARCHAR(50) NOT NULL,
  84. C_OPERATOR VARCHAR(10) NOT NULL,
  85. C_DIMCODE VARCHAR(700) NOT NULL,
  86. C_COMMENT TEXT NULL,
  87. C_TOOLTIP VARCHAR(900) NULL,
  88. M_APPLIED_PATH VARCHAR(700) NOT NULL,
  89. UPDATE_DATE timestamp NOT NULL,
  90. DOWNLOAD_DATE timestamp NULL,
  91. IMPORT_DATE timestamp NULL,
  92. SOURCESYSTEM_CD VARCHAR(50) NULL,
  93. VALUETYPE_CD VARCHAR(50) NULL,
  94. M_EXCLUSION_CD VARCHAR(25) NULL,
  95. C_PATH VARCHAR(700) NULL,
  96. C_SYMBOL VARCHAR(50) NULL,
  97. PLAIN_CODE VARCHAR(25) NULL);"))
  98. # Create the indexes for the new table
  99. RPostgreSQL::dbGetQuery(metadata, stringr::str_c("CREATE INDEX META_FULLNAME_IDX_", scheme, " ON ", scheme, "(C_FULLNAME);"))
  100. RPostgreSQL::dbGetQuery(metadata, stringr::str_c("CREATE INDEX META_APPL_PATH_", scheme, "_IDX ON ", scheme, "(M_APPLIED_PATH);"))
  101. RPostgreSQL::dbGetQuery(metadata, stringr::str_c("CREATE INDEX META_EXCLUSION_", scheme, "_IDX ON ", scheme, "(M_EXCLUSION_CD);"))
  102. RPostgreSQL::dbGetQuery(metadata, stringr::str_c("CREATE INDEX META_HLEVEL_", scheme, "_IDX ON ", scheme, "(C_HLEVEL);"))
  103. RPostgreSQL::dbGetQuery(metadata, stringr::str_c("CREATE INDEX META_SYNONYM_", scheme, "_IDX ON ", scheme, "(C_SYNONYM_CD);"))
  104. # Give ownership of the new table to i2b2metadata
  105. RPostgreSQL::dbGetQuery(metadata, stringr::str_c("ALTER TABLE ", scheme, " OWNER TO i2b2metadata;"))
  106. RPostgreSQL::dbDisconnect(metadata)
  107. }
  108. #' Populate an empty ontology table
  109. #'
  110. #' Populate an empty ontology table
  111. #'
  112. #' Populate an ontology table
  113. #' ont is a dataframe containing at least the c_fullname column, a character vector containing all the leaves of the ontology
  114. #' with their respective path, in the form
  115. #' code_level1 label_level1\\code_level2 label_level2\\...\\code_leaf label_leaf
  116. #' The function rebuilds the folders automatically
  117. #'
  118. #' modi is a dataframe containing at least the c_fullname column, a character vector containing the modifiers, in the form
  119. #' code_modi label_modi
  120. #' The modifiers apply on all the ontology
  121. #'
  122. #' @param ont The ontology to insert
  123. #' @param modi The modifiers to insert
  124. #' @param scheme The scheme to use for this ontology
  125. #' @param include_code Whether to include the code in the label or not
  126. #' @param host The host to connect to
  127. #' @param admin The admin account for the PostgreSQL database
  128. #' @param pass the password for the admin account
  129. #' @param def_columndatatype Default value for that column
  130. #' @param def_tablename Default value for that column
  131. #' @param def_operator Default value for that column
  132. #' @param def_facttablecolumn Default value for that column
  133. #' @param def_columnname Default value for that column
  134. #' @export
  135. populate_ont <- function(ont, modi = NULL, scheme, include_code = T, def_facttablecolumn = "concept_cd", def_tablename = "concept_dimension", def_columnname = "concept_path", def_columndatatype = "T", def_operator = "LIKE", host = "", admin = "", pass = "")
  136. {
  137. metadata <- RPostgreSQL::dbConnect(RPostgreSQL::PostgreSQL(), host = host, dbname = "i2b2metadata", user = admin, password = pass)
  138. # Sanitize the ontology
  139. ont %>%
  140. dplyr::mutate_all(~stringr::str_replace_all(., "'", "''")) ->
  141. ont
  142. if(! modi %>% is.null)
  143. {
  144. modi %>%
  145. dplyr::mutate_all(~stringr::str_replace_all(., "'", "''")) ->
  146. modi
  147. }
  148. # Tag explicit folders and root leaves
  149. ont %>%
  150. dplyr::mutate(type = dplyr::case_when(c_fullname %>% purrr::map_lgl(~stringr::str_detect(setdiff(c_fullname, .x), stringr::fixed(.x)) %>% any) ~ "folder",
  151. c_fullname %>% stringr::str_detect("\\\\") ~ "leaf",
  152. T ~ "root_leaf"),
  153. c_visualattributes = ifelse(type == "folder", "FA", "LA")) ->
  154. ont
  155. # Add the folders for orphaned leaves by 'deconstructing' the paths
  156. ont %>%
  157. dplyr::filter(! (purrr::map(ont$c_fullname[ont$type == "folder"], ~stringr::str_detect(ont$c_fullname, .x)) %>% purrr::reduce(`|`) %||% F),
  158. type == "leaf") %>%
  159. dplyr::pull(c_fullname) ->
  160. leaves
  161. while (any(stringr::str_detect(leaves, "\\\\")))
  162. {
  163. leaves %>%
  164. stringr::str_replace("\\\\[^\\\\]+$", "") %>%
  165. unique ->
  166. leaves
  167. ont %>%
  168. dplyr::add_row(c_fullname = leaves, c_visualattributes = "FA") ->
  169. ont
  170. }
  171. # Get the name of the ontology from the scheme
  172. list_ont(host, admin, pass) %>%
  173. dplyr::filter(c_table_cd == scheme) %>%
  174. dplyr::pull(c_name) ->
  175. name
  176. ont %>%
  177. # Delete temp type variable
  178. dplyr::select(-type) %>%
  179. # Discard duplicated paths
  180. dplyr::distinct() %>%
  181. # Insert the name of the ontology at the root
  182. dplyr::mutate(c_fullname = stringr::str_c("\\", name, "\\", c_fullname)) %>%
  183. dplyr::add_row(c_fullname = stringr::str_c("\\", name), c_visualattributes = "FA") %>%
  184. # Populate the other columns if they are not given, with a default to concept_dimension
  185. dplyr::bind_cols(tibble::tibble(c_synonym_cd = rep(NA, nrow(.)),
  186. c_facttablecolumn = rep(NA, nrow(.)),
  187. c_tablename = rep(NA, nrow(.)),
  188. c_columnname = rep(NA, nrow(.)),
  189. c_columndatatype = rep(NA, nrow(.)),
  190. c_operator = rep(NA, nrow(.)),
  191. c_tooltip = rep(NA, nrow(.)),
  192. c_dimcode = rep(NA, nrow(.)))) %>%
  193. dplyr::select(-dplyr::ends_with("1")) %>%
  194. dplyr::mutate(c_hlevel = stringr::str_count(c_fullname, "\\\\") - 1,
  195. c_name = stringr::str_extract(c_fullname, "[^\\\\]+$"),
  196. c_basecode = stringr::str_c(scheme, ":", c_name %>% stringr::str_extract("^.+? ") %>% stringr::str_trim()),
  197. c_basecode = ifelse(is.na(c_basecode), "", c_basecode),
  198. c_synonym_cd = ifelse(is.na(c_synonym_cd), "N", c_synonymcd),
  199. c_facttablecolumn = ifelse(is.na(c_facttablecolumn), def_facttablecolumn,c_facttablecolumn),
  200. c_tablename = ifelse(is.na(c_tablename), def_tablename, c_tablename),
  201. c_columnname = ifelse(is.na(c_columnname), def_columnname, c_columnname),
  202. c_columndatatype = ifelse(is.na(c_columndatatype), def_columndatatype, c_columndatatype),
  203. c_operator = ifelse(is.na(c_operator), def_operator, c_operator),
  204. c_tooltip = ifelse(is.na(c_tooltip), c_name, c_tooltip),
  205. m_applied_path = "@",
  206. c_fullname = stringr::str_c(c_fullname, "\\"),
  207. # Use only codes to build shorter paths
  208. c_fullname = stringr::str_replace_all(c_fullname, "\\\\(.+?) [^\\\\]+", "\\\\\\1"),
  209. c_dimcode = ifelse(is.na(c_dimcode), c_fullname, c_dimcode),
  210. update_date = format(Sys.Date(), "%m/%d/%Y")) ->
  211. ont
  212. if (!include_code)
  213. ont$c_name[ont$c_hlevel > 0] <- ont$c_name[ont$c_hlevel > 0] %>% stringr::str_extract(" .*$") %>% stringr::str_trim()
  214. # Push the dataframe into the new ontology table
  215. dbPush(ont, metadata, scheme)
  216. if (length(modi) > 0)
  217. {
  218. modi %>%
  219. dplyr::mutate(c_hlevel = 1,
  220. c_name = c_fullname %>% stringr::str_extract(" .*$") %>% stringr::str_trim(),
  221. c_synonym_cd = "N",
  222. c_visualattributes = "RA",
  223. c_basecode = stringr::str_c(scheme, ":", c_fullname %>% stringr::str_extract("^.+? ") %>% stringr::str_trim()),
  224. c_fullname = stringr::str_c("\\", c_name, "\\"),
  225. c_facttablecolumn = "modifier_cd",
  226. c_tablename = "modifier_dimension",
  227. c_columnname = "modifier_path",
  228. c_columndatatype = "T",
  229. c_operator = "LIKE",
  230. c_tooltip = c_name,
  231. c_dimcode = c_fullname,
  232. m_applied_path = stringr::str_c("\\", name, "\\%"),
  233. update_date = format(Sys.Date(), "%m/%d/%Y")) ->
  234. modi
  235. # Push the dataframe into the new ontology table
  236. dbPush(modi, metadata, scheme)
  237. }
  238. RPostgreSQL::dbDisconnect(metadata)
  239. }
  240. #' List the available ontologies
  241. #'
  242. #' @param host The host to connect to
  243. #' @param admin The admin account for the PostgreSQL database
  244. #' @param pass the password for the admin account
  245. #' @export
  246. list_ont <- function(host = "", admin = "", pass = "")
  247. {
  248. dplyr::src_postgres("i2b2metadata", host = host, user = admin, pass = pass) %>%
  249. dplyr::tbl("table_access") %>%
  250. dplyr::collect()
  251. }
  252. #' List the available schemes
  253. #'
  254. #' @param host The host to connect to
  255. #' @param admin The admin account for the PostgreSQL database
  256. #' @param pass the password for the admin account
  257. #' @export
  258. list_schemes <- function(host = "", admin = "", pass = "")
  259. {
  260. dplyr::src_postgres("i2b2metadata", host = host, user = admin, pass = pass) %>%
  261. dplyr::tbl("schemes") %>%
  262. dplyr::collect()
  263. }
  264. #' Fetch an ontology
  265. #'
  266. #' @param ont The name of the ontology, from the c_table_name column in list_ont()
  267. #' @param host The host to connect to
  268. #' @param admin The admin account for the PostgreSQL database
  269. #' @param pass the password for the admin account
  270. #' @export
  271. get_ont <- function(ont, host = "", admin = "", pass = "")
  272. {
  273. dplyr::src_postgres("i2b2metadata", host = host, user = admin, pass = pass) %>%
  274. dplyr::tbl(stringr::str_to_lower(ont)) %>%
  275. dplyr::collect()
  276. }