You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

425 lines
17KB

  1. #' Clear the default demodata tables
  2. #'
  3. #' Clear the default demodata tables
  4. #'
  5. #' @param host The host to connect to
  6. #' @param admin The admin account for the PostgreSQL database
  7. #' @param pass the password for the admin account
  8. #' @export
  9. clear_default_demodata <- function(host = "", admin = "", pass = "")
  10. {
  11. c("code_lookup",
  12. "concept_dimension",
  13. "encounter_mapping",
  14. "modifier_dimension",
  15. "observation_fact",
  16. "patient_dimension",
  17. "patient_mapping",
  18. "provider_dimension",
  19. "qt_analysis_plugin",
  20. "qt_analysis_plugin_result_type",
  21. "qt_patient_enc_collection",
  22. "qt_patient_set_collection",
  23. "qt_pdo_query_master",
  24. "qt_xml_result",
  25. "qt_query_result_instance",
  26. "qt_query_instance",
  27. "qt_query_master",
  28. "visit_dimension") %>%
  29. purrr::walk(~clear_table(stringr::str_c("i2b2demodata"), .x, host, admin, pass))
  30. }
  31. #' Clear the demodata tables
  32. #'
  33. #' Clear the demodata tables
  34. #'
  35. #' @param project The name of the project
  36. #' @param host The host to connect to
  37. #' @param admin The admin account for the PostgreSQL database
  38. #' @param pass the password for the admin account
  39. #' @export
  40. clear_demodata <- function(project, host = "", admin = "", pass = "")
  41. {
  42. c("code_lookup",
  43. "concept_dimension",
  44. "encounter_mapping",
  45. "modifier_dimension",
  46. "observation_fact",
  47. "patient_dimension",
  48. "patient_mapping",
  49. "provider_dimension",
  50. "qt_analysis_plugin",
  51. "qt_analysis_plugin_result_type",
  52. "qt_patient_enc_collection",
  53. "qt_patient_set_collection",
  54. "qt_pdo_query_master",
  55. "qt_xml_result",
  56. "qt_query_result_instance",
  57. "qt_query_instance",
  58. "qt_query_master",
  59. "visit_dimension") %>%
  60. purrr::walk(~clear_table(stringr::str_c("i2b2", project ,"data"), .x, host, admin, pass))
  61. }
  62. #' Delete modifiers
  63. #'
  64. #' Delete modifiers from modifier_dimension
  65. #'
  66. #' @param scheme The scheme to delete from the concepts
  67. #' @param project The name of the project
  68. #' @param host The host to connect to
  69. #' @param admin The admin account for the PostgreSQL database
  70. #' @param pass the password for the admin account
  71. #' @export
  72. delete_modifier <- function(scheme, project, host = "", admin = "", pass = "")
  73. {
  74. demodata <- RPostgreSQL::dbConnect(RPostgreSQL::PostgreSQL(), host = host, dbname = stringr::str_c("i2b2", stringr::str_to_lower(project), "data"), user = admin, password = pass)
  75. RPostgreSQL::dbGetQuery(demodata, stringr::str_c("DELETE FROM modifier_dimension WHERE (modifier_cd LIKE '", scheme, ":%');"))
  76. RPostgreSQL::dbDisconnect(demodata)
  77. }
  78. list_modifier <- function(scheme, project, host = "", admin = "", pass = "")
  79. {
  80. dplyr::src_postgres(stringr::str_c("i2b2", stringr::str_to_lower(project), "data"), host = host, user = admin, pass = pass) %>%
  81. dplyr::tbl("modifier_dimension") %>%
  82. dplyr::collect() %>%
  83. dplyr::filter(modifier_cd %>% stringr::str_detect(stringr::str_c(scheme, ":.*")))
  84. }
  85. #' Delete concepts
  86. #'
  87. #' Delete concepts from concept_dimension
  88. #'
  89. #' @param scheme The scheme to delete from the concepts
  90. #' @param project The name of the project
  91. #' @param host The host to connect to
  92. #' @param admin The admin account for the PostgreSQL database
  93. #' @param pass the password for the admin account
  94. #' @export
  95. delete_concept <- function(scheme, project, host = "", admin = "", pass = "")
  96. {
  97. demodata <- RPostgreSQL::dbConnect(RPostgreSQL::PostgreSQL(), host = host, dbname = stringr::str_c("i2b2", stringr::str_to_lower(project), "data"), user = admin, password = pass)
  98. RPostgreSQL::dbGetQuery(demodata, stringr::str_c("DELETE FROM concept_dimension WHERE (concept_cd LIKE '", scheme, ":%');"))
  99. RPostgreSQL::dbDisconnect(demodata)
  100. }
  101. #' List concepts
  102. #'
  103. #' List the concepts corresponding to a scheme
  104. #'
  105. #' @param scheme The scheme to get the concepts from
  106. #' @param project The name of the project
  107. #' @param host The host to connect to
  108. #' @param admin The admin account for the PostgreSQL database
  109. #' @param pass the password for the admin account
  110. #' @return A list of concepts
  111. #' @export
  112. list_concepts <- function(scheme, project, host = "", admin = "", pass = "")
  113. {
  114. dplyr::src_postgres(stringr::str_c("i2b2", stringr::str_to_lower(project), "data"), host = host, user = admin, pass = pass) %>%
  115. dplyr::tbl("concept_dimension") %>%
  116. dplyr::collect() %>%
  117. dplyr::filter(concept_cd %>% stringr::str_detect(stringr::str_c(scheme, ":.*")))
  118. }
  119. #' Populate the concept_dimension
  120. #'
  121. #' Populate the concept_dimension with new concepts
  122. #'
  123. #' ont is a character vector containing all the leaves of the ontology
  124. #' with their respective path, in the form
  125. #' code_level1 label_level1/code_level2 label_level2/.../code_leaf label_leaf
  126. #'
  127. #' @param ont The ontology to insert
  128. #' @param modi The modifiers to insert
  129. #' @param scheme The scheme to use for this ontology
  130. #' @param project The name of the project
  131. #' @param host The host to connect to
  132. #' @param admin The admin account for the PostgreSQL database
  133. #' @param pass the password for the admin account
  134. #' @export
  135. populate_concept <- function(ont, modi, scheme, project, host = "", admin = "", pass = "")
  136. {
  137. demodata <- RPostgreSQL::dbConnect(RPostgreSQL::PostgreSQL(), host = host, dbname = stringr::str_c("i2b2", stringr::str_to_lower(project), "data"), user = admin, password = pass)
  138. # Sanitize the ontology
  139. ont %>%
  140. dplyr::mutate_all(~stringr::str_replace_all(., "'", "''")) ->
  141. ont
  142. if(! modi %>% is.null)
  143. {
  144. modi %>%
  145. dplyr::mutate_all(~stringr::str_replace_all(., "'", "''")) ->
  146. modi
  147. }
  148. # Get the name of the ontology from the scheme
  149. list_ont(host, admin, pass) %>%
  150. dplyr::filter(c_table_cd == scheme) %>%
  151. dplyr::pull(c_name) ->
  152. name
  153. # Create the data frame holding the contents of the new table
  154. data.frame(concept_path = ont$c_fullname, stringsAsFactors = F) %>%
  155. # Insert the name of the ontology at the root
  156. dplyr::mutate(concept_path = stringr::str_c("\\", name, "\\", concept_path)) %>%
  157. # Populate the other columns
  158. dplyr::mutate(name_char = stringr::str_extract(concept_path, "[^\\\\]+$"),
  159. concept_cd = stringr::str_c(scheme, ":", name_char %>% stringr::str_extract("^.+? ") %>% stringr::str_trim()),
  160. concept_cd = ifelse(is.na(concept_cd), "", concept_cd),
  161. concept_path = stringr::str_c(concept_path, "\\"),
  162. # Use only codes to build shorter paths
  163. concept_path = stringr::str_replace_all(concept_path, "\\\\(.+?) [^\\\\]+", "\\\\\\1"),
  164. update_date = format(Sys.Date(), "%m/%d/%Y")) %>%
  165. # Push the dataframe into the new ontology table
  166. dbPush(demodata, "concept_dimension")
  167. if (! modi %>% is.null)
  168. {
  169. modi %>%
  170. dplyr::mutate(name_char = c_fullname %>% stringr::str_extract(" .*$") %>% stringr::str_trim(),
  171. modifier_path = stringr::str_c("\\", name_char, "\\"),
  172. modifier_cd = stringr::str_c(scheme, ":", c_fullname %>% stringr::str_extract("^.+? ") %>% stringr::str_trim()),
  173. update_date = format(Sys.Date(), "%m/%d/%Y")) %>%
  174. dplyr::select(-c_fullname) %>%
  175. # Push the dataframe into the new ontology table
  176. dbPush(demodata, "modifier_dimension")
  177. }
  178. RPostgreSQL::dbDisconnect(demodata)
  179. }
  180. #' Populate the provider_dimension
  181. #'
  182. #' Populate the provider_dimension with new providers
  183. #'
  184. #' ont is a character vector containing all the leaves of the ontology
  185. #' with their respective path, in the form
  186. #' code_level1 label_level1/code_level2 label_level2/.../code_leaf label_leaf
  187. #'
  188. #' @param ont The ontology to insert
  189. #' @param scheme The scheme to use for this ontology
  190. #' @param project The name of the project
  191. #' @param host The host to connect to
  192. #' @param admin The admin account for the PostgreSQL database
  193. #' @param pass the password for the admin account
  194. #' @export
  195. populate_provider <- function(ont, scheme, project, host = "", admin = "", pass = "")
  196. {
  197. demodata <- RPostgreSQL::dbConnect(RPostgreSQL::PostgreSQL(), host = host, dbname = stringr::str_c("i2b2", stringr::str_to_lower(project), "data"), user = admin, password = pass)
  198. # Sanitize the ontology
  199. ont %>%
  200. dplyr::mutate_all(~stringr::str_replace_all(., "'", "''")) ->
  201. ont
  202. # Get the name of the ontology from the scheme
  203. list_ont(host, admin, pass) %>%
  204. dplyr::filter(c_table_cd == scheme) %>%
  205. dplyr::pull(c_name) ->
  206. name
  207. # Create the data frame holding the contents of the new table
  208. data.frame(provider_path = ont$c_fullname, stringsAsFactors = F) %>%
  209. # Insert the name of the ontology at the root
  210. dplyr::mutate(provider_path = stringr::str_c("\\", name, "\\", provider_path)) %>%
  211. # Populate the other columns
  212. dplyr::mutate(name_char = stringr::str_extract(provider_path, "[^\\\\]+$"),
  213. provider_id = stringr::str_c(scheme, ":", name_char %>% stringr::str_extract("^.+? ") %>% stringr::str_trim()),
  214. provider_id = ifelse(is.na(provider_id), "", provider_id),
  215. provider_path = stringr::str_c(provider_path, "\\"),
  216. # Use only codes to build shorter paths
  217. provider_path = stringr::str_replace_all(provider_path, "\\\\(.+?) [^\\\\]+", "\\\\\\1"),
  218. update_date = format(Sys.Date(), "%m/%d/%Y")) %>%
  219. # Push the dataframe into the new ontology table
  220. dbPush(demodata, "provider_dimension")
  221. RPostgreSQL::dbDisconnect(demodata)
  222. }
  223. #' Add patients to the CRC cell
  224. #'
  225. #' Add patients to the CRC cell, generate new encrypted IDs,
  226. #'
  227. #' The patients dataframe must contain the following columns:
  228. #' - patient_ide: the original patient ID
  229. #' - birth_date: as a Date object
  230. #' - death_date: as a Date object
  231. #' - sex_cd (F or M)
  232. #'
  233. #' @param patients A dataframe of patients
  234. #' @param project The project to add the patients to
  235. #' @param host The host to connect to
  236. #' @param admin The admin account for the PostgreSQL database
  237. #' @param pass The password for the admin account
  238. #' @export
  239. add_patients_demodata <- function(patients, project, host = "", admin = "", pass = "")
  240. {
  241. demodata <- RPostgreSQL::dbConnect(RPostgreSQL::PostgreSQL(), host = host, dbname = stringr::str_c("i2b2", stringr::str_to_lower(project), "data"), user = admin, password = pass)
  242. # Upsert patients mappings
  243. patients %>%
  244. dplyr::mutate(patient_ide_source = project,
  245. patient_ide_status = "A",
  246. project_id = project,
  247. patient_num = patient_ide,
  248. update_date = format(Sys.Date(), "%m/%d/%Y")) %>%
  249. dplyr::select(patient_ide, patient_ide_source, patient_num, patient_ide_status, project_id, update_date) %>%
  250. dbUpsert(demodata, "patient_mapping", c("patient_ide", "patient_ide_source", "project_id"))
  251. patients %>%
  252. dplyr::mutate(age_in_years_num = ifelse(is.na(death_date), floor(as.numeric(Sys.Date() - birth_date)/365.25), floor(as.numeric(death_date - birth_date)/365.25)),
  253. birth_date = ifelse(is.na(birth_date), NA, format(birth_date, format = "%m/%d/%Y %H:%M:%S")),
  254. death_date = ifelse(is.na(death_date), NA, format(death_date, format = "%m/%d/%Y %H:%M:%S")),
  255. vital_status_cd = ifelse(is.na(death_date), "N", "S"),
  256. update_date = format(Sys.Date(), "%m/%d/%Y"),
  257. patient_num = patient_ide) %>%
  258. dplyr::select(patient_num, vital_status_cd, birth_date, death_date, sex_cd, age_in_years_num, update_date) %>%
  259. dbUpsert(demodata, "patient_dimension", "patient_num")
  260. RPostgreSQL::dbDisconnect(demodata)
  261. }
  262. #' Add encounters to the CRC cell
  263. #'
  264. #' Add encounters to the CRC cell, generate new encrypted IDs
  265. #'
  266. #' The encounters dataframe must contain the following columns:
  267. #' - encounter_ide: the original encounter ID
  268. #' - patient_ide: the original patient ID
  269. #' - start_date: the start date of the encounter, as Date object
  270. #' - end_date: the end date of the encounter, as Date object
  271. #' - inout_cd: I or O if inpatient or outpatient
  272. #'
  273. #' @param encounters A dataframe of patients
  274. #' @param project The project to add the patients to
  275. #' @param host The host to connect to
  276. #' @param admin The admin account for the PostgreSQL database
  277. #' @param pass The password for the admin account
  278. #' @return An encounter mapping dataframe for the encounters
  279. #' @export
  280. add_encounters <- function(encounters, project, host = "", admin = "", pass = "")
  281. {
  282. demodata <- RPostgreSQL::dbConnect(RPostgreSQL::PostgreSQL(), host = host, dbname = stringr::str_c("i2b2", stringr::str_to_lower(project), "data"), user = admin, password = pass)
  283. demodata %>%
  284. dplyr::tbl("encounter_mapping") %>%
  285. dplyr::select(encounter_ide, encounter_num) %>%
  286. dplyr::collect() ->
  287. mapping
  288. if (nrow(mapping) == 0)
  289. mapping <- data.frame(encounter_ide = character(0), encounter_num = numeric(0), stringsAsFactors = F)
  290. start <- ifelse(nrow(mapping) == 0, 1, max(mapping$encounter_num) + 1)
  291. encounters %>%
  292. dplyr::inner_join(mapping) -> mapped
  293. encounters %>%
  294. dplyr::anti_join(mapping) %>%
  295. dplyr::mutate(encounter_num = seq(start, length.out = nrow(.))) -> unmapped
  296. unmapped %>%
  297. dplyr::mutate(encounter_ide_source = project,
  298. encounter_ide_status = "A",
  299. project_id = project,
  300. patient_ide_source = "HIVE",
  301. update_date = format(Sys.Date(), "%m/%d/%Y")) %>%
  302. dplyr::select(encounter_ide, encounter_ide_source, project_id, encounter_num, patient_ide, patient_ide_source, encounter_ide_status, update_date) %>%
  303. dbUpsert(demodata, "encounter_mapping", c("encounter_ide", "encounter_ide_source", "project_id", "patient_ide", "patient_ide_source"))
  304. mapped %>%
  305. dplyr::bind_rows(unmapped) %>%
  306. dplyr::mutate(length_of_stay = ifelse(is.na(end_date), floor(as.numeric(Sys.Date() - start_date)), floor(as.numeric(end_date - start_date))),
  307. start_date = ifelse(is.na(start_date), NA, format(start_date, format = "%m/%d/%Y %H:%M:%S")),
  308. end_date = ifelse(is.na(end_date), NA, format(end_date, format = "%m/%d/%Y %H:%M:%S")),
  309. active_status_cd = ifelse(is.na(end_date), "O", "S"),
  310. patient_num = patient_ide,
  311. update_date = format(Sys.Date(), "%m/%d/%Y")) %>%
  312. dplyr::select(encounter_num, patient_num, active_status_cd, start_date, end_date, inout_cd, length_of_stay, update_date) %>%
  313. dbUpsert(demodata, "visit_dimension", c("encounter_num", "patient_num"))
  314. RPostgreSQL::dbDisconnect(demodata)
  315. }
  316. #' Add observations to the CRC cell
  317. #'
  318. #' Add observations to the CRC cell
  319. #'
  320. #' The observations dataframe must contain the following columns:
  321. #' - encounter_ide: the original encounter ID
  322. #' - patient_ide: the original patient ID
  323. #' - start_date: the start date of the encounter, as Date object
  324. #' - concept_cd: the concept to insert
  325. #' - provider_id: the provider
  326. #' - modifier_cd: optionnal modifier for the concept
  327. #' Other observation fact columns can optionnaly be included,
  328. #' such as end_date, valtype_cd, tval_char, nval_num, valueflag_cd, units_cd, etc.
  329. #'
  330. #' @param observations A dataframe of observation facts
  331. #' @param project The name of the project
  332. #' @param host The host to connect to
  333. #' @param admin The admin account for the PostgreSQL database
  334. #' @param pass The password for the admin account
  335. #' @export
  336. add_observations <- function(observations, project, host = "", admin = "", pass = "")
  337. {
  338. demodata <- RPostgreSQL::dbConnect(RPostgreSQL::PostgreSQL(), host = host, dbname = stringr::str_c("i2b2", stringr::str_to_lower(project), "data"), user = admin, password = pass)
  339. # Get the encounter mapping
  340. demodata %>%
  341. dplyr::tbl("encounter_mapping") %>%
  342. dplyr::select(encounter_ide, encounter_num) %>%
  343. dplyr::collect() ->
  344. mapping
  345. observations %>%
  346. dplyr::inner_join(mapping) -> observations
  347. # create the text_search_index column
  348. RPostgreSQL::dbGetQuery(demodata, "SELECT max(text_search_index) from observation_fact;") %>%
  349. .$max -> nextval
  350. if (nextval %>% is.na)
  351. {
  352. RPostgreSQL::dbGetQuery(demodata, "SELECT nextval('observation_fact_text_search_index_seq'::regclass);") %>%
  353. .$nextval -> nextval
  354. }
  355. observations %>%
  356. dplyr::mutate(start_date = ifelse(is.na(start_date), NA, format(start_date, format = "%m/%d/%Y %H:%M:%S")),
  357. patient_num = patient_ide,
  358. update_date = format(Sys.Date(), "%m/%d/%Y"),
  359. text_search_index = seq(nextval+1, length.out = nrow(.))) %>%
  360. dplyr::group_by(patient_ide, encounter_ide, start_date, provider_id, concept_cd, modifier_cd) %>%
  361. dplyr::mutate(instance_num = seq(1, length.out = n())) %>%
  362. dplyr::ungroup() %>%
  363. dplyr::select(-patient_ide, -encounter_ide) %>%
  364. dbUpsert(demodata, "observation_fact", c("patient_num", "concept_cd", "modifier_cd", "start_date", "encounter_num", "instance_num", "provider_id"))
  365. RPostgreSQL::dbDisconnect(demodata)
  366. }
  367. #' Rebuild the indexes
  368. #'
  369. #' Rebuild the indexes in i2b2demodata
  370. #'
  371. #' @param project The name of the project
  372. #' @param host The host to connect to
  373. #' @param admin The admin account for the PostgreSQL database
  374. #' @param pass The password for the admin account
  375. #' @export
  376. rebuild_indexes_demodata <- function(project, host = "", admin = "", pass = "")
  377. {
  378. demodata <- RPostgreSQL::dbConnect(RPostgreSQL::PostgreSQL(), host = host, dbname = stringr::str_c("i2b2", stringr::str_to_lower(project), "data"), user = admin, password = pass)
  379. RPostgreSQL::dbGetQuery(demodata, stringr::str_c("REINDEX DATABASE i2b2", stringr::str_to_lower(project), "data;"))
  380. RPostgreSQL::dbDisconnect(demodata)
  381. }