From 6bfa44af4ceda4cfc8cc620ef1d2b02fe341d56d Mon Sep 17 00:00:00 2001 From: Maxime Wack Date: Thu, 24 Aug 2017 07:42:31 -0400 Subject: [PATCH] Checks and documentation updates --- DESCRIPTION | 5 +- NAMESPACE | 1 + R/demodata.R | 4 +- R/fresh_install.R | 104 ++++++++++++++++++---------------- R/import.R | 1 - man/add_encounters.Rd | 2 +- man/add_observations.Rd | 4 -- man/import_patients_visits.Rd | 32 +++++++++++ 8 files changed, 93 insertions(+), 60 deletions(-) create mode 100644 man/import_patients_visits.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 0b2b347..7c86c63 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -17,5 +17,8 @@ Imports: RPostgreSQL, httr, rvest, - xml2 + xml2, + lubridate, + tidyr, + readr RoxygenNote: 6.0.1 diff --git a/NAMESPACE b/NAMESPACE index 7c80c14..ea2c3b2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -29,6 +29,7 @@ export(delete_users) export(fresh_install) export(get_domain) export(get_ont) +export(import_patients_visits) export(list_concepts) export(list_ont) export(list_projects) diff --git a/R/demodata.R b/R/demodata.R index 9e41bb9..86f0218 100644 --- a/R/demodata.R +++ b/R/demodata.R @@ -365,8 +365,6 @@ add_encounters <- function(encounters, project, host = "", admin = "", pass = "" #' #' @param observations A dataframe of observation facts #' @param project The name of the project -#' @param patient_mapping The patient mapping table -#' @param encounter_mapping The encounter mapping table #' @param host The host to connect to #' @param admin The admin account for the PostgreSQL database #' @param pass The password for the admin account @@ -401,7 +399,7 @@ add_observations <- function(observations, project, host = "", admin = "", pass update_date = format(Sys.Date(), "%m/%d/%Y"), text_search_index = seq(nextval+1, length.out = nrow(.))) %>% dplyr::group_by(patient_ide, encounter_ide, start_date, provider_id, concept_cd, modifier_cd) %>% - dplyr::mutate(instance_num = seq(1, length.out = n())) %>% + dplyr::mutate(instance_num = seq(1, length.out = dplyr::n())) %>% dplyr::ungroup() %>% dplyr::select(-patient_ide, -encounter_ide) %>% dbUpsert(demodata, "observation_fact", c("patient_num", "concept_cd", "modifier_cd", "start_date", "encounter_num", "instance_num", "provider_id")) diff --git a/R/fresh_install.R b/R/fresh_install.R index e7f7ea8..03dca2a 100644 --- a/R/fresh_install.R +++ b/R/fresh_install.R @@ -140,7 +140,7 @@ pop_obgyn <- function() UM <- seq(6040, 6100, 10) c(UM, 620) %>% - map(add_ontologies) + purrr::map(add_ontologies) # 2016 readr::read_csv("/manip/pims16.csv", col_types = readr::cols(.default = readr::col_character())) %>% @@ -153,27 +153,27 @@ pop_obgyn <- function() readr::read_csv("/manip/diags16.csv", col_types = readr::cols(.default = readr::col_character())) %>% stats::setNames(c("patient_ide", "encounter_ide", "start_date", "end_date", "provider_id", "concept_cd", "modifier_cd")) %>% - semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% + dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% import_diagnostics(620) readr::read_csv("/manip/actes16.csv", col_types = readr::cols(.default = readr::col_character())) %>% stats::setNames(c("patient_ide", "encounter_ide", "provider_id", "concept_cd", "start_date")) %>% - semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% + dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% import_actes(620) readr::read_csv("/manip/mensurations16.csv", col_types = readr::cols(.default = readr::col_character())) %>% stats::setNames(c("patient_ide", "encounter_ide", "poids", "taille", "IMC")) %>% - semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% + dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% import_mensurations(patients, 620) readr::read_csv("/manip/bio16_1.csv", col_types = readr::cols(.default = readr::col_character())) %>% stats::setNames(c("patient_ide", "encounter_ide", "start_date", "concept_cd", "nval_num")) %>% - semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% + dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% import_bios(patients, 620) readr::read_csv("/manip/bio16_2.csv", col_types = readr::cols(.default = readr::col_character())) %>% stats::setNames(c("patient_ide", "encounter_ide", "start_date", "concept_cd", "nval_num")) %>% - semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% + dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% import_bios(patients, 620) # 2017 @@ -187,26 +187,26 @@ pop_obgyn <- function() readr::read_csv("/manip/diags17.csv", col_types = readr::cols(.default = readr::col_character())) %>% stats::setNames(c("patient_ide", "encounter_ide", "start_date", "end_date", "provider_id", "concept_cd", "modifier_cd")) %>% - semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% + dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% import_diagnostics(620) readr::read_csv("/manip/actes17.csv", col_types = readr::cols(.default = readr::col_character())) %>% stats::setNames(c("patient_ide", "encounter_ide", "provider_id", "concept_cd", "start_date")) %>% - semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% + dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% import_actes(620) readr::read_csv("/manip/mensurations17.csv", col_types = readr::cols(.default = readr::col_character())) %>% stats::setNames(c("patient_ide", "encounter_ide", "poids", "taille", "IMC")) %>% - semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% + dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% import_mensurations(patients, 620) readr::read_csv("/manip/bios17.csv", col_types = readr::cols(.default = readr::col_character())) %>% stats::setNames(c("patient_ide", "encounter_ide", "start_date", "concept_cd", "nval_num")) %>% - semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% + dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% import_bios(patients, 620) UM %>% - map(function(x) + purrr::map(function(x) { # 2016 readr::read_csv("/manip/pims16.csv", col_types = readr::cols(.default = readr::col_character())) %>% @@ -219,27 +219,27 @@ pop_obgyn <- function() readr::read_csv("/manip/diags16.csv", col_types = readr::cols(.default = readr::col_character())) %>% stats::setNames(c("patient_ide", "encounter_ide", "start_date", "end_date", "provider_id", "concept_cd", "modifier_cd")) %>% - semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% + dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% import_diagnostics(x) readr::read_csv("/manip/actes16.csv", col_types = readr::cols(.default = readr::col_character())) %>% stats::setNames(c("patient_ide", "encounter_ide", "provider_id", "concept_cd", "start_date")) %>% - semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% + dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% import_actes(x) readr::read_csv("/manip/mensurations16.csv", col_types = readr::cols(.default = readr::col_character())) %>% stats::setNames(c("patient_ide", "encounter_ide", "poids", "taille", "IMC")) %>% - semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% + dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% import_mensurations(patients, x) readr::read_csv("/manip/bio16_1.csv", col_types = readr::cols(.default = readr::col_character())) %>% stats::setNames(c("patient_ide", "encounter_ide", "start_date", "concept_cd", "nval_num")) %>% - semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% + dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% import_bios(patients, x) readr::read_csv("/manip/bio16_2.csv", col_types = readr::cols(.default = readr::col_character())) %>% stats::setNames(c("patient_ide", "encounter_ide", "start_date", "concept_cd", "nval_num")) %>% - semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% + dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% import_bios(patients, x) # 2017 @@ -253,22 +253,22 @@ pop_obgyn <- function() readr::read_csv("/manip/diags17.csv", col_types = readr::cols(.default = readr::col_character())) %>% stats::setNames(c("patient_ide", "encounter_ide", "start_date", "end_date", "provider_id", "concept_cd", "modifier_cd")) %>% - semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% + dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% import_diagnostics(x) readr::read_csv("/manip/actes17.csv", col_types = readr::cols(.default = readr::col_character())) %>% stats::setNames(c("patient_ide", "encounter_ide", "provider_id", "concept_cd", "start_date")) %>% - semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% + dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% import_actes(x) readr::read_csv("/manip/mensurations17.csv", col_types = readr::cols(.default = readr::col_character())) %>% stats::setNames(c("patient_ide", "encounter_ide", "poids", "taille", "IMC")) %>% - semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% + dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% import_mensurations(patients, x) readr::read_csv("/manip/bios17.csv", col_types = readr::cols(.default = readr::col_character())) %>% stats::setNames(c("patient_ide", "encounter_ide", "start_date", "concept_cd", "nval_num")) %>% - semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% + dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>% import_bios(patients, x) }) @@ -330,9 +330,8 @@ read_patients <- function(file) "rum_end", "provider_id", "project")) %>% - dplyr::filter(!is.na(patient_ide)) %>% - dplyr::mutate(patient_ide = sanitize_encounter(patient_ide) - encounter_ide = sanitize_encounter(encounter_ide, start_date) + dplyr::mutate(patient_ide = sanitize_encounter(patient_ide), + encounter_ide = sanitize_encounter(encounter_ide, start_date), start_date = start_date %>% as.Date(format = "%Y/%m/%d %H:%M:%S"), end_date = end_date %>% as.Date(format = "%Y/%m/%d %H:%M:%S"), sex_cd = ifelse(sex_cd == "1", "M", "F"), @@ -343,38 +342,25 @@ read_patients <- function(file) provider_id = stringr::str_c("STRUCT:", provider_id)) } -# TODO: check start_date and join with patients df -read_mensurations <- function(file) -{ - readr::read_csv(file, col_types = readr::cols(.default = readr::col_character())) %>% - stats::setNames(c("patient_ide", - "encounter_ide", - "poids", - "taille", - "IMC")) %>% - dplyr::filter(!is.na(patient_ide)) %>% - dplyr::mutate(patient_ide = sanitize_patient(patient_ide), - encounter_ide = sanitize_encounter(encounter_ide, start_date)) -} - read_diagnostics <- function(file) { readr::read_csv(file, col_types = readr::cols(.default = readr::col_character())) %>% stats::setNames(c("patient_ide", "encounter_ide", + "enc_start_date", "start_date", "end_date", "provider_id", "concept_cd", "modifier_cd")) %>% - dplyr::filter(!is.na(concept_cd)) %>% - dplyr::mutate(encounter_ide = sanitize_encounter(encounter_ide, start_date), + dplyr::mutate(encounter_ide = sanitize_encounter(encounter_ide, enc_start_date), patient_ide = sanitize_patient(patient_ide), start_date = start_date %>% as.Date(format = "%Y/%m/%d %H:%M:%S"), end_date = end_date %>% as.Date(format = "%Y/%m/%d %H:%M:%S"), provider_id = stringr::str_c("STRUCT:", provider_id), concept_cd = stringr::str_c("CIM:", concept_cd), - modifier_cd = stringr::str_c("CIM:", modifier_cd)) + modifier_cd = stringr::str_c("CIM:", modifier_cd)) %>% + dplyr::select(-enc_start_date) } read_actes <- function(file) @@ -382,16 +368,37 @@ read_actes <- function(file) readr::read_csv(file, col_types = readr::cols(.default = readr::col_character())) %>% stats::setNames(c("patient_ide", "encounter_ide", + "enc_start_date", "provider_id", "concept_cd", "start_date")) %>% - dplyr::filter(!is.na(concept_cd), - !is.na(start_date)) %>% - dplyr::mutate(encounter_ide = sanitize_encounter(encounter_ide, start_date), + dplyr::mutate(encounter_ide = sanitize_encounter(encounter_ide, enc_start_date), patient_ide = sanitize_patient(patient_ide), provider_id = stringr::str_c("STRUCT:", provider_id), concept_cd = stringr::str_c("CCAM:", concept_cd), - start_date = start_date %>% as.Date(format = "%Y/%m/%d %H:%M:%S")) + start_date = start_date %>% as.Date(format = "%Y/%m/%d %H:%M:%S")) %>% + dplyr::select(-enc_start_date) +} + +read_mensurations <- function(file) +{ + readr::read_csv(file, col_types = readr::cols(.default = readr::col_character())) %>% + stats::setNames(c("patient_ide", + "encounter_ide", + "enc_start_date", + "poids", + "taille", + "IMC")) %>% + dplyr::mutate(patient_ide = sanitize_patient(patient_ide), + encounter_ide = sanitize_encounter(encounter_ide, enc_start_date)) %>% + dplyr::select(-enc_start_date) %>% + tidyr::gather(concept_cd, nval_num, poids, taille, IMC) %>% + dplyr::filter(!is.na(nval_num)) %>% + dplyr::mutate(concept_cd = stringr::str_c("HOS:", concept_cd), + modifier_cd = "@", + valtype_cd = "N", + tval_char = "E", + nval_num = nval_num %>% stringr::str_replace(",", ".")) } read_bios <- function(file) @@ -401,20 +408,17 @@ read_bios <- function(file) bios %>% stats::setNames(c("patient_ide", "encounter_ide", + "enc_start_date", "start_date", "concept_cd", "nval_num")) %>% - dplyr::filter(!is.na(concept_cd), - !is.na(nval_num), - !is.na(start_date), - !concept_cd %in% c("MB_SGT_AER_CB", "MB_SGT_ANA_CB", "MB_LP_TC", "MB_SGT_PED_CB", "MB_CS_NUM_DON_RC", "MB_ANTIBIO_RC")) %>% dplyr::left_join(mapping, by = c("concept_cd" = "from")) %>% - dplyr::mutate(encounter_ide = sanitize_encounter(encounter_ide, start_date), + dplyr::mutate(encounter_ide = sanitize_encounter(encounter_ide, enc_start_date), patient_ide = sanitize_patient(patient_ide), start_date = start_date %>% as.Date(format = "%Y/%m/%d %H:%M:%S"), concept_cd = ifelse(!is.na(to), to, concept_cd), concept_cd = stringr::str_c("BIO:", concept_cd)) %>% - dplyr::select(-to) + dplyr::select(-to, -enc_start_date) } sanitize_encounter <- function(encounter_ide, start_date) diff --git a/R/import.R b/R/import.R index b4ff8dc..911d169 100644 --- a/R/import.R +++ b/R/import.R @@ -19,7 +19,6 @@ #' #' @param patients A formatted dataframe with correctly named columns #' @param project The project to add the data to -#' @return #' @export import_patients_visits <- function(patients, project) { diff --git a/man/add_encounters.Rd b/man/add_encounters.Rd index be959d7..42ecfab 100644 --- a/man/add_encounters.Rd +++ b/man/add_encounters.Rd @@ -29,5 +29,5 @@ The encounters dataframe must contain the following columns: - patient_ide: the original patient ID - start_date: the start date of the encounter, as Date object - end_date: the end date of the encounter, as Date object -- inout: I or O if inpatient or outpatient +- inout_cd: I or O if inpatient or outpatient } diff --git a/man/add_observations.Rd b/man/add_observations.Rd index 2e7aa76..902167e 100644 --- a/man/add_observations.Rd +++ b/man/add_observations.Rd @@ -16,10 +16,6 @@ add_observations(observations, project, host = "", admin = "", pass = "") \item{admin}{The admin account for the PostgreSQL database} \item{pass}{The password for the admin account} - -\item{patient_mapping}{The patient mapping table} - -\item{encounter_mapping}{The encounter mapping table} } \description{ Add observations to the CRC cell diff --git a/man/import_patients_visits.Rd b/man/import_patients_visits.Rd new file mode 100644 index 0000000..0ee6dda --- /dev/null +++ b/man/import_patients_visits.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/import.R +\name{import_patients_visits} +\alias{import_patients_visits} +\title{Import patients and their visits} +\usage{ +import_patients_visits(patients, project) +} +\arguments{ +\item{patients}{A formatted dataframe with correctly named columns} + +\item{project}{The project to add the data to} +} +\description{ +Import patients and their visits +} +\details{ +Import the patient_dimension and visit_dimension death_data +As well as creating the mappings and add visit age observations + +Structure for patient dataframe: +- patient_ide : character +- encounter_ide : character +- start_date : Date +- end_date : Date +- rum_start : Date +- rum_end : Date +- birth_date : Date +- death_date : Date +- sex_cd : char, 'M' or 'F' +- provider_id : char, 'STRUCT:xxx' +}