Browse Source

Checks and documentation updates

master
Maxime Wack 6 years ago
parent
commit
6bfa44af4c
8 changed files with 93 additions and 60 deletions
  1. +4
    -1
      DESCRIPTION
  2. +1
    -0
      NAMESPACE
  3. +1
    -3
      R/demodata.R
  4. +54
    -50
      R/fresh_install.R
  5. +0
    -1
      R/import.R
  6. +1
    -1
      man/add_encounters.Rd
  7. +0
    -4
      man/add_observations.Rd
  8. +32
    -0
      man/import_patients_visits.Rd

+ 4
- 1
DESCRIPTION View File

@@ -17,5 +17,8 @@ Imports:
RPostgreSQL,
httr,
rvest,
xml2
xml2,
lubridate,
tidyr,
readr
RoxygenNote: 6.0.1

+ 1
- 0
NAMESPACE View File

@@ -29,6 +29,7 @@ export(delete_users)
export(fresh_install)
export(get_domain)
export(get_ont)
export(import_patients_visits)
export(list_concepts)
export(list_ont)
export(list_projects)


+ 1
- 3
R/demodata.R View File

@@ -365,8 +365,6 @@ add_encounters <- function(encounters, project, host = "", admin = "", pass = ""
#'
#' @param observations A dataframe of observation facts
#' @param project The name of the project
#' @param patient_mapping The patient mapping table
#' @param encounter_mapping The encounter mapping table
#' @param host The host to connect to
#' @param admin The admin account for the PostgreSQL database
#' @param pass The password for the admin account
@@ -401,7 +399,7 @@ add_observations <- function(observations, project, host = "", admin = "", pass
update_date = format(Sys.Date(), "%m/%d/%Y"),
text_search_index = seq(nextval+1, length.out = nrow(.))) %>%
dplyr::group_by(patient_ide, encounter_ide, start_date, provider_id, concept_cd, modifier_cd) %>%
dplyr::mutate(instance_num = seq(1, length.out = n())) %>%
dplyr::mutate(instance_num = seq(1, length.out = dplyr::n())) %>%
dplyr::ungroup() %>%
dplyr::select(-patient_ide, -encounter_ide) %>%
dbUpsert(demodata, "observation_fact", c("patient_num", "concept_cd", "modifier_cd", "start_date", "encounter_num", "instance_num", "provider_id"))


+ 54
- 50
R/fresh_install.R View File

@@ -140,7 +140,7 @@ pop_obgyn <- function()
UM <- seq(6040, 6100, 10)

c(UM, 620) %>%
map(add_ontologies)
purrr::map(add_ontologies)

# 2016
readr::read_csv("/manip/pims16.csv", col_types = readr::cols(.default = readr::col_character())) %>%
@@ -153,27 +153,27 @@ pop_obgyn <- function()

readr::read_csv("/manip/diags16.csv", col_types = readr::cols(.default = readr::col_character())) %>%
stats::setNames(c("patient_ide", "encounter_ide", "start_date", "end_date", "provider_id", "concept_cd", "modifier_cd")) %>%
semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
import_diagnostics(620)

readr::read_csv("/manip/actes16.csv", col_types = readr::cols(.default = readr::col_character())) %>%
stats::setNames(c("patient_ide", "encounter_ide", "provider_id", "concept_cd", "start_date")) %>%
semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
import_actes(620)

readr::read_csv("/manip/mensurations16.csv", col_types = readr::cols(.default = readr::col_character())) %>%
stats::setNames(c("patient_ide", "encounter_ide", "poids", "taille", "IMC")) %>%
semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
import_mensurations(patients, 620)

readr::read_csv("/manip/bio16_1.csv", col_types = readr::cols(.default = readr::col_character())) %>%
stats::setNames(c("patient_ide", "encounter_ide", "start_date", "concept_cd", "nval_num")) %>%
semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
import_bios(patients, 620)

readr::read_csv("/manip/bio16_2.csv", col_types = readr::cols(.default = readr::col_character())) %>%
stats::setNames(c("patient_ide", "encounter_ide", "start_date", "concept_cd", "nval_num")) %>%
semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
import_bios(patients, 620)

# 2017
@@ -187,26 +187,26 @@ pop_obgyn <- function()

readr::read_csv("/manip/diags17.csv", col_types = readr::cols(.default = readr::col_character())) %>%
stats::setNames(c("patient_ide", "encounter_ide", "start_date", "end_date", "provider_id", "concept_cd", "modifier_cd")) %>%
semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
import_diagnostics(620)

readr::read_csv("/manip/actes17.csv", col_types = readr::cols(.default = readr::col_character())) %>%
stats::setNames(c("patient_ide", "encounter_ide", "provider_id", "concept_cd", "start_date")) %>%
semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
import_actes(620)

readr::read_csv("/manip/mensurations17.csv", col_types = readr::cols(.default = readr::col_character())) %>%
stats::setNames(c("patient_ide", "encounter_ide", "poids", "taille", "IMC")) %>%
semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
import_mensurations(patients, 620)

readr::read_csv("/manip/bios17.csv", col_types = readr::cols(.default = readr::col_character())) %>%
stats::setNames(c("patient_ide", "encounter_ide", "start_date", "concept_cd", "nval_num")) %>%
semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
import_bios(patients, 620)

UM %>%
map(function(x)
purrr::map(function(x)
{
# 2016
readr::read_csv("/manip/pims16.csv", col_types = readr::cols(.default = readr::col_character())) %>%
@@ -219,27 +219,27 @@ pop_obgyn <- function()

readr::read_csv("/manip/diags16.csv", col_types = readr::cols(.default = readr::col_character())) %>%
stats::setNames(c("patient_ide", "encounter_ide", "start_date", "end_date", "provider_id", "concept_cd", "modifier_cd")) %>%
semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
import_diagnostics(x)

readr::read_csv("/manip/actes16.csv", col_types = readr::cols(.default = readr::col_character())) %>%
stats::setNames(c("patient_ide", "encounter_ide", "provider_id", "concept_cd", "start_date")) %>%
semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
import_actes(x)

readr::read_csv("/manip/mensurations16.csv", col_types = readr::cols(.default = readr::col_character())) %>%
stats::setNames(c("patient_ide", "encounter_ide", "poids", "taille", "IMC")) %>%
semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
import_mensurations(patients, x)

readr::read_csv("/manip/bio16_1.csv", col_types = readr::cols(.default = readr::col_character())) %>%
stats::setNames(c("patient_ide", "encounter_ide", "start_date", "concept_cd", "nval_num")) %>%
semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
import_bios(patients, x)

readr::read_csv("/manip/bio16_2.csv", col_types = readr::cols(.default = readr::col_character())) %>%
stats::setNames(c("patient_ide", "encounter_ide", "start_date", "concept_cd", "nval_num")) %>%
semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
import_bios(patients, x)

# 2017
@@ -253,22 +253,22 @@ pop_obgyn <- function()

readr::read_csv("/manip/diags17.csv", col_types = readr::cols(.default = readr::col_character())) %>%
stats::setNames(c("patient_ide", "encounter_ide", "start_date", "end_date", "provider_id", "concept_cd", "modifier_cd")) %>%
semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
import_diagnostics(x)

readr::read_csv("/manip/actes17.csv", col_types = readr::cols(.default = readr::col_character())) %>%
stats::setNames(c("patient_ide", "encounter_ide", "provider_id", "concept_cd", "start_date")) %>%
semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
import_actes(x)

readr::read_csv("/manip/mensurations17.csv", col_types = readr::cols(.default = readr::col_character())) %>%
stats::setNames(c("patient_ide", "encounter_ide", "poids", "taille", "IMC")) %>%
semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
import_mensurations(patients, x)

readr::read_csv("/manip/bios17.csv", col_types = readr::cols(.default = readr::col_character())) %>%
stats::setNames(c("patient_ide", "encounter_ide", "start_date", "concept_cd", "nval_num")) %>%
semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
dplyr::semi_join(patients, by = c("patient_ide", "encounter_ide")) %>%
import_bios(patients, x)

})
@@ -330,9 +330,8 @@ read_patients <- function(file)
"rum_end",
"provider_id",
"project")) %>%
dplyr::filter(!is.na(patient_ide)) %>%
dplyr::mutate(patient_ide = sanitize_encounter(patient_ide)
encounter_ide = sanitize_encounter(encounter_ide, start_date)
dplyr::mutate(patient_ide = sanitize_encounter(patient_ide),
encounter_ide = sanitize_encounter(encounter_ide, start_date),
start_date = start_date %>% as.Date(format = "%Y/%m/%d %H:%M:%S"),
end_date = end_date %>% as.Date(format = "%Y/%m/%d %H:%M:%S"),
sex_cd = ifelse(sex_cd == "1", "M", "F"),
@@ -343,38 +342,25 @@ read_patients <- function(file)
provider_id = stringr::str_c("STRUCT:", provider_id))
}

# TODO: check start_date and join with patients df
# Read a measurements CSV file and normalise its patient / encounter identifiers.
#
# file: path passed straight to readr::read_csv(); every column is read as character.
# Returns the data frame produced by the pipeline below, with columns renamed
# positionally to patient_ide, encounter_ide, poids, taille and IMC.
#
# NOTE(review): none of the column names set below is start_date, yet
# sanitize_encounter() is called with start_date -- confirm where that value is
# expected to come from (see the TODO above this function).
# NOTE(review): read_mensurations is assigned a second time further down in this
# listing; if both were sourced, the later assignment would replace this one.
read_mensurations <- function(file)
{
readr::read_csv(file, col_types = readr::cols(.default = readr::col_character())) %>%
stats::setNames(c("patient_ide",
"encounter_ide",
"poids",
"taille",
"IMC")) %>%
# Drop rows that carry no patient identifier before sanitising
dplyr::filter(!is.na(patient_ide)) %>%
dplyr::mutate(patient_ide = sanitize_patient(patient_ide),
encounter_ide = sanitize_encounter(encounter_ide, start_date))
}

read_diagnostics <- function(file)
{
readr::read_csv(file, col_types = readr::cols(.default = readr::col_character())) %>%
stats::setNames(c("patient_ide",
"encounter_ide",
"enc_start_date",
"start_date",
"end_date",
"provider_id",
"concept_cd",
"modifier_cd")) %>%
dplyr::filter(!is.na(concept_cd)) %>%
dplyr::mutate(encounter_ide = sanitize_encounter(encounter_ide, start_date),
dplyr::mutate(encounter_ide = sanitize_encounter(encounter_ide, enc_start_date),
patient_ide = sanitize_patient(patient_ide),
start_date = start_date %>% as.Date(format = "%Y/%m/%d %H:%M:%S"),
end_date = end_date %>% as.Date(format = "%Y/%m/%d %H:%M:%S"),
provider_id = stringr::str_c("STRUCT:", provider_id),
concept_cd = stringr::str_c("CIM:", concept_cd),
modifier_cd = stringr::str_c("CIM:", modifier_cd))
modifier_cd = stringr::str_c("CIM:", modifier_cd)) %>%
dplyr::select(-enc_start_date)
}

read_actes <- function(file)
@@ -382,16 +368,37 @@ read_actes <- function(file)
readr::read_csv(file, col_types = readr::cols(.default = readr::col_character())) %>%
stats::setNames(c("patient_ide",
"encounter_ide",
"enc_start_date",
"provider_id",
"concept_cd",
"start_date")) %>%
dplyr::filter(!is.na(concept_cd),
!is.na(start_date)) %>%
dplyr::mutate(encounter_ide = sanitize_encounter(encounter_ide, start_date),
dplyr::mutate(encounter_ide = sanitize_encounter(encounter_ide, enc_start_date),
patient_ide = sanitize_patient(patient_ide),
provider_id = stringr::str_c("STRUCT:", provider_id),
concept_cd = stringr::str_c("CCAM:", concept_cd),
start_date = start_date %>% as.Date(format = "%Y/%m/%d %H:%M:%S"))
start_date = start_date %>% as.Date(format = "%Y/%m/%d %H:%M:%S")) %>%
dplyr::select(-enc_start_date)
}

# Read a measurements CSV file and reshape it into one row per recorded value.
#
# file: path handed to readr::read_csv(); every column is read as character.
#
# Steps:
#   1. rename the six columns positionally (ids, encounter start date, then
#      the three measurement columns poids, taille and IMC);
#   2. sanitise the patient and encounter identifiers, then drop the encounter
#      start date, which is only needed by sanitize_encounter();
#   3. gather the three measurement columns into concept_cd / nval_num pairs
#      and discard rows whose value is missing;
#   4. prefix the concept code with "HOS:", fill the constant modifier_cd,
#      valtype_cd and tval_char columns, and replace the first comma of each
#      value with a period (presumably a decimal comma -- values stay character).
#
# Returns the resulting long-format data frame.
read_mensurations <- function(file)
{
  column_names <- c("patient_ide",
                    "encounter_ide",
                    "enc_start_date",
                    "poids",
                    "taille",
                    "IMC")

  measurements <- readr::read_csv(file, col_types = readr::cols(.default = readr::col_character()))
  measurements <- stats::setNames(measurements, column_names)

  # The encounter start date is consumed by sanitize_encounter() and then removed
  measurements <- dplyr::mutate(measurements,
                                patient_ide = sanitize_patient(patient_ide),
                                encounter_ide = sanitize_encounter(encounter_ide, enc_start_date))
  measurements <- dplyr::select(measurements, -enc_start_date)

  # Wide -> long: one row per (patient, encounter, measurement)
  observations <- tidyr::gather(measurements, concept_cd, nval_num, poids, taille, IMC)
  observations <- dplyr::filter(observations, !is.na(nval_num))

  dplyr::mutate(observations,
                concept_cd = stringr::str_c("HOS:", concept_cd),
                modifier_cd = "@",
                valtype_cd = "N",
                tval_char = "E",
                nval_num = stringr::str_replace(nval_num, ",", "."))
}

read_bios <- function(file)
@@ -401,20 +408,17 @@ read_bios <- function(file)
bios %>%
stats::setNames(c("patient_ide",
"encounter_ide",
"enc_start_date",
"start_date",
"concept_cd",
"nval_num")) %>%
dplyr::filter(!is.na(concept_cd),
!is.na(nval_num),
!is.na(start_date),
!concept_cd %in% c("MB_SGT_AER_CB", "MB_SGT_ANA_CB", "MB_LP_TC", "MB_SGT_PED_CB", "MB_CS_NUM_DON_RC", "MB_ANTIBIO_RC")) %>%
dplyr::left_join(mapping, by = c("concept_cd" = "from")) %>%
dplyr::mutate(encounter_ide = sanitize_encounter(encounter_ide, start_date),
dplyr::mutate(encounter_ide = sanitize_encounter(encounter_ide, enc_start_date),
patient_ide = sanitize_patient(patient_ide),
start_date = start_date %>% as.Date(format = "%Y/%m/%d %H:%M:%S"),
concept_cd = ifelse(!is.na(to), to, concept_cd),
concept_cd = stringr::str_c("BIO:", concept_cd)) %>%
dplyr::select(-to)
dplyr::select(-to, -enc_start_date)
}

sanitize_encounter <- function(encounter_ide, start_date)


+ 0
- 1
R/import.R View File

@@ -19,7 +19,6 @@
#'
#' @param patients A formatted dataframe with correctly named columns
#' @param project The project to add the data to
#' @return
#' @export
import_patients_visits <- function(patients, project)
{


+ 1
- 1
man/add_encounters.Rd View File

@@ -29,5 +29,5 @@ The encounters dataframe must contain the following columns:
- patient_ide: the original patient ID
- start_date: the start date of the encounter, as Date object
- end_date: the end date of the encounter, as Date object
- inout: I or O if inpatient or outpatient
- inout_cd: I or O if inpatient or outpatient
}

+ 0
- 4
man/add_observations.Rd View File

@@ -16,10 +16,6 @@ add_observations(observations, project, host = "", admin = "", pass = "")
\item{admin}{The admin account for the PostgreSQL database}

\item{pass}{The password for the admin account}

\item{patient_mapping}{The patient mapping table}

\item{encounter_mapping}{The encounter mapping table}
}
\description{
Add observations to the CRC cell


+ 32
- 0
man/import_patients_visits.Rd View File

@@ -0,0 +1,32 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/import.R
\name{import_patients_visits}
\alias{import_patients_visits}
\title{Import patients and their visits}
\usage{
import_patients_visits(patients, project)
}
\arguments{
\item{patients}{A formatted dataframe with correctly named columns}

\item{project}{The project to add the data to}
}
\description{
Import patients and their visits
}
\details{
Import the patient_dimension and visit_dimension death_data
As well as creating the mappings and adding visit age observations

Structure for patient dataframe:
- patient_ide : character
- encounter_ide : character
- start_date : Date
- end_date : Date
- rum_start : Date
- rum_end : Date
- birth_date : Date
- death_date : Date
- sex_cd : char, 'M' or 'F'
- provider_id : char, 'STRUCT:xxx'
}

Loading…
Cancel
Save