# Poll the "celine" listing page and append every row that differs from the
# latest stored state to the `trajectoires` table of the SQLite file `ecn.db`.
#
# Flow:
#   1. Download the listing page and read the text of its first table row,
#      which the script uses as a "last updated" marker.
#   2. Quit immediately when that marker equals the one saved by the
#      previous successful run (`prev_time.rds`).
#   3. Turn the marker into a "YYYY-MM-DD HH:MM" timestamp.
#   4. Parse the listing table, clean it, and append the rows that are not
#      already the current state in the database.
#   5. Only then persist the new marker, so a failed run is retried.

library(tidyverse)
library(RSQLite)
library(magrittr)
library(httr)
library(rvest)
library(stringr)

# Relative paths below ("prev_time.rds", "ecn.db") resolve against this
# directory.
setwd("/srv/shiny/celine")

# Year prepended to the parsed day/month (the page header is parsed for day,
# month and time only).
annee <- 2020
prev_time_file <- "prev_time.rds"

# Fetch celine and data ----
celine <- GET("http://www.cngsante.fr/chiron/celine/listing.html")
# Fail with a clear HTTP error instead of an obscure parsing error later on.
stop_for_status(celine)

raw_timestamp <- celine %>%
  content() %>%
  html_node("tr:first-child") %>%
  html_text()

if (length(raw_timestamp) != 1L || is.na(raw_timestamp)) {
  stop("Could not read the header row of the listing page.", call. = FALSE)
}

# On the very first run there is no marker file yet: treat that as "changed".
prev_time <- if (file.exists(prev_time_file)) {
  readRDS(prev_time_file)
} else {
  NULL
}

# isTRUE() also covers the NULL (first run) case, where `==` yields logical(0).
if (isTRUE(raw_timestamp == prev_time)) {
  q(save = "no")
}

# Parse the header text into a timestamp ----
# NOTE(review): the patterns below presumably expect a header shaped like
# "<weekday><day><month> <HH>h<MM> ..." - confirm against the live page.
heure <- raw_timestamp %>%
  str_extract("\\d+h\\d+") %>%
  str_replace("h", ":")

date <- raw_timestamp %>%
  str_replace(" \\d+h\\d+ ", "") %>%
  str_extract("\\w+")

jour <- str_replace(date, "([a-z]+)(\\d+)([a-zû]+)", "\\2")
mois <- str_replace(date, "([a-z]+)(\\d+)([a-z]+)", "\\3")

# Only these three month abbreviations are mapped; anything else becomes NA.
mois <- case_when(
  mois == "jul" ~ "07",
  mois == "sep" ~ "09",
  mois == "aoû" ~ "08"
)

timestamp <- str_c(annee, "-", mois, "-", jour, " ", heure)

# str_c() propagates NA, so an unmapped month or an unparsable time would
# otherwise be written to the database as a missing timestamp.
if (is.na(timestamp)) {
  stop(
    "Could not parse a timestamp from the page header: ", raw_timestamp,
    call. = FALSE
  )
}

db <- dbConnect(SQLite(), "ecn.db")

tryCatch(
  {
    # Fetch previous state ----
    # Latest stored row(s) per student, i.e. the state to diff against.
    current <- db %>%
      tbl("trajectoires") %>%
      select(
        Etat, Etudiant, `Vœu`, Rang, Subdivision, Discipline,
        `Désir (non officiel) en chirurgie générale`, timestamp
      ) %>%
      collect(n = Inf) %>%
      mutate(
        `Désir (non officiel) en chirurgie générale` =
          as.character(`Désir (non officiel) en chirurgie générale`)
      ) %>%
      group_by(Etudiant) %>%
      filter(timestamp == max(timestamp)) %>%
      ungroup()

    # Parse the listing table ----
    # NOTE(review): the httr response is handed straight to stringr, which
    # presumably coerces it to the response body text - confirm.
    # NOTE(review): "(.*?){8}" can match the empty string, so this replace
    # appears to remove nothing; kept as-is - confirm the intent.
    listing <- celine %>%
      str_replace_all("\n", "") %>%
      str_replace("(.*?){8}", "") %>%
      read_html() %>%
      html_table(header = TRUE) %>%
      .[[1]]

    # Values shaped like "<digits> ( <digits> )" are reduced to the number
    # inside the parentheses before the numeric conversion.
    etudiant_pattern <- "\\d+ \\( (\\d+) \\)"

    listing <- listing %>%
      select(-SubDis) %>%
      filter(Etat != "déclassé") %>%
      mutate(
        Etudiant = as.numeric(ifelse(
          str_detect(Etudiant, etudiant_pattern),
          str_replace(Etudiant, etudiant_pattern, "\\1"),
          Etudiant
        )),
        # The pattern contains a newline; it is spelled "\n" so that the
        # regex does not depend on a physical line break in this file.
        Discipline = str_replace(Discipline, "Discipline .*? \n: ", ""),
        Subdivision = str_replace(Subdivision, "CHU ((d')|(de ))?", ""),
        timestamp = timestamp
      ) %>%
      mutate_if(is.character, factor)

    # Append the rows that differ from the stored state, stamped with the
    # parsed timestamp. `timestamp` is deliberately not a join key.
    new_rows <- listing %>%
      mutate(
        `Désir (non officiel) en chirurgie générale` =
          as.character(`Désir (non officiel) en chirurgie générale`)
      ) %>%
      anti_join(
        current,
        by = c(
          "Etat", "Etudiant", "Vœu", "Rang", "Subdivision", "Discipline",
          "Désir (non officiel) en chirurgie générale"
        )
      )

    dbWriteTable(
      conn = db, name = "trajectoires", value = new_rows, append = TRUE
    )
  },
  # Release the connection whether or not the steps above succeeded.
  finally = dbDisconnect(db)
)

# Persist the marker only after the database write succeeded, so that a
# failed run is retried instead of being skipped as "unchanged".
saveRDS(raw_timestamp, file = prev_time_file)