|
|
@@ -0,0 +1,80 @@ |
|
|
|
library(tidyverse) |
|
|
|
library(RSQLite) |
|
|
|
library(magrittr) |
|
|
|
library(httr) |
|
|
|
library(rvest) |
|
|
|
library(stringr) |
|
|
|
|
|
|
|
# Year of the session; it is spliced into the listing URL and into the
# timestamp stored with every scraped row.
annee <- 2017

# Connection to the local SQLite file holding the "trajectoires" history.
db <- dbConnect(drv = SQLite(), dbname = "ecn.db")
|
|
|
|
|
|
|
# Fetch previous state ----

# Most recent stored row for each student (Etudiant) in "trajectoires".
# This is the baseline the freshly scraped listing is compared against
# further down, so that only changed rows get appended.
current <- db %>%
  tbl("trajectoires") %>%
  select(
    Etat, Etudiant, `Vœu`, Rang, Subdivision, Discipline,
    `Désir (non officiel) en chirurgie générale`, timestamp
  ) %>%
  # Pull the whole table into memory before the per-student filtering.
  collect(n = Inf) %>%
  group_by(Etudiant) %>%
  filter(timestamp == max(timestamp)) %>%
  # Drop the grouping so `current` is a plain table for the later join.
  ungroup()
|
|
|
|
|
|
|
# Fetch celine and data ----

# Download the listing page for the configured year.
celine <- GET(str_c("http://cngsante.fr/chiron", annee, "/celine/listing.html"))

# Fail here on an HTTP error (4xx/5xx) instead of letting an error page be
# parsed as if it were the listing and compared against the stored state.
stop_for_status(celine)
|
|
|
|
|
|
|
# Text of the first node matching "tr:first-child" in the parsed response
# body; the time and date pieces are pulled out of this string below.
timestamp <- html_text(html_node(content(celine), "tr:first-child"))
|
|
|
|
|
|
|
# Time of day: the first "<digits>h<digits>" token of the header text,
# with the "h" separator turned into ":".
heure <- str_replace(str_extract(timestamp, "\\d+h\\d+"), "h", ":")
|
|
|
|
|
|
|
# Date token: remove the first " <digits>h<digits> " occurrence from the
# header text, then keep the first run of word characters of what is left.
date <- str_extract(str_replace(timestamp, " \\d+h\\d+ ", ""), "\\w+")
|
|
|
|
|
|
|
# Day of month: the first run of digits in the date token.
# The digits are extracted directly rather than obtained by substituting a
# letters-digits-letters match with its digit group: substitution leaves
# every character outside the match in place, so a letter not covered by
# [a-z] (e.g. an accented one in the month name) would stay attached to
# the day and corrupt the assembled timestamp.
jour <- str_extract(date, "\\d+")
|
|
|
|
|
|
|
# Month: substitute the letters-digits-letters match in the date token with
# its trailing letter group, then map that abbreviation to a two-digit
# month number. Only "jul", "sep" and anything starting with "ao" are
# mapped; every other value yields NA.
mois <- str_replace(date, "([a-z]+)(\\d+)([a-z]+)", "\\3")
mois <- case_when(
  mois == "jul" ~ "07",
  mois == "sep" ~ "09",
  str_detect(mois, "^ao") ~ "08"
)
|
|
|
|
|
|
|
# Reassemble the pieces as "<year>-<month>-<day> <time>"; from here on
# `timestamp` holds this string instead of the raw header text.
timestamp <- str_c(str_c(annee, mois, jour, sep = "-"), heure, sep = " ")
|
|
|
|
|
|
|
# Parse the listing table out of the downloaded page.
listing <- celine %>%
  # Work on the response body as a single string: the string helpers below
  # need the HTML text, not the httr response object itself.
  content(as = "text") %>%
  # Strip newlines first (presumably so that `.` in the next pattern can
  # span a whole row).
  str_replace_all("\n", "") %>%
  # Remove the first run of eight consecutive <tr>...</tr> elements, so
  # the row that follows them is the one html_table() takes as the header.
  str_replace("(<tr>.*?</tr>){8}", "") %>%
  read_html() %>%
  html_table(header = TRUE) %>%
  # html_table() returns one data frame per table; keep the first.
  .[[1]]
|
|
|
|
|
|
|
# Tidy the scraped table so it lines up with the stored columns.
listing <- listing %>%
  select(-SubDis) %>%
  filter(Etat != "déclassé") %>%
  mutate(
    # Values shaped like "<n> ( <m> )" are reduced to <m>; anything else is
    # left as it is, then the column is converted to numeric.
    Etudiant = as.numeric(str_replace(Etudiant, "\\d+ \\( (\\d+) \\)", "\\1")),
    # Drop the leading "Discipline ... : " label.
    Discipline = str_replace(Discipline, "Discipline .*? : ", ""),
    # Drop the "CHU ", "CHU d'" or "CHU de " prefix.
    Subdivision = str_replace(Subdivision, "CHU ((d')|(de ))?", ""),
    # Stamp every row with the timestamp string assembled earlier.
    timestamp = timestamp
  ) %>%
  mutate_if(is.character, factor)
|
|
|
|
|
|
|
# Append the differences, with their timestamp: only listing rows that have
# no counterpart in `current` on every compared column (timestamp is not
# among them) are written to "trajectoires".
dbWriteTable(
  conn = db,
  name = "trajectoires",
  value = anti_join(
    listing,
    current,
    by = c(
      "Etat", "Etudiant", "Vœu", "Rang", "Subdivision", "Discipline",
      "Désir (non officiel) en chirurgie générale"
    )
  ),
  append = TRUE
)
|
|
|
|
|
|
|
# Close the SQLite connection opened at the top of the script.
dbDisconnect(db)