Browse Source

Ajout feed_db.R

static
Maxime Wack 6 years ago
parent
commit
b17972e16e
1 changed files with 80 additions and 0 deletions
  1. +80
    -0
      feed_db.R

+ 80
- 0
feed_db.R View File

@@ -0,0 +1,80 @@
library(tidyverse)
library(RSQLite)
library(magrittr)
library(httr)
library(rvest)
library(stringr)

# Fetch previous state ----
# Campaign year; interpolated into the listing URL and the stored timestamps.
annee <- 2017

# Handle on the local SQLite file that holds the "trajectoires" table.
db <- dbConnect(SQLite(), "ecn.db")

# Columns of interest from the stored history (still a lazy query here).
historique <- select(
  tbl(db, "trajectoires"),
  Etat, Etudiant, `Vœu`, Rang, Subdivision, Discipline,
  `Désir (non officiel) en chirurgie générale`, timestamp
)

# Pull every row into memory, then keep, for each student, the row(s)
# carrying that student's greatest timestamp. The result stays grouped by
# Etudiant.
current <- historique %>%
  collect(n = Inf) %>%
  group_by(Etudiant) %>%
  filter(timestamp == max(timestamp))

# Fetch celine and data ----
# Download the listing page for the configured year.
celine <- GET(str_c("http://cngsante.fr/chiron", annee, "/celine/listing.html"))

# Fail immediately on an HTTP error status instead of going on to parse
# whatever error page the server returned.
stop_for_status(celine)

# Text of the first node matching "tr:first-child" in the parsed page; the
# statements that follow extract the date and time of the listing from it.
timestamp <- celine %>%
  content() %>%
  html_node("tr:first-child") %>%
  html_text()

# Parse the header text into a "YYYY-MM-DD HH:MM" timestamp ----

# Clock time: first "<digits>h<digits>" token with "h" turned into ":".
# Left-padded to five characters so that e.g. "9:05" becomes "09:05"; this is
# a no-op when the hour already has two digits.
heure <- timestamp %>%
  str_extract("\\d+h\\d+") %>%
  str_replace("h", ":") %>%
  str_pad(width = 5, pad = "0")

# Date token: first word of the header once the " <time> " part is removed.
date <- timestamp %>%
  str_replace(" \\d+h\\d+ ", "") %>%
  str_extract("\\w+")

# Split the token into letters / digits / letters using capture groups.
# str_match() returns only the captured text, so characters outside [a-z]
# around the match (accented letters, capitals) cannot leak into the day or
# month the way they do with a str_replace() back-reference.
morceaux <- str_match(date, "([a-z]+)(\\d+)([a-z]+)")

# Day of month, zero-padded to two digits (no-op for two-digit days) so the
# resulting timestamp strings compare in chronological order.
jour <- str_pad(morceaux[, 3], width = 2, pad = "0")

# Month abbreviation mapped to its two-digit number. Only the abbreviations
# listed here are recognised; anything else yields NA and is rejected below.
mois <- morceaux[, 4]
mois <- case_when(
  mois == "jul" ~ "07",
  mois == "sep" ~ "09",
  str_detect(mois, "^ao") ~ "08"
)

# Refuse to continue with a partially parsed header: an NA component would
# otherwise produce an NA timestamp for every row written later.
if (anyNA(c(heure, jour, mois))) {
  stop("Unable to parse the listing timestamp: ", timestamp, call. = FALSE)
}

timestamp <- str_c(annee, "-", mois, "-", jour, " ", heure)

# Extract the listing table from the page ----
# Work on the response body as text: passing the response object itself to
# str_replace_all() would coerce the whole list instead of editing the HTML.
# Newlines are removed so that ".*?" can span a full row, then the first run
# of eight consecutive <tr>...</tr> groups is dropped before the remaining
# markup is parsed and its first table is taken, using the first row as header.
listing <- celine %>%
  content(as = "text") %>%
  str_replace_all("\n", "") %>%
  str_replace("(<tr>.*?</tr>){8}", "") %>%
  read_html() %>%
  html_table(header = TRUE) %>%
  .[[1]]

# Clean the scraped listing ----
# - drop the SubDis column and the rows whose Etat is "déclassé";
# - Etudiant: where the value looks like "<n> ( <m> )", keep only <m>
#   (values that do not match are left as they are), then convert to numeric;
# - Discipline / Subdivision: strip the "Discipline ... : " and "CHU ..."
#   prefixes;
# - stamp every row with the timestamp computed earlier in the script;
# - finally turn every character column into a factor.
listing <- listing %>%
  select(-SubDis) %>%
  filter(Etat != "déclassé") %>%
  mutate(
    Etudiant = as.numeric(
      str_replace(Etudiant, "\\d+ \\( (\\d+) \\)", "\\1")
    ),
    Discipline = str_replace(Discipline, "Discipline .*? : ", ""),
    Subdivision = str_replace(Subdivision, "CHU ((d')|(de ))?", ""),
    timestamp = timestamp
  ) %>%
  mutate_if(is.character, factor)

# Append the differences, with their timestamp ----
# Keep only the listing rows that have no counterpart in the previous state
# on every compared column (timestamp itself is not part of the comparison).
nouveautes <- anti_join(
  listing,
  current,
  by = c(
    "Etat", "Etudiant", "Vœu", "Rang", "Subdivision", "Discipline",
    "Désir (non officiel) en chirurgie générale"
  )
)

# Append those rows to the history table. TRUE is spelled out because the
# shorthand T is an ordinary variable that can be reassigned.
dbWriteTable(conn = db, name = "trajectoires", value = nouveautes, append = TRUE)

dbDisconnect(db)

Loading…
Cancel
Save