Browse Source

Ajout init_db.R

static
Maxime Wack 6 years ago
parent
commit
b2c424ce1d
1 changed files with 60 additions and 0 deletions
  1. +60
    -0
      init_db.R

+ 60
- 0
init_db.R View File

@@ -0,0 +1,60 @@
library(tidyverse)
library(RSQLite)
library(magrittr)
library(httr)
library(rvest)
library(stringr)

# Year inserted into the listing URL ("chiron<annee>") and, later on, into the
# timestamp stored with the data.
annee <- 2017

# Download the listing page first and abort on any HTTP error status, so that
# an error page is never parsed as if it were the listing and the database is
# not opened for nothing.
celine <- GET(str_c("http://cngsante.fr/chiron", annee, "/celine/listing.html"))
stop_for_status(celine)

# SQLite database file (relative to the working directory) that receives the
# parsed listing.
db <- dbConnect(SQLite(), "ecn.db")

# Text of the first <tr> of the downloaded page; the time and the date of the
# listing are read from it below.
entete <- celine %>%
  content() %>%
  html_node("tr:first-child") %>%
  html_text()

# Time of day: "<digits>h<digits>" rewritten as "<digits>:<digits>".
heure <- entete %>%
  str_extract("\\d+h\\d+") %>%
  str_replace("h", ":")

# First word left once the time has been removed.
# NOTE(review): the parsing below assumes it looks like
# <letters><day number><month abbreviation>, e.g. "mar12sep" -- confirm
# against the live page.
date <- entete %>%
  str_replace(" \\d+h\\d+ ", "") %>%
  str_extract("\\w+")

# Day number: the digits are taken directly instead of going through "[a-z]+"
# capture groups, which do not match accented letters and would leave the last
# letter of a month such as "aoû" glued to the day. Zero-padded so the final
# timestamp always reads YYYY-MM-DD.
jour <- date %>%
  str_extract("\\d+") %>%
  str_pad(width = 2, pad = "0")

# Month: whatever follows the day number, mapped to its two-digit number.
# Only the three months below are recognised; anything else yields NA.
mois <- str_match(date, "\\d+(\\D+)$")[, 2]
mois <- case_when(
  mois == "jul" ~ "07",
  mois == "sep" ~ "09",
  str_detect(mois, "^ao") ~ "08"
)

# An unparsed component would silently turn the whole timestamp into NA;
# make that visible without aborting the load.
if (anyNA(c(jour, mois, heure))) {
  warning(
    "Could not parse the listing date/time from: ", entete,
    call. = FALSE
  )
}

timestamp <- str_c(annee, "-", mois, "-", jour, " ", heure)

# Parse the listing table out of the downloaded page. The body is requested
# explicitly as text rather than handing the httr response object to stringr
# and relying on an implicit coercion to character.
listing <- celine %>%
  content(as = "text") %>%
  # Put the whole document on one line so the row pattern below can span what
  # used to be several lines.
  str_replace_all("\n", "") %>%
  # Remove the first eight <tr>...</tr> groups; with header = TRUE the first
  # remaining row then provides the column names.
  str_replace("(<tr>.*?</tr>){8}", "") %>%
  read_html() %>%
  html_table(header = TRUE) %>%
  .[[1]]

# Clean the listing and store it as table "trajectoires" (replacing any
# previous content), then index the columns used for look-ups. The connection
# is closed in `finally`, so it is released even when the write fails.
invisible(tryCatch(
  {
    listing %>%
      select(-SubDis) %>%
      filter(Etat != "déclassé") %>%
      mutate(
        # "<n> ( <m> )" is reduced to <m>; values that do not match the
        # pattern are left untouched by str_replace, so no separate
        # str_detect/ifelse guard is needed before the numeric conversion.
        Etudiant = Etudiant %>%
          str_replace("\\d+ \\( (\\d+) \\)", "\\1") %>%
          as.numeric(),
        # Strip the "Discipline ... : " prefix.
        Discipline = Discipline %>% str_replace("Discipline .*? : ", ""),
        # Strip the "CHU ", "CHU d'" or "CHU de " prefix.
        Subdivision = Subdivision %>% str_replace("CHU ((d')|(de ))?", ""),
        # Same listing timestamp on every row.
        timestamp = timestamp
      ) %>%
      mutate_if(is.character, factor) %>%
      dbWriteTable(
        conn = db,
        name = "trajectoires",
        value = .,
        overwrite = TRUE
      )

    # dbWriteTable() has no `indexes` argument (an unknown argument is
    # swallowed by `...`), so the indexes are created explicitly.
    for (colonne in c("Etudiant", "Discipline", "Subdivision")) {
      DBI::dbExecute(
        db,
        str_c(
          "CREATE INDEX IF NOT EXISTS trajectoires_", colonne,
          " ON trajectoires (", colonne, ")"
        )
      )
    }
  },
  finally = dbDisconnect(db)
))

Loading…
Cancel
Save