diff --git a/init_db.R b/init_db.R new file mode 100644 index 0000000..1c6d661 --- /dev/null +++ b/init_db.R @@ -0,0 +1,60 @@ +library(tidyverse) +library(RSQLite) +library(magrittr) +library(httr) +library(rvest) +library(stringr) + +annee <- 2017 + +db <- dbConnect(SQLite(), "ecn.db") + +celine <- GET(str_c("http://cngsante.fr/chiron", annee, "/celine/listing.html")) + +celine %>% + content %>% + html_node("tr:first-child") %>% + html_text -> +timestamp + +timestamp %>% + str_extract("\\d+h\\d+") %>% + str_replace("h", ":") -> heure + +timestamp %>% + str_replace(" \\d+h\\d+ ", "") %>% + str_extract("\\w+") -> date + +date %>% + str_replace("([a-z]+)(\\d+)([a-z]+)", "\\2") -> jour + +date %>% + str_replace("([a-z]+)(\\d+)([a-z]+)", "\\3") -> mois + case_when(mois == "jul" ~ "07", + mois == "sep" ~ "09", + mois %>% str_detect("^ao") ~ "08") -> mois + +timestamp <- str_c(annee, "-", mois, "-", jour, " ", heure) + +celine %>% + str_replace_all("\n", "") %>% + str_replace("(