library(tidyverse)
library(RSQLite)
library(magrittr)
library(httr)
library(rvest)
library(stringr)

# Year used as the year component of the timestamp assembled below.
annee <- 2020

# Connection to the SQLite database file "ecn.db".
db <- dbConnect(SQLite(), "ecn.db")

# Download the listing page. Abort on an HTTP error status instead of
# parsing an error page as if it were the listing.
celine <- GET("http://www.cngsante.fr/chiron/celine/listing.html")
stop_for_status(celine)

# Text of the node selected by "tr:first-child"; the time and the date are
# both parsed out of this string.
timestamp <- celine %>%
  content() %>%
  html_node("tr:first-child") %>%
  html_text()

# Time of day: the first "<digits>h<digits>" run, with "h" turned into ":".
heure <- timestamp %>%
  str_extract("\\d+h\\d+") %>%
  str_replace("h", ":")

# Remove the time (with its surrounding spaces), then keep the first run of
# word characters as the date token.
date <- timestamp %>%
  str_replace(" \\d+h\\d+ ", "") %>%
  str_extract("\\w+")

# Read the day digits and the month letters straight from the capture groups.
# Using str_replace() with a back-reference here would leave any characters
# outside the matched substring (e.g. non-ASCII letters that [a-z] does not
# cover) attached to the result, corrupting the day.
date_parts <- str_match(date, "([a-z]+)(\\d+)([a-z]+)")
jour <- date_parts[, 3]
mois <- date_parts[, 4]

# Map the month text to a two-digit month number. Only these three months
# are handled; any other value yields NA (and so an NA timestamp).
mois <- case_when(
  mois == "jul" ~ "07",
  mois == "sep" ~ "09",
  str_detect(mois, "^ao") ~ "08"
)

# Assemble "<year>-<month>-<day> <time>".
timestamp <- str_c(annee, "-", mois, "-", jour, " ", heure)

# Strip newlines from the page before the pattern replacement that follows.
celine %>% str_replace_all("\n", "") %>% str_replace("(