|
- library(dplyr)
- library(readr)
- library(stringr)
- library(rvest)
-
#' Extract one table from an OVALIDE T2A MCO DGF HTML report
#'
#' Reads the file `OVALIDE T2A.MCO.DGF.<annee>.<mois>.html` (relative path,
#' decoded as ISO8859-1), locates the block whose title cell reads
#' `Tableau [<table>] <subtable>`, cleans the markup and parses it with rvest.
#'
#' @param annee Year component of the report file name.
#' @param mois Month component of the report file name.
#' @param table Table identifier as it appears between square brackets in the
#'   title cell. It is inserted into a regular expression without escaping.
#' @param subtable Optional text expected right after `[<table>] ` in the
#'   title cell; also inserted unescaped. Defaults to `""`.
#' @return For table `"1.D.2.EDMS"`: every table parsed from the block (which
#'   spans three `</table>` tags), read without header and row-bound together.
#'   For any other table: the first table parsed from the block (which spans
#'   two `</table>` tags).
extractOvalide <- function(annee, mois, table, subtable = "") {
  # Read the report and flatten it: drop newlines, turn <br> into spaces
  report_path <- str_c("OVALIDE T2A.MCO.DGF", annee, mois, "html", sep = ".")
  current <- report_path %>%
    read_file(locale = locale(encoding = "ISO8859-1")) %>%
    str_replace_all("\\n", "") %>%
    str_replace_all("<br>", " ")

  # Table 1.D.2.EDMS spans three <table> elements that are stacked together;
  # every other table spans two and only the first one is returned
  multi_part <- table == "1.D.2.EDMS"
  n_closing_tags <- if (multi_part) 3L else 2L

  # Lazy match from the title cell up to the n-th closing </table>
  pattern <- str_c(
    '<td class="c systemtitle">Tableau \\[', table, '\\] ', subtable,
    '(.*?<\\/table>){', n_closing_tags, '}'
  )
  block <- str_extract(current, pattern)

  # str_extract() yields NA when the title is absent; fail with a clear
  # message instead of letting read_html() choke on NA
  if (anyNA(block)) {
    stop(
      "Table [", table, "] ", subtable, " not found in ", report_path,
      call. = FALSE
    )
  }

  tables <- block %>%
    # Remove the space used as thousands separator between digits
    str_replace_all("(\\d) (\\d)", "\\1\\2") %>%
    # Drop a trailing dot at the end of a cell
    str_replace_all("\\.<\\/td>", "<\\/td>") %>%
    # NOTE(review): the original literal rendered as a plain space, which
    # would be a no-op; a non-breaking space is assumed and written as an
    # explicit escape -- confirm against the original file
    str_replace_all("\u00a0", " ") %>%
    read_html() %>%
    html_table(
      trim = TRUE,
      dec = ",",
      header = if (multi_part) FALSE else NA
    )

  if (multi_part) {
    bind_rows(tables)
  } else {
    tables[[1]]
  }
}
|