You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

36 lines
1.1KB

  1. library(dplyr)
  2. library(readr)
  3. library(stringr)
  4. library(rvest)
  5. extractOvalide <- function(annee, mois, table, subtable = "")
  6. {
  7. # Lire le fichier + corrections
  8. str_c("OVALIDE T2A.MCO.DGF", annee, mois, "html", sep = ".") %>%
  9. read_file(locale = locale(encoding = "ISO8859-1")) %>%
  10. str_replace_all("\\n", "") %>%
  11. str_replace_all("<br>", " ") -> current
  12. # Extraction de la table
  13. if (table == "1.D.2.EDMS")
  14. {
  15. current %>%
  16. str_extract(str_c('<td class="c systemtitle">Tableau \\[', table, '\\] ', subtable, '(.*?<\\/table>){3}')) %>%
  17. str_replace_all("(\\d) (\\d)", "\\1\\2") %>%
  18. str_replace_all("\\.<\\/td>", "<\\/td>") %>%
  19. str_replace_all("&nbsp;", " ") %>%
  20. read_html %>%
  21. html_table(trim = T, dec = ",", header = F) %>%
  22. bind_rows
  23. } else
  24. {
  25. current %>%
  26. str_extract(str_c('<td class="c systemtitle">Tableau \\[', table, '\\] ', subtable, '(.*?<\\/table>){2}')) %>%
  27. str_replace_all("(\\d) (\\d)", "\\1\\2") %>%
  28. str_replace_all("\\.<\\/td>", "<\\/td>") %>%
  29. str_replace_all("&nbsp;", " ") %>%
  30. read_html %>%
  31. html_table(trim = T, dec = ",") %>%
  32. .[[1]]
  33. }
  34. }