You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

92 lines
2.1KB

  1. library(tidyverse)
  2. library(RSQLite)
  3. library(magrittr)
  4. library(httr)
  5. library(rvest)
  6. library(stringr)
  # Year of the procedure; used to build the listing URL and the stored
  # timestamp.
  annee <- 2017

  # Fetch celine and data ----
  # Download the listing page for the configured year.
  celine <- GET(str_c("http://cngsante.fr/chiron", annee, "/celine/listing.html"))
  # Fail loudly on an HTTP error instead of parsing an error page as a listing.
  stop_for_status(celine)

  # Text of the first table row of the page; compared with the value saved by
  # the previous run to decide whether the page changed.
  timestamp <- celine %>%
    content() %>%
    html_node("tr:first-child") %>%
    html_text()

  if (length(timestamp) != 1L || is.na(timestamp)) {
    stop("Could not read the first table row of the listing page.", call. = FALSE)
  }

  # On the very first run there is no saved stamp yet: treat the page as new
  # rather than crashing in readRDS().
  prev_time <- if (file.exists("prev_time.rds")) {
    readRDS("prev_time.rds")
  } else {
    NULL
  }

  # NOTE(review): the stamp is persisted here, before the database write at the
  # end of the script; if a later step fails, the next run will see the same
  # stamp and exit without retrying.
  if (identical(timestamp, prev_time)) {
    # Nothing changed since the previous run: exit without saving the workspace.
    q(save = "no")
  } else {
    saveRDS(timestamp, file = "prev_time.rds")
  }
  # Open the SQLite database that stores the history of listings.
  db <- dbConnect(SQLite(), "ecn.db")

  # Fetch previous state ----
  # For each Etudiant, keep the stored row(s) whose timestamp equals that
  # student's maximum timestamp.
  # NOTE(review): max() is applied to the column as stored; if timestamps are
  # text, the comparison is lexical -- confirm the stored format sorts correctly.
  current <- db %>%
    tbl("trajectoires") %>%
    select(Etat, Etudiant, `Vœu`, Rang, Subdivision, Discipline, `Désir (non officiel) en chirurgie générale`, timestamp) %>%
    collect(n = Inf) %>%
    group_by(Etudiant) %>%
    filter(timestamp == max(timestamp)) %>%
    # Drop the grouping so later verbs on `current` are not silently per-student.
    ungroup()
  # Normalise the scraped stamp ----
  # Rebuild `timestamp` as "<annee>-<mm>-<day> <hh>:<mm>" from the raw row text.

  # Time of day: the first "<digits>h<digits>" token, with "h" turned into ":".
  heure <- timestamp %>%
    str_extract("\\d+h\\d+") %>%
    str_replace("h", ":")

  # Date token: the first word left once the time has been removed
  # (presumably <weekday><day><month>, e.g. "mar19sep" -- TODO confirm).
  date <- timestamp %>%
    str_replace(" \\d+h\\d+ ", "") %>%
    str_extract("\\w+")

  # Split the token into letters / digits / letters. \p{L} is used instead of
  # [a-z] so that accented month abbreviations such as "aoû" are captured whole;
  # with [a-z] the trailing "û" leaked into the day ("19û").
  date_parts <- str_match(date, "^(\\p{L}+)(\\d+)(\\p{L}+)$")
  jour <- date_parts[, 3]
  mois <- date_parts[, 4]

  # Month abbreviation -> two-digit month number.
  month_codes <- c("jul" = "07", "aoû" = "08", "sep" = "09")
  if (is.na(heure) || is.na(jour) || is.na(mois) || !mois %in% names(month_codes)) {
    # Stop rather than store an NA timestamp in the database.
    stop("Unrecognised listing timestamp: ", timestamp, call. = FALSE)
  }
  mois <- unname(month_codes[mois])

  # NOTE(review): day and hour are kept exactly as scraped (no zero padding).
  timestamp <- str_c(annee, "-", mois, "-", jour, " ", heure)
  # Parse the listing table ----
  # Take the response body as text (instead of relying on implicit coercion of
  # the response object), flatten it to one line so the lazy regex can span
  # rows, remove the first run of eight "<tr>...</tr>" groups, and parse the
  # first table of what is left.
  listing <- celine %>%
    content(as = "text") %>%
    str_replace_all("\n", "") %>%
    str_replace("(<tr>.*?</tr>){8}", "") %>%
    read_html() %>%
    html_table(header = TRUE) %>%
    .[[1]]

  # Clean the table:
  #  - drop the SubDis column and the rows whose Etat is "déclassé";
  #  - Etudiant: "<n> ( <m> )" becomes "<m>"; other values are left as they are
  #    by str_replace(); the result is converted to numeric;
  #  - strip the "Discipline ... : " and "CHU (d'|de )" prefixes;
  #  - stamp every row with the normalised timestamp;
  #  - turn the remaining character columns into factors.
  listing <- listing %>%
    select(-SubDis) %>%
    filter(Etat != "déclassé") %>%
    mutate(
      Etudiant = Etudiant %>%
        str_replace("\\d+ \\( (\\d+) \\)", "\\1") %>%
        as.numeric(),
      Discipline = Discipline %>% str_replace("Discipline .*? : ", ""),
      Subdivision = Subdivision %>% str_replace("CHU ((d')|(de ))?", ""),
      timestamp = timestamp
    ) %>%
    mutate_if(is.character, factor)
  # Append the differences, with their timestamp ----
  # Rows of `listing` with no match in `current` on every compared column are
  # appended to the "trajectoires" table. The connection is closed in `finally`
  # so it is released even when the write fails.
  invisible(tryCatch(
    {
      listing %>%
        anti_join(current, by = c("Etat", "Etudiant", "Vœu", "Rang", "Subdivision", "Discipline", "Désir (non officiel) en chirurgie générale")) %>%
        dbWriteTable(conn = db, value = ., name = "trajectoires", append = TRUE)
    },
    finally = dbDisconnect(db)
  ))