You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

139 lines
3.3KB

  1. # Initialisation ----
  2. library(tidyverse)
  3. # Données d'exemple
  4. data(iris)
  5. # Graphique de base
  6. iris %>%
  7. ggplot() +
  8. aes(x = Petal.Length, y = Sepal.Length, color = Species) +
  9. geom_point()
  10. # Set de training
  11. set.seed(1000)
  12. iris %>%
  13. sample_n(120) -> train
  14. # Set de test
  15. iris %>%
  16. anti_join(train) -> test
  17. # Graphique de base
  18. train %>%
  19. ggplot() +
  20. aes(x = Petal.Length, y = Sepal.Length, color = Species) +
  21. geom_point() -> g
  22. # kNN ----
  23. library(class)
  24. # Application du kNN sur les données de test
  25. knn(train = train %>% select(Petal.Length, Sepal.Length),
  26. test = test %>% select(Petal.Length, Sepal.Length),
  27. cl = train$Species,
  28. k = 10) -> test$Species_knn
  29. # Graphique avec les résultats
  30. g +
  31. geom_point(data = test,
  32. aes(x = Petal.Length, y = Sepal.Length, color = Species_knn),
  33. size = 4) +
  34. geom_point(data = test,
  35. aes(x = Petal.Length, y = Sepal.Length, color = Species),
  36. size = 2)
  37. # Résultats
  38. mean(test$Species == test$Species_knn)
  39. # Nouvelles données
  40. tibble(Petal.Length = rnorm(10, mean(iris$Petal.Length), var(iris$Petal.Length)),
  41. Sepal.Length = rnorm(10, mean(iris$Sepal.Length), var(iris$Sepal.Length))) -> test2
  42. ## Exemples particuliers
  43. # tibble(Petal.Length = rep(5, 10),
  44. # Sepal.Length = seq(5, 7, length.out = 10)) %>%
  45. # bind_rows(test2) -> test2
  46. # Application du kNN sur nouvelles données
  47. knn(train = iris %>% select(Petal.Length, Sepal.Length),
  48. test = test2 %>% select(Petal.Length, Sepal.Length),
  49. cl = iris$Species,
  50. k = 5) -> test2$Species
  51. # Graphique avec les résultats
  52. g +
  53. geom_point(data = test2,
  54. aes(x = Petal.Length, y = Sepal.Length, color = Species),
  55. size = 4)
  56. # Arbres décisionnels ----
  57. library(rpart)
  58. rpart(Species ~ Petal.Length + Sepal.Length, data = train) -> arbre
  59. arbre %T>%
  60. plot(branch = .5, margin = .5) %>%
  61. text(use.n = T, all = T, pretty = T, fancy = T)
  62. predict(arbre, test %>% select(Petal.Length, Sepal.Length), type = "class") -> test$Species_cart
  63. # Graphique avec les résultats
  64. g +
  65. geom_point(data = test,
  66. aes(x = Petal.Length, y = Sepal.Length, color = Species_cart),
  67. size = 4) +
  68. geom_point(data = test,
  69. aes(x = Petal.Length, y = Sepal.Length, color = Species),
  70. size = 2)
  71. # Résultats
  72. mean(test$Species == test$Species_cart)
  73. # Régression logistique ----
  74. # Données
  75. data(mtcars)
  76. # Graphique de base
  77. mtcars %>%
  78. ggplot() +
  79. aes(x = hp, y = mpg, color = am) +
  80. geom_point() +
  81. scale_color_gradient(low = "#FF0000", high = "#0000FF")
  82. # Sets de training et de test
  83. set.seed(1000)
  84. mtcars %>%
  85. sample_n(24) -> train
  86. mtcars %>%
  87. anti_join(train) -> test
  88. # Graphique training
  89. train %>%
  90. ggplot() +
  91. aes(x = hp, y = mpg, color = am) +
  92. geom_point() +
  93. scale_color_gradient(low = "#FF0000", high = "#0000FF") -> g
  94. # Modèle
  95. glm(am ~ hp + mpg, data = train, family = "binomial") -> logi
  96. logi %>% summary
  97. logi %>% plot
  98. # Prédiction
  99. predict(logi, test %>% select(hp, mpg), type = "response") -> test$am_logi
  100. # Graphique avec prédiction
  101. g +
  102. geom_point(data = test,
  103. aes(x = hp, y = mpg, color = am_logi),
  104. size = 4) +
  105. geom_point(data = test,
  106. aes(x = hp, y = mpg, color = am),
  107. size = 2)
  108. # Validation
  109. library(pROC)
  110. roc(am ~ am_logi, data = test) -> ROC
  111. plot(ROC)