Exemples

6 years ago · b06ec771a5
--- a/03_Classification/classif.R
+++ b/03_Classification/classif.R
@@ -0,0 +1,135 @@
 # Initialisation ----
 library(tidyverse)

 # Example data: the built-in iris measurements
 data(iris)

 # Base scatter plot: petal length against sepal length, coloured by species.
 # Left unassigned so that it is printed when the script is run.
 ggplot(data = iris,
        mapping = aes(x = Petal.Length, y = Sepal.Length, color = Species)) +
   geom_point()

 # Training set: 120 rows drawn without replacement (seeded for
 # reproducibility).
 # The split is made on row positions rather than with anti_join(): iris
 # contains a pair of identical rows, and a value-based anti-join silently
 # drops the second copy from the test set whenever the first one lands in
 # the training set.
 set.seed(1000)
 train_rows <- sample.int(nrow(iris), 120)
 train <- iris %>%
   slice(train_rows)

 # Test set: every row that was not selected for training
 test <- iris %>%
   slice(-train_rows)

 # Scatter plot of the training set, stored in `g` (not printed here) so that
 # prediction layers can be added on top of it
 g <- ggplot(data = train,
             mapping = aes(x = Petal.Length, y = Sepal.Length, color = Species)) +
   geom_point()

 # kNN ----
 library(class)

 # Run kNN (k = 10) on the test set, using the two plotted measurements as
 # features and the training species as class labels
 test$Species_knn <- knn(
   train = select(train, Petal.Length, Sepal.Length),
   test  = select(test, Petal.Length, Sepal.Length),
   cl    = train$Species,
   k     = 10
 )

 # Plot the results: the large outer point shows the predicted species, the
 # smaller point drawn on top shows the true species
 g +
   geom_point(mapping = aes(x = Petal.Length, y = Sepal.Length, color = Species_knn),
              data = test,
              size = 4) +
   geom_point(mapping = aes(x = Petal.Length, y = Sepal.Length, color = Species),
              data = test,
              size = 2)

 # Accuracy: share of test rows whose predicted species matches the true one
 mean(test$Species == test$Species_knn)

 # New data: 10 synthetic points drawn from normal distributions matching the
 # mean and spread of each iris measurement.
 # rnorm() takes a standard deviation as its third argument, so sd() is used
 # here (passing var() would inflate the spread of the simulated points).
 tibble(Petal.Length = rnorm(10, mean(iris$Petal.Length), sd(iris$Petal.Length)),
        Sepal.Length = rnorm(10, mean(iris$Sepal.Length), sd(iris$Sepal.Length))) -> test2
 ## Special-case examples (uncomment to prepend points along Petal.Length = 5)
 # tibble(Petal.Length = rep(5, 10),
 #        Sepal.Length = seq(5, 7, length.out = 10)) %>%
 # bind_rows(test2) -> test2

 # Run kNN (k = 5) on the new points, this time trained on the full iris data
 test2$Species <- knn(
   train = select(iris, Petal.Length, Sepal.Length),
   test  = select(test2, Petal.Length, Sepal.Length),
   cl    = iris$Species,
   k     = 5
 )

 # Plot the results: new points drawn large, coloured by predicted species
 g +
   geom_point(mapping = aes(x = Petal.Length, y = Sepal.Length, color = Species),
              data = test2,
              size = 4)

 # Arbres décisionnels ----
 library(rpart)

 # Fit a classification tree on the training set with the same two features
 arbre <- rpart(Species ~ Petal.Length + Sepal.Length, data = train)

 # Draw the tree, then add the node labels.
 # plot() and text() are called explicitly instead of through the magrittr
 # tee pipe (%T>%): that operator is not attached by library(tidyverse), so
 # the piped form fails unless magrittr is loaded separately.
 plot(arbre, branch = .5, margin = .5)
 text(arbre, use.n = TRUE, all = TRUE, pretty = TRUE, fancy = TRUE)

 # Predicted class for each test row
 test$Species_cart <- predict(arbre,
                              test %>% select(Petal.Length, Sepal.Length),
                              type = "class")

 # Plot the results: the large outer point shows the predicted species, the
 # smaller point drawn on top shows the true species
 g +
   geom_point(data = test,
              aes(x = Petal.Length, y = Sepal.Length, color = Species_cart),
              size = 4) +
   geom_point(data = test,
              aes(x = Petal.Length, y = Sepal.Length, color = Species),
              size = 2)

 # Accuracy: share of test rows whose predicted species matches the true one
 mean(test$Species == test$Species_cart)

 # Logistic regression ----
 # Data: the built-in mtcars data set
 data(mtcars)

 # Base scatter plot: horsepower against fuel consumption, coloured by the
 # numeric `am` column on a red (low) to blue (high) gradient
 ggplot(data = mtcars,
        mapping = aes(x = hp, y = mpg, color = am)) +
   geom_point() +
   scale_color_gradient(low = "#FF0000", high = "#0000FF")

 # Training and test sets: 24 rows for training, the remaining rows for test.
 # The split is made on row positions rather than with anti_join(train), so
 # the test set is exactly the complement of the training set and does not
 # depend on an implicit all-column join or on rows having unique values.
 set.seed(1000)
 train_rows <- sample.int(nrow(mtcars), 24)
 train <- mtcars %>%
   slice(train_rows)
 test <- mtcars %>%
   slice(-train_rows)

 # Training-set plot, stored in `g` (not printed here) so that prediction
 # layers can be added on top of it
 g <- ggplot(data = train,
             mapping = aes(x = hp, y = mpg, color = am)) +
   geom_point() +
   scale_color_gradient(low = "#FF0000", high = "#0000FF")

 # Model: logistic regression of `am` on horsepower and fuel consumption
 logi <- glm(am ~ hp + mpg, data = train, family = "binomial")

 # Print the model summary, then draw the standard diagnostic plots
 summary(logi)
 plot(logi)

 # Prediction: type = "response" returns fitted probabilities for `am`
 test$am_logi <- predict(logi,
                         newdata = select(test, hp, mpg),
                         type = "response")

 # Plot with predictions: the large outer point shows the predicted
 # probability, the smaller point drawn on top shows the observed `am`
 g +
   geom_point(mapping = aes(x = hp, y = mpg, color = am_logi),
              data = test,
              size = 4) +
   geom_point(mapping = aes(x = hp, y = mpg, color = am),
              data = test,
              size = 2)

 # Validation
 library(pROC)