|
- # Setup ----
- library(tidyverse)
-
- # Example data: load the built-in iris data set into the workspace.
- data(iris)
-
- # Baseline scatter plot: sepal length against petal length, one colour
- # per species.
- ggplot(iris, aes(Petal.Length, Sepal.Length, colour = Species)) +
-   geom_point()
-
- # Training / test split
- # Draw 120 row positions at random (seeded so the split is reproducible)
- # and split on those positions. Splitting by position, rather than by an
- # anti-join on column values, keeps every remaining row in the test set
- # even when the data contain identical observations.
- set.seed(1000)
- train_rows <- sample.int(nrow(iris), size = 120)
-
- # Training set: the sampled rows
- train <- iris %>%
-   slice(train_rows)
-
- # Test set: every row that was not drawn for training
- test <- iris %>%
-   slice(-train_rows)
-
- # Scatter plot of the training set, kept in `g` so later sections can
- # overlay their predictions on it
- g <- train %>%
-   ggplot() +
-   aes(x = Petal.Length, y = Sepal.Length, color = Species) +
-   geom_point()
-
- # kNN ----
- library(class)
-
- # Classify each test observation from its 10 nearest training neighbours,
- # using petal length and sepal length as features. The predicted class is
- # stored next to the true one in `test`.
- test$Species_knn <- knn(
-   train = train[, c("Petal.Length", "Sepal.Length")],
-   test = test[, c("Petal.Length", "Sepal.Length")],
-   cl = train$Species,
-   k = 10
- )
-
- # Overlay the test set on the training plot: the large point is coloured by
- # the predicted species, the small point drawn on top by the true species.
- g + list(
-   geom_point(
-     mapping = aes(x = Petal.Length, y = Sepal.Length, color = Species_knn),
-     data = test,
-     size = 4
-   ),
-   geom_point(
-     mapping = aes(x = Petal.Length, y = Sepal.Length, color = Species),
-     data = test,
-     size = 2
-   )
- )
-
- # Share of test observations whose predicted species equals the true one
- with(test, mean(Species == Species_knn))
-
- # New data
- # Simulate 10 new observations: each measurement is drawn from a normal
- # distribution with the mean and standard deviation observed in iris.
- # rnorm() expects a standard deviation as its third argument, hence sd()
- # and not var().
- test2 <- tibble(
-   Petal.Length = rnorm(
-     10,
-     mean = mean(iris$Petal.Length),
-     sd = sd(iris$Petal.Length)
-   ),
-   Sepal.Length = rnorm(
-     10,
-     mean = mean(iris$Sepal.Length),
-     sd = sd(iris$Sepal.Length)
-   )
- )
- ## Special cases
- # tibble(Petal.Length = rep(5, 10),
- # Sepal.Length = seq(5, 7, length.out = 10)) %>%
- # bind_rows(test2) -> test2
-
- # Classify the new observations with kNN (k = 5), this time using the full
- # iris data set as the reference sample
- test2$Species <- knn(
-   train = iris %>% select(Petal.Length, Sepal.Length),
-   test = test2 %>% select(Petal.Length, Sepal.Length),
-   cl = iris$Species,
-   k = 5
- )
-
- # Overlay the new observations, coloured by predicted species, on the
- # training plot
- g +
-   geom_point(
-     data = test2,
-     aes(x = Petal.Length, y = Sepal.Length, color = Species),
-     size = 4
-   )
-
- # Decision trees ----
- library(rpart)
-
- # Fit a tree predicting species from petal length and sepal length on the
- # training set
- arbre <- rpart(Species ~ Petal.Length + Sepal.Length, data = train)
-
- # Draw the tree, then add its labels. plot() and text() are called directly
- # on the fitted tree: the tee pipe `%T>%` belongs to magrittr, which
- # library(tidyverse) does not attach.
- plot(arbre, branch = 0.5, margin = 0.5)
- text(arbre, use.n = TRUE, all = TRUE, pretty = TRUE, fancy = TRUE)
-
- # Predicted class for each test observation, stored next to the true one
- test$Species_cart <- predict(
-   arbre,
-   newdata = test %>% select(Petal.Length, Sepal.Length),
-   type = "class"
- )
-
- # Overlay the test set on the training plot: the large point is coloured by
- # the predicted species, the small point drawn on top by the true species.
- g +
-   geom_point(
-     data = test,
-     aes(x = Petal.Length, y = Sepal.Length, color = Species_cart),
-     size = 4
-   ) +
-   geom_point(
-     data = test,
-     aes(x = Petal.Length, y = Sepal.Length, color = Species),
-     size = 2
-   )
-
- # Share of test observations whose predicted species equals the true one
- mean(test$Species == test$Species_cart)
-
- # Logistic regression ----
- # Data: load the built-in mtcars data set into the workspace.
- data(mtcars)
-
- # Baseline scatter plot: mpg against hp, coloured by am on a red-to-blue
- # gradient.
- ggplot(mtcars, aes(hp, mpg, colour = am)) +
-   geom_point() +
-   scale_colour_gradient(low = "#FF0000", high = "#0000FF")
-
- # Training and test sets
- # Draw 24 row positions at random (seeded so the split is reproducible)
- # and split on those positions. This avoids an anti-join keyed implicitly
- # on every column, which compares floating-point values and would drop
- # test rows that happen to duplicate a training row.
- set.seed(1000)
- train_rows <- sample.int(nrow(mtcars), size = 24)
- train <- mtcars %>%
-   slice(train_rows)
- test <- mtcars %>%
-   slice(-train_rows)
-
- # Scatter plot of the training set, kept in `g` so the predictions can be
- # overlaid on it
- g <- train %>%
-   ggplot() +
-   aes(x = hp, y = mpg, color = am) +
-   geom_point() +
-   scale_color_gradient(low = "#FF0000", high = "#0000FF")
-
- # Model: binomial GLM of am on hp and mpg, fitted on the training set
- logi <- glm(am ~ hp + mpg, data = train, family = "binomial")
-
- # Model summary and diagnostic plots
- summary(logi)
- plot(logi)
-
- # Prediction: fitted response (type = "response") for each test row,
- # stored next to the observed am
- test$am_logi <- predict(
-   logi,
-   newdata = test[, c("hp", "mpg")],
-   type = "response"
- )
-
- # Overlay the test set on the training plot: the large point is coloured by
- # the predicted value, the small point drawn on top by the observed am.
- g + list(
-   geom_point(
-     mapping = aes(x = hp, y = mpg, color = am_logi),
-     data = test,
-     size = 4
-   ),
-   geom_point(
-     mapping = aes(x = hp, y = mpg, color = am),
-     data = test,
-     size = 2
-   )
- )
-
- # Validation
- library(pROC)
-
- # ROC analysis of the predictions against the observed am on the test set
- ROC <- roc(am ~ am_logi, data = test)
- plot(ROC)
|