Browse Source

Exemples

master
Maxime Wack 6 years ago
parent
commit
b06ec771a5
1 changed files with 135 additions and 0 deletions
  1. +135
    -0
      03_Classification/classif.R

+ 135
- 0
03_Classification/classif.R View File

@@ -0,0 +1,135 @@
# Initialisation ----
library(tidyverse)

# Example data
data(iris)

# Base plot: sepal length against petal length, coloured by species
ggplot(iris, aes(x = Petal.Length, y = Sepal.Length, color = Species)) +
  geom_point()

# Training / test split ----
# Tag every observation with its row position so the split is made on row
# identity. Joining on the measured values instead would also drop any test
# row that happens to be identical to a training row (iris contains such
# duplicates), which can leave the test set with fewer than 30 rows.
iris_indexed <- iris %>%
  mutate(row_id = row_number())

# Training set: 120 rows sampled without replacement (seed fixed for
# reproducibility)
set.seed(1000)
train <- iris_indexed %>%
  sample_n(120)

# Test set: the 30 rows that were not sampled
test <- iris_indexed %>%
  anti_join(train, by = "row_id") %>%
  select(-row_id)

# The helper column is only needed for the split; drop it so that `train`
# has exactly the columns of `iris`
train <- train %>%
  select(-row_id)

# Base plot of the training set, kept in `g` to be reused as a background
# layer by the plots of the following sections
g <- ggplot(train, aes(x = Petal.Length, y = Sepal.Length, color = Species)) +
  geom_point()

# kNN ----
library(class)

# Classify the test observations with k-nearest-neighbours (k = 10), using
# the two plotted measurements as features and the training species as labels
test$Species_knn <- knn(
  train = select(train, Petal.Length, Sepal.Length),
  test = select(test, Petal.Length, Sepal.Length),
  cl = train$Species,
  k = 10
)

# Plot of the results: on top of the training points, each test point is
# drawn twice, a large disc coloured by the predicted species and a smaller
# disc coloured by the true species, so a mismatch shows as a two-colour dot
g +
  geom_point(
    data = test,
    mapping = aes(x = Petal.Length, y = Sepal.Length, color = Species_knn),
    size = 4
  ) +
  geom_point(
    data = test,
    mapping = aes(x = Petal.Length, y = Sepal.Length, color = Species),
    size = 2
  )

# Results: proportion of test observations classified correctly
mean(test$Species == test$Species_knn)

# New data: 10 simulated observations drawn from normal distributions centred
# on the iris means. rnorm() expects a standard deviation as its third
# argument, so the spread is given by sd() rather than var().
test2 <- tibble(
  Petal.Length = rnorm(
    10,
    mean = mean(iris$Petal.Length),
    sd = sd(iris$Petal.Length)
  ),
  Sepal.Length = rnorm(
    10,
    mean = mean(iris$Sepal.Length),
    sd = sd(iris$Sepal.Length)
  )
)
## Special cases
# test2 <- tibble(
#   Petal.Length = rep(5, 10),
#   Sepal.Length = seq(5, 7, length.out = 10)
# ) %>%
#   bind_rows(test2)

# Apply kNN (k = 5) to the new data. The classifier is trained on the full
# iris data set here, not only on `train`.
test2$Species <- knn(
  train = select(iris, Petal.Length, Sepal.Length),
  test = select(test2, Petal.Length, Sepal.Length),
  cl = iris$Species,
  k = 5
)

# Plot of the results: simulated points, coloured by predicted species, drawn
# as large discs over the training plot
g +
  geom_point(
    data = test2,
    mapping = aes(x = Petal.Length, y = Sepal.Length, color = Species),
    size = 4
  )

# Decision trees ----
library(rpart)

# Fit a classification tree predicting the species from the two plotted
# measurements
arbre <- rpart(Species ~ Petal.Length + Sepal.Length, data = train)

# Draw the tree, then add the node labels. Both calls take the fitted tree
# directly: the magrittr tee pipe (%T>%) is not attached by
# library(tidyverse), so it cannot be used here without loading magrittr.
plot(arbre, branch = .5, margin = .5)
text(arbre, use.n = TRUE, all = TRUE, pretty = TRUE, fancy = TRUE)

# Predicted class for each test observation
test$Species_cart <- predict(
  arbre,
  select(test, Petal.Length, Sepal.Length),
  type = "class"
)

# Plot of the results: large disc = predicted species, small disc = true
# species, drawn over the training points
g +
  geom_point(
    data = test,
    mapping = aes(x = Petal.Length, y = Sepal.Length, color = Species_cart),
    size = 4
  ) +
  geom_point(
    data = test,
    mapping = aes(x = Petal.Length, y = Sepal.Length, color = Species),
    size = 2
  )

# Results: proportion of test observations classified correctly
mean(test$Species == test$Species_cart)

# Logistic regression ----
# Data
data(mtcars)

# Base plot: mpg against hp, coloured by the numeric `am` column on a
# red-to-blue gradient
ggplot(mtcars, aes(x = hp, y = mpg, color = am)) +
  geom_point() +
  scale_color_gradient(low = "#FF0000", high = "#0000FF")

# Training and test sets
# The split is made on a temporary row id rather than by joining on every
# column value, so it does not depend on all rows being distinct and does not
# rely on an implicit join over all columns.
mtcars_indexed <- mtcars %>%
  mutate(row_id = row_number())

set.seed(1000)
train <- mtcars_indexed %>%
  sample_n(24)

# Rows that were not sampled for training
test <- mtcars_indexed %>%
  anti_join(train, by = "row_id") %>%
  select(-row_id)

# The helper column is only needed for the split
train <- train %>%
  select(-row_id)

# Training plot, kept in `g` (this replaces the iris plot stored earlier
# under the same name)
g <- ggplot(train, aes(x = hp, y = mpg, color = am)) +
  geom_point() +
  scale_color_gradient(low = "#FF0000", high = "#0000FF")

# Model: logistic regression of `am` on hp and mpg, fitted on the training set
logi <- glm(am ~ hp + mpg, data = train, family = "binomial")

# Coefficient table and the standard diagnostic plots
summary(logi)
plot(logi)

# Prediction: type = "response" returns fitted probabilities, not class labels
test$am_logi <- predict(logi, select(test, hp, mpg), type = "response")

# Plot with the prediction: large disc = predicted probability, small disc =
# observed `am`, drawn over the training points on the same colour gradient
g +
  geom_point(
    data = test,
    mapping = aes(x = hp, y = mpg, color = am_logi),
    size = 4
  ) +
  geom_point(
    data = test,
    mapping = aes(x = hp, y = mpg, color = am),
    size = 2
  )

# Validation
library(pROC)

Loading…
Cancel
Save