|
|
@@ -1,2 +1,107 @@ |
|
|
|
library(tidyverse) |
|
|
|
library(gapminder) |
|
|
|
library(lubridate) |
|
|
|
|
|
|
|
# Préliminaire ----
|
|
|
|
|
|
|
# pipe : %>% |
|
|
|
# Permet de chaîner les instructions |
|
|
|
# x %>% f <=> f(x) |
|
|
|
# x %>% f(.) <=> f(x) |
|
|
|
# x %>% f(y = 10) <=> f(x, y = 10) |
|
|
|
|
|
|
|
# EDA ---- |
|
|
|
|
|
|
|
## Diamonds ---- |
|
|
|
|
|
|
|
# Load the `diamonds` dataset into the workspace.
data("diamonds")

# Show the data itself, then its structure and its per-column summary.
diamonds

str(diamonds)

summary(diamonds)

# Standard deviation of every numeric column, returned as a named list.
map(keep(diamonds, is.numeric), sd)
|
|
|
|
|
|
|
# Histogram of `carat` with bins 0.05 wide.
ggplot(diamonds, aes(x = carat)) +
  geom_histogram(binwidth = 0.05)

# Bar chart of `cut`.
ggplot(diamonds, aes(x = cut)) +
  geom_bar()
|
|
|
|
|
|
|
# … |
|
|
|
|
|
|
|
# Plot the distribution of a single column.
#
# Arguments:
#   x    Data passed to ggplot(); must expose the column via names(x) and
#        x[[var]].
#   var  Name of the column to plot, as a single string.
#   ...  Forwarded to geom_histogram() when the column is numeric, and to
#        geom_bar() otherwise (e.g. `binwidth = .01`).
#
# Returns a ggplot object: a histogram for a numeric column, a bar chart
# for anything else. Stops with an error when `var` is not a single string
# or does not name a column of `x`.
plot_desc <- function(x, var, ...) {
  stopifnot(is.character(var), length(var) == 1L, !is.na(var))
  if (!var %in% names(x)) {
    stop("column `", var, "` not found in `x`", call. = FALSE)
  }

  # `.data[[var]]` replaces the deprecated aes_string(); the explicit label
  # keeps the axis title equal to the column name.
  g <- ggplot(x) +
    aes(x = .data[[var]]) +
    labs(x = var)

  if (is.numeric(x[[var]])) {
    g + geom_histogram(...)
  } else {
    g + geom_bar(...)
  }
}
|
|
|
|
|
|
|
# Distribution of `carat`, drawn with a 0.01 bin width.
plot_desc(diamonds, "carat", binwidth = 0.01)
|
|
|
|
|
|
|
# Build one plot per column, stored in a list named after the columns.
# The numeric and factor plots used to be assigned to `plots` one after the
# other, so the second assignment discarded the first; both sets are now
# kept in the same list (e.g. `plots$carat`, `plots$cut`).
# Naming the vector of column names before map() carries the names through,
# so the column names are computed only once per group.
plots <- c(
  # Numeric columns: all drawn with the same 0.5 bin width.
  diamonds %>%
    keep(is.numeric) %>%
    names() %>%
    set_names() %>%
    map(~ plot_desc(diamonds, .x, binwidth = .5)),
  # Factor columns.
  diamonds %>%
    keep(is.factor) %>%
    names() %>%
    set_names() %>%
    map(~ plot_desc(diamonds, .x))
)
|
|
|
|
|
|
|
# Draw 1000 random rows and hand the sample to plot().
# slice_sample() replaces the superseded sample_n().
diamonds %>%
  slice_sample(n = 1000) %>%
  plot()
|
|
|
|
|
|
|
# Histograms of `carat` with position = "fill", filled in turn by
# `clarity`, `cut` and `color`.
ggplot(diamonds, aes(x = carat, fill = clarity)) +
  geom_histogram(position = "fill")

ggplot(diamonds, aes(x = carat, fill = cut)) +
  geom_histogram(position = "fill")

ggplot(diamonds, aes(x = carat, fill = color)) +
  geom_histogram(position = "fill")
|
|
|
|
|
|
|
# Scatter plot of the `x` and `y` columns.
ggplot(diamonds, aes(x = x, y = y)) +
  geom_point()

# Scatter plot of `price` against `carat`, points coloured by `color`.
ggplot(diamonds, aes(x = carat, y = price, color = color)) +
  geom_point()
|
|
|
|
|
|
|
## Exercice gapminder ---- |
|
|
|
|
|
|
|
# Load the `gapminder` dataset into the workspace for the exercise.
data("gapminder")
|
|
|
|
|
|
|
# Feature engineering ---- |
|
|
|
|
|
|
|
# Read the commit log and keep it, so the analysis that follows has
# something to work on; the bare read only printed the table and then
# discarded it.
commits <- read_csv2("commits.csv")
commits
|
|
|
|
|
|
|
# Commits par jour ? En fonction de l'heure ? Par jour et par heure ? Boulot/perso ? En semaine/week-end ?