Browse Source

Slides, script, données

master
Maxime Wack 6 years ago
parent
commit
e1364d185f
3 changed files with 1488 additions and 0 deletions
  1. +105
    -0
      01_Intro/EDA.R
  2. +1383
    -0
      01_Intro/commits.csv
  3. BIN
      01_Intro/intro.odp

+ 105
- 0
01_Intro/EDA.R View File

@@ -1,2 +1,107 @@
library(tidyverse)
library(gapminder)
library(lubridate)

# Préliminaires ----

# pipe : %>%
# Permet de chaîner les instructions
# x %>% f <=> f(x)
# x %>% f(.) <=> f(x)
# x %>% f(y = 10) <=> f(x, y = 10)

# EDA ----

## Diamonds ----

data(diamonds)

diamonds

# Compact structure of the table, then per-column summaries.
str(diamonds)

summary(diamonds)

# Standard deviation of every numeric column, returned as a list.
map(keep(diamonds, is.numeric), sd)

# Histogram of carat with 0.05-wide bins.
ggplot(diamonds, aes(x = carat)) +
  geom_histogram(binwidth = .05)

# Bar chart of the number of rows per cut.
ggplot(diamonds, aes(x = cut)) +
  geom_bar()

# …

#' Draw a descriptive plot of one column of a data frame.
#'
#' Maps column `var` of `x` to the x axis, then adds a histogram when the
#' column is numeric and a bar chart otherwise.
#'
#' @param x A data frame.
#' @param var Name of the column to plot, as a single string.
#' @param ... Forwarded to `geom_histogram()` (numeric column) or to
#'   `geom_bar()` (any other column).
#' @return A ggplot object.
plot_desc <- function(x, var, ...) {
  # Fail early: an unknown column would otherwise fall through to geom_bar()
  # (because x[[var]] is NULL) and only error when the plot is rendered.
  if (!is.character(var) || length(var) != 1L || is.na(var)) {
    stop("`var` must be a single column name given as a string", call. = FALSE)
  }
  if (!var %in% names(x)) {
    stop("`", var, "` is not a column of `x`", call. = FALSE)
  }

  # `.data[[var]]` replaces the deprecated aes_string(); labs() keeps the
  # axis titled with the column name.
  g <- ggplot(x) +
    aes(x = .data[[var]]) +
    labs(x = var)

  if (is.numeric(x[[var]])) {
    g + geom_histogram(...)
  } else {
    g + geom_bar(...)
  }
}

# Single variable; `binwidth` reaches geom_histogram() through `...`.
diamonds %>%
  plot_desc("carat", binwidth = .01)

# One descriptive plot per numeric column, named after its column.
# set_names() with no second argument names each element after itself, and
# map() keeps those names on its result.
numeric_plots <- diamonds %>%
  keep(is.numeric) %>%
  names() %>%
  set_names() %>%
  map(~ plot_desc(diamonds, .x, binwidth = .5))

# Same for the factor columns.
factor_plots <- diamonds %>%
  keep(is.factor) %>%
  names() %>%
  set_names() %>%
  map(~ plot_desc(diamonds, .x))

# Both sets are kept: assigning each pipeline to `plots` in turn discarded
# the numeric plots. Elements remain accessible by column name.
plots <- c(numeric_plots, factor_plots)

# Base scatterplot matrix on a random sample of 1000 rows.
diamonds %>%
  sample_n(1000) %>%
  plot()

# Share of each clarity / cut / color level within every carat bin.
diamonds %>%
  ggplot() +
  aes(x = carat, fill = clarity) +
  geom_histogram(position = "fill")

diamonds %>%
  ggplot() +
  aes(x = carat, fill = cut) +
  geom_histogram(position = "fill")

diamonds %>%
  ggplot() +
  aes(x = carat, fill = color) +
  geom_histogram(position = "fill")

# Scatter plot of the x and y columns.
diamonds %>%
  ggplot() +
  aes(x = x, y = y) +
  geom_point()

# Price against carat, coloured by the color column.
diamonds %>%
  ggplot() +
  aes(x = carat, y = price, color = color) +
  geom_point()

## Exercise: gapminder ----

data(gapminder)

# Feature engineering ----

# Keep the parsed table so the questions below can be answered from it; the
# bare name on the next line still prints it, as the unassigned call did.
# NOTE(review): the path is relative, so this presumably expects the working
# directory to be the folder holding commits.csv - confirm.
commits <- read_csv2("commits.csv")
commits

# Commits per day? By hour of day? Per day and per hour? Work vs. personal? Weekdays vs. weekend?

+ 1383
- 0
01_Intro/commits.csv
File diff suppressed because it is too large
View File


BIN
01_Intro/intro.odp View File


Loading…
Cancel
Save