Browse Source

Initial commit

master
Maxime Wack 6 years ago
commit
5c94df30e2
3 changed files with 492 additions and 0 deletions
  1. BIN
      cours.pptx
  2. +310
    -0
      exemples.R
  3. +182
    -0
      illustrations.R

BIN
cours.pptx View File


+ 310
- 0
exemples.R View File

@@ -0,0 +1,310 @@
library(tidyverse)

# Un temps ----

## Data
data(iris)
data(mtcars)

mtcars %>%
mutate_at(vars(am, cyl, vs, gear), factor) ->
mtcars

## Deux groupes

### Plots

boxplot(mpg ~ am, data = mtcars)

ggplot(mtcars) +
aes(x = am, y = mpg) +
geom_boxplot() +
geom_point(alpha = .5)

### Paramétrique
t.test(mpg ~ am, data = mtcars, var.equal = T)
oneway.test(mpg ~ am, data = mtcars, var.equal = T)

### Non-paramétrique
wilcox.test(mpg ~ am, data = mtcars)

## Plusieurs groupes

### Plots
iris %>%
ggplot() +
aes(x = Species, y = Sepal.Width) +
geom_violin() +
geom_point(position = "jitter", alpha = .5)

iris %>%
ggplot() +
aes(x = Petal.Length) +
geom_histogram(data = iris %>% select(-Species)) +
geom_histogram(aes(fill = Species)) +
facet_grid(~Species)

### Paramétrique
oneway.test(Sepal.Width ~ Species, data = iris)

### Non-paramétrique
kruskal.test(Sepal.Width ~ Species, data = iris)

## Multivarié

### Régression linéaire

# Y = a₁X₁ + a₂X₂ + _ + anXn + ε
# Y = Σ aiXi + ε, ε -> N
plot(mtcars)

fit <- lm(mpg ~ am, data = mtcars)
summary(fit)
plot(fit)

fit <- lm(mpg ~ cyl + disp + hp + drat + wt + vs + am, data = mtcars)
summary(fit)
plot(fit)

### Régression logistique
fit <- glm(am ~ mpg + drat + wt, data = mtcars, family = "binomial")
summary(fit)
plot(fit)

# Deux temps ----

## Data
data(ChickWeight)

### Test apparié

#### Plots
ChickWeight %>%
filter(Time %in% c(0, 2)) %>%
ggplot() +
aes(x = Time, y = weight, group = Chick, color = Diet) +
geom_point() +
geom_line()

#### Test
ChickWeight %>%
group_by(Chick) %>%
summarise(maxTime = max(Time)) %>%
summarise(minTime = min(maxTime))

t.test(ChickWeight$weight[ChickWeight$Time == 0], ChickWeight$weight[ChickWeight$Time == 2], paired = T)

### Test apparié à la main
ChickWeight %>%
filter(Time %in% c(0, 2)) %>%
group_by(Chick) %>%
summarise(weightDiff = last(weight) - first(weight),
Diet = first(Diet)) -> deuxTemps

#### Plots
deuxTemps %>%
ggplot() +
aes(x = weightDiff, fill = Diet) +
geom_density(alpha = .25) #, position = "fill") # position = "stack" "fill"

oneway.test(weightDiff ~ Diet, data = deuxTemps)
kruskal.test(weightDiff ~ Diet, data = deuxTemps)

# Survie ----
library(survival)

## Data
data(lung)

## Bivarié
lung$Surv <- Surv(lung$time, lung$status)
survfit(Surv ~ 1, data = lung) -> fit

plot(fit)

survfit(Surv ~ sex, data = lung) -> fit
plot(fit, conf.int = T, col = c("blue", "red"))
legend("topright", legend = c("Hommes", "Femmes"), col = c("blue", "red"), lty = 1)

survdiff(Surv ~ sex, data = lung)

## Cox
coxph(Surv ~ sex + age + ph.ecog + ph.karno + meal.cal + wt.loss, data = lung) -> fit
cox.zph(fit) %>% plot

# N temps ----
library(lme4)
library(lmerTest)

## Data
data(ChickWeight)

## Plots
ChickWeight %>%
ggplot() +
aes(x = Time, y = weight, group = Chick, color = Diet) %>%
geom_line()#alpha = .5) +
geom_smooth(aes(x = Time, y = weight, color = Diet))

## Test
lmer(weight ~ Diet + (1|Time) + (1|Chick), data = ChickWeight) -> fit

summary(fit)
plot(fit)

# Trajectoires ----
library(TraMineR)
library(cluster)

## Data
data(mvad)

## Description
mvad.seq <- seqdef(mvad, 17:86, xtstep = 12)
plot(mvad.seq, 1:20)
seqdplot(mvad.seq)

## Clustering
mvad.om <- seqdist(mvad.seq, method = "OM", sm = "TRATE")

clusterward <- agnes(mvad.om, diss = T, method = "ward")
mvad.cl <- cutree(clusterward, k = 4)
cl.lab <- factor(mvad.cl, labels = paste("Cluster", 1:4))
seqdplot(mvad.seq, group = cl.lab)
seqfplot(mvad.seq, group = cl.lab)
seqmsplot(mvad.seq, group = cl.lab)

## Analyse
mvad.ent <- seqient(mvad.seq)
lm.ent <- lm(mvad.ent ~ male + funemp + gcse5eq, data = mvad)
summary(lm.ent)

# Séries temporelles ----
library(astsa)
library(Rssa)
library(zoo)
library(TSA)

## Data
data(cmort)

## Description des données
is.ts(cmort) #on vérifie qu'il s'agit bien d'un objet série temporelle
start(cmort) #Première observation
end(cmort) #Dernière observation
frequency(cmort) #Fréquance des observations
deltat(cmort) #intervalle de temps entre deux observations

length(cmort)
head(cmort) #visualisation des premier éléments de l'objet cmort

tsplot(cmort) #visualisation graphique de la série


## Méthode de décomposition
MannKendall(cmort) #test de tendance : significatif donc il existe bien une tendance

### Modèle additif
D_additif1 = decompose(cmort, "additive")
plot(D_additif1)

### Modèle additif(autre méthode)
D_additif2 = stl(cmort, "periodic")
plot(D_additif2)

### Modèle multiplicatif
D_multiplicatif = decompose(cmort, "multiplicative")
plot(D_multiplicatif)

## Plots

#Jaune : signal recomposé
#Noir pointillé signal initial
graph = function(var){
plot(var, lwd = 2, col = "#FFC000", main = substitute(var))
lines(cmort,lty = 2)
}

#Reconstitution signal avec décomposition additive et bruit
cmort_ad1 = D_additif1$trend + D_additif1$seasonal + D_additif1$random
graph(cmort_ad1)
#Reconstitution signal avec décomposition additive sans bruit
cmort_ad1_ssrandom = D_additif1$trend + D_additif1$seasonal
graph(cmort_ad1_ssrandom)


#Reconstitution signal avec décomposition additive et bruit
cmort_ad2 = D_additif2$time.series[,1] + D_additif2$time.series[,2] + D_additif2$time.series[,3]
graph(cmort_ad2)
#Reconstitution signal avec décomposition additive sans bruit
cmort_ad2_ssrandom = D_additif2$time.series[,1] + D_additif2$time.series[,2]
graph(cmort_ad2_ssrandom)

#Reconstitution signal avec décomposition multiplicative et bruit
cmort_mult = D_multiplicatif$trend*D_multiplicatif$seasonal*D_multiplicatif$random
graph(cmort_mult)
#Reconstitution signal avec décomposition multiplicative sans bruit
cmort_mult_ssrandom = D_multiplicatif$trend*D_multiplicatif$seasonal
graph(cmort_mult_ssrandom)


### Autocorrélation et Modélisation

#autocorrélation
acf(cmort)

#autocorrélation partielle
pacf(cmort)


### Analyse spectrale

#périodogramme
p = periodogram(cmort)
#Trouver la période :
# Définir la fréquence où le spectre est maximum
# Période : 1/Freq
frequence = p$freq[which.max(p$spec)]
Periode = 1/frequence

#Lissage par moyenne mobile
plot(cmort, ylab = "cmort", main = "Lissage par moyenne glissante")
lines(filter(cmort, rep(1/20, 20), side = 1), col = "#FFC000", lwd = 2)
lines(filter(cmort, rep(1/12, 12), side = 1), col = "red", lwd = 2)
lines(filter(cmort, rep(1/50, 50), side = 1), col = "indianred4", lwd = 2)
legend("topright", col = c("red", "#FFC000", "indianred4"), legend = c("1/12", "1/20", "1/100"), lwd = c(2,2,2))

#Lissage par noyaux
#Calcul de 3 noyaux
k1 <- kernel("daniell", 10)
k2 <- kernel("daniell", 5)
k3 <- kernel("daniell", 30)
#Application des 3 noyaux aux données
x1 <- kernapply(cmort, k1)
x2 <- kernapply(cmort, k2)
x3 <- kernapply(cmort, k3)
#Graphiques
plot(cmort, main = "Lissage par noyaux")
lines(x1, col = "#FFC000", lwd = 2)
lines(x2, col = "red", lwd = 2)
lines(x3, col = "indianred4", lwd = 2)
legend("topright", col = c("red", "#FFC000", "indianred4"), legend = c("5", "10", "30"), lwd = c(2,2,2))


### Série temporelle multiple

#Graphique
serie = cbind(lap[,10],cmort)
colnames(serie)[1] = "ozone"
plot(serie)
abline(v = 1970:1980, col ="#FFC000", lty = 2, lwd = 2)

#Test de causalité de Granger
grangertest(cmort ~ lap[,4]) #Température
grangertest(cmort ~ lap[,5]) #Humidité relative
grangertest(cmort ~ lap[,6]) #Monoxyde de carbone
grangertest(cmort ~ lap[,7]) #Dioxyde de soufre
grangertest(cmort ~ lap[,8]) #Dioxyde d'azote
grangertest(cmort ~ lap[,9]) #Hydrocarbure
grangertest(cmort ~ lap[,10]) #Ozone
grangertest(cmort ~ lap[,11]) #Particule

+ 182
- 0
illustrations.R View File

@@ -0,0 +1,182 @@
library(tidyverse)
library(patchwork)


# Graphique test statistique ----

seq(100, 0) %>%
map(function(x){
ci <- function(m, s, n, z = 1.34)
{
dev <- z * (s / sqrt(n))
c(m - dev, m + dev)
}

ztest <- function(m1, s1, n1, m2, s2, n2)
{
zscore <- abs(m2 - m1) / sqrt((s1^2 / n1) + (s2^2 / n2))
2 * (1 - pnorm(zscore))
}

tibble(m1 = x/10,
m2 = - x/10,
s = 10,
n = 20) -> df

df %>%
ggplot() +
aes(x = m1) +
stat_function(fun = dnorm, args = list(mean = df$m1, sd = df$s), color = "blue", size = 1.5, n = 1000) +
stat_function(fun = dnorm, args = list(mean = df$m2, sd = df$s), color = "red", size = 1.5, n = 1000) +
geom_vline(xintercept = ci(df$m1, df$s, df$n), color = "blue") +
geom_vline(xintercept = ci(df$m2, df$s, df$n), color = "red") +
geom_text(x = -30, hjust = 0, y = .05, vjust = 1, label = str_c("p = ", ztest(df$m1, df$s, df$n, df$m2, df$s, df$n)), size = 4) +
scale_x_continuous(limits = c(-30 , 30)) +
scale_y_continuous(limits = c(0, .05)) +
theme_classic() +
xlab("x") -> p1

df %>%
ggplot() +
aes(x = m1) +
stat_function(fun = dnorm, args = list(mean = abs(df$m2 - df$m1), sd = sqrt(df$s^2 / df$n + df$s^2 / df$n))) +
geom_vline(xintercept = 0) +
geom_vline(xintercept = ci(abs(df$m2 - df$m1), sqrt(df$s^2 / df$n + df$s^2 / df$n), 1, 1.96)) +
scale_x_continuous(limits = c(-10, 40)) +
theme_classic() +
xlab("x") -> p2

p1 + p2 + plot_layout(ncol = 1, heights = c(8, 2))
}) -> plots

1:101 %>%
map(~ ggsave(plot = plots[[.]],
filename = str_c("anim/", str_pad(., width = 3, pad = "0"), ".png"),
unit = "cm",
dpi = 200,
width = 16,
height = 9))
# Un temps ----

one <- tibble(color = rep(LETTERS[1:3], each = 10),
y = map((-1:1), ~rnorm(10, ., .1)) %>% unlist,
x = rnorm(30, 0, .1))

one %>%
ggplot() +
geom_segment(x = -1, y = 0, xend = 1, yend = 0, color = "darkgrey", arrow = arrow(), size = 2) +
geom_segment(jx = 0, y = -.1, xend = 0, yend = .1, color = "darkgrey", size = 2) +
aes(x = x, y = y, color = color) +
geom_point(size = 2) +
scale_x_continuous(limits = c(-1, 1)) +
scale_y_continuous(limits = c(-2, 2)) +
theme_void() +
theme(legend.position = "none",
plot.background = element_rect(fill = "transparent", colour = "transparent")) ->
oneplot

ggsave(plot = oneplot, filename = "one.png", bg = "transparent")

# Deux temps ----

two <- tibble(x = map(c(-.5, .5), ~rnorm(10, ., .1)) %>% unlist,
y = map(c(-1, 1), ~rnorm(10, ., .1)) %>% unlist,
colour = rep("A", 20))

two %>%
ggplot() +
geom_segment(x = -1, y = 0, xend = 1, yend = 0, color = "darkgrey", arrow = arrow(), size = 2) +
geom_segment(x = -.5, y = -.1, xend = -.5, yend = .1, color = "darkgrey", size = 2) +
geom_segment(x = .5, y = -.1, xend = .5, yend = .1, color = "darkgrey", size = 2) +
aes(x = x, y = y, colour = colour) +
geom_point(size = 2) +
scale_x_continuous(limits = c(-1, 1)) +
scale_y_continuous(limits = c(-2, 2)) +
theme_void() +
theme(legend.position = "none",
plot.background = element_rect(fill = "transparent", colour = "transparent")) ->
twoplot

ggsave(plot = twoplot, filename = "two.png", bg = "transparent")

# N temps ----

many <- tibble(x = map(-3:3, ~rnorm(10, ., .1)) %>% unlist,
y = runif(7, -1.5, 1.5) %>% map(~rnorm(10, ., .1)) %>% unlist,
colour = rep("A", 70))

many %>%
ggplot() +
geom_segment(x = -4, y = 0, xend = 4, yend = 0, color = "darkgrey", arrow = arrow(), size = 2) +
geom_segment(x = -3, y = -.1, xend = -3, yend = .1, color = "darkgrey", size = 2) +
geom_segment(x = -2, y = -.1, xend = -2, yend = .1, color = "darkgrey", size = 2) +
geom_segment(x = -1, y = -.1, xend = -1, yend = .1, color = "darkgrey", size = 2) +
geom_segment(x = 0, y = -.1, xend = 0, yend = .1, color = "darkgrey", size = 2) +
geom_segment(x = 1, y = -.1, xend = 1, yend = .1, color = "darkgrey", size = 2) +
geom_segment(x = 2, y = -.1, xend = 2, yend = .1, color = "darkgrey", size = 2) +
geom_segment(x = 3, y = -.1, xend = 3, yend = .1, color = "darkgrey", size = 2) +
aes(x = x, y = y, colour = colour) +
geom_point(size = 2) +
scale_x_continuous(limits = c(-4, 4)) +
scale_y_continuous(limits = c(-2, 2)) +
theme_void() +
theme(legend.position = "none",
plot.background = element_rect(fill = "transparent", colour = "transparent")) ->
manyplot

ggsave(plot = manyplot, filename = "many.png", bg = "transparent")

# Série temporelle ----

## Figure introduction
library(astsa)
data(flu) #chargement de la base "flu" du package astsa

## Représentation graphique de la série temporelle
tsplot(flu,xlab = "Time", ylab = "Deaths per 10,000 people",
main="Monthly pneumonia and influenza deaths per 10,000 people \n in the United States for 11 years, 1968 to 1978")

## Simulation série

### Simulation des paramêtres temps, tendance, saisonalité, bruit
t = time(cmort)
m = 0.2*t+2.1
s = cos(5*t)
epsilon = rnorm(length(t),0,0.5)

### Simulation des séries temporelles comme modèle additif/multiplicatif/Hybride
simu.ts.additif = ts(m+s+epsilon,frequency = 52, start = c(1970, 1) )
tsplot(simu.ts.additif)

simu.ts.multiplictif = ts(m*s*epsilon,frequency = 52, start = c(1970, 1) )
tsplot(simu.ts.multiplictif)

simu.ts.hybride = ts(m*s+epsilon*100,frequency = 52, start = c(1970, 1) )
tsplot(simu.ts.hybride)

#### Courbes
plot(t,m, type = "l", main = "Tendance", xlab = "Temps", ylab = "")
plot(t,s, type = "l", main = "Saisonnalité", xlab = "Temps", ylab = "")
plot(t,epsilon, type = "l", main = "Bruit", xlab = "Temps", ylab = "")

### Décomposition des signaux simulés
plot(decompose(simu.ts.additif, type="additive")) #décomposition trend/seasonnality/noise, modèle additif
plot(stl(simu.ts.additif, "periodic"))
plot(decompose(decompose(simu.ts.additif, type="additive")$random, type="additive"))

## Simulation modèles AR et MA

### Simulation AR(1) : X_t = 0.6X_{t???1} + ??_t
X=arima.sim(n = 2400, list(ar = 0.6),sd = 1)
plot(acf(X),lwd=2, main = "Acf AR(1)")
plot(pacf(X),lwd=2, main = "Pacf AR(1)")

### Simulation AR(2) : Xt = 0.6X_{t???1} - 0.4X_{t???2} + ??t
X=arima.sim(n = 2400, list(ar = c(0.6,-0.4)),sd = 1)
plot(acf(X),lwd=2, main = "Acf AR(2)")
plot(pacf(X),lwd=2, main = "Pacf AR(2)")

### Simulation MA(1) X_t = ??_t + 0.7??_{t???1}
X=arima.sim(n = 2400, list(ma = .7),sd = 1)
plot(acf(X),lwd=2, main = "Acf MA(1)")
plot(pacf(X),lwd=2, main = "Pacf MA(1)")

Loading…
Cancel
Save