commit 5c94df30e2a2e8b14384093a0cdf0fcc8e892652 Author: Maxime Wack Date: Mon Jan 8 23:31:36 2018 +0100 Initial commit diff --git a/cours.pptx b/cours.pptx new file mode 100644 index 0000000..a1ab0bc Binary files /dev/null and b/cours.pptx differ diff --git a/exemples.R b/exemples.R new file mode 100644 index 0000000..9d8ba37 --- /dev/null +++ b/exemples.R @@ -0,0 +1,310 @@ +library(tidyverse) + +# Un temps ---- + +## Data +data(iris) +data(mtcars) + +mtcars %>% + mutate_at(vars(am, cyl, vs, gear), factor) -> +mtcars + +## Deux groupes + +### Plots + +boxplot(mpg ~ am, data = mtcars) + +ggplot(mtcars) + + aes(x = am, y = mpg) + + geom_boxplot() + + geom_point(alpha = .5) + +### Paramétrique +t.test(mpg ~ am, data = mtcars, var.equal = T) +oneway.test(mpg ~ am, data = mtcars, var.equal = T) + +### Non-paramétrique +wilcox.test(mpg ~ am, data = mtcars) + +## Plusieurs groupes + +### Plots +iris %>% + ggplot() + + aes(x = Species, y = Sepal.Width) + + geom_violin() + + geom_point(position = "jitter", alpha = .5) + +iris %>% + ggplot() + + aes(x = Petal.Length) + + geom_histogram(data = iris %>% select(-Species)) + + geom_histogram(aes(fill = Species)) + + facet_grid(~Species) + +### Paramétrique +oneway.test(Sepal.Width ~ Species, data = iris) + +### Non-paramétrique +kruskal.test(Sepal.Width ~ Species, data = iris) + +## Multivarié + +### Régression linéaire + +# Y = a₁X₁ + a₂X₂ + _ + anXn + ε +# Y = Σ aiXi + ε, ε -> N +plot(mtcars) + +fit <- lm(mpg ~ am, data = mtcars) +summary(fit) +plot(fit) + +fit <- lm(mpg ~ cyl + disp + hp + drat + wt + vs + am, data = mtcars) +summary(fit) +plot(fit) + +### Régression logistique +fit <- glm(am ~ mpg + drat + wt, data = mtcars, family = "binomial") +summary(fit) +plot(fit) + +# Deux temps ---- + +## Data +data(ChickWeight) + +### Test apparié + +#### Plots +ChickWeight %>% + filter(Time %in% c(0, 2)) %>% + ggplot() + + aes(x = Time, y = weight, group = Chick, color = Diet) + + geom_point() + + geom_line() + +#### Test +ChickWeight %>% + group_by(Chick) %>% + summarise(maxTime = max(Time)) %>% + summarise(minTime = min(maxTime)) + +t.test(ChickWeight$weight[ChickWeight$Time == 0], ChickWeight$weight[ChickWeight$Time == 2], paired = T) + +### Test apparié à la main +ChickWeight %>% + filter(Time %in% c(0, 2)) %>% + group_by(Chick) %>% + summarise(weightDiff = last(weight) - first(weight), + Diet = first(Diet)) -> deuxTemps + +#### Plots +deuxTemps %>% + ggplot() + + aes(x = weightDiff, fill = Diet) + + geom_density(alpha = .25) #, position = "fill") # position = "stack" "fill" + +oneway.test(weightDiff ~ Diet, data = deuxTemps) +kruskal.test(weightDiff ~ Diet, data = deuxTemps) + +# Survie ---- +library(survival) + +## Data +data(lung) + +## Bivarié +lung$Surv <- Surv(lung$time, lung$status) +survfit(Surv ~ 1, data = lung) -> fit + +plot(fit) + +survfit(Surv ~ sex, data = lung) -> fit +plot(fit, conf.int = T, col = c("blue", "red")) +legend("topright", legend = c("Hommes", "Femmes"), col = c("blue", "red"), lty = 1) + +survdiff(Surv ~ sex, data = lung) + +## Cox +coxph(Surv ~ sex + age + ph.ecog + ph.karno + meal.cal + wt.loss, data = lung) -> fit +cox.zph(fit) %>% plot + +# N temps ---- +library(lme4) +library(lmerTest) + +## Data +data(ChickWeight) + +## Plots +ChickWeight %>% + ggplot() + + aes(x = Time, y = weight, group = Chick, color = Diet) %>% + geom_line()#alpha = .5) + + geom_smooth(aes(x = Time, y = weight, color = Diet)) + +## Test +lmer(weight ~ Diet + (1|Time) + (1|Chick), data = ChickWeight) -> fit + +summary(fit) +plot(fit) + +# Trajectoires ---- +library(TraMineR) +library(cluster) + +## Data +data(mvad) + +## Description +mvad.seq <- seqdef(mvad, 17:86, xtstep = 12) +plot(mvad.seq, 1:20) +seqdplot(mvad.seq) + +## Clustering +mvad.om <- seqdist(mvad.seq, method = "OM", sm = "TRATE") + +clusterward <- agnes(mvad.om, diss = T, method = "ward") +mvad.cl <- cutree(clusterward, k = 4) +cl.lab <- factor(mvad.cl, labels = paste("Cluster", 1:4)) +seqdplot(mvad.seq, group = cl.lab) +seqfplot(mvad.seq, group = cl.lab) +seqmsplot(mvad.seq, group = cl.lab) + +## Analyse +mvad.ent <- seqient(mvad.seq) +lm.ent <- lm(mvad.ent ~ male + funemp + gcse5eq, data = mvad) +summary(lm.ent) + +# Séries temporelles ---- +library(astsa) +library(Rssa) +library(zoo) +library(TSA) + +## Data +data(cmort) + +## Description des données +is.ts(cmort) #on vérifie qu'il s'agit bien d'un objet série temporelle +start(cmort) #Première observation +end(cmort) #Dernière observation +frequency(cmort) #Fréquance des observations +deltat(cmort) #intervalle de temps entre deux observations + +length(cmort) +head(cmort) #visualisation des premier éléments de l'objet cmort + +tsplot(cmort) #visualisation graphique de la série + + +## Méthode de décomposition +MannKendall(cmort) #test de tendance : significatif donc il existe bien une tendance + +### Modèle additif +D_additif1 = decompose(cmort, "additive") +plot(D_additif1) + +### Modèle additif(autre méthode) +D_additif2 = stl(cmort, "periodic") +plot(D_additif2) + +### Modèle multiplicatif +D_multiplicatif = decompose(cmort, "multiplicative") +plot(D_multiplicatif) + +## Plots + +#Jaune : signal recomposé +#Noir pointillé signal initial +graph = function(var){ + plot(var, lwd = 2, col = "#FFC000", main = substitute(var)) + lines(cmort,lty = 2) +} + +#Reconstitution signal avec décomposition additive et bruit +cmort_ad1 = D_additif1$trend + D_additif1$seasonal + D_additif1$random +graph(cmort_ad1) +#Reconstitution signal avec décomposition additive sans bruit +cmort_ad1_ssrandom = D_additif1$trend + D_additif1$seasonal +graph(cmort_ad1_ssrandom) + + +#Reconstitution signal avec décomposition additive et bruit +cmort_ad2 = D_additif2$time.series[,1] + D_additif2$time.series[,2] + D_additif2$time.series[,3] +graph(cmort_ad2) +#Reconstitution signal avec décomposition additive sans bruit +cmort_ad2_ssrandom = D_additif2$time.series[,1] + D_additif2$time.series[,2] +graph(cmort_ad2_ssrandom) + +#Reconstitution signal avec décomposition multiplicative et bruit +cmort_mult = D_multiplicatif$trend*D_multiplicatif$seasonal*D_multiplicatif$random +graph(cmort_mult) +#Reconstitution signal avec décomposition multiplicative sans bruit +cmort_mult_ssrandom = D_multiplicatif$trend*D_multiplicatif$seasonal +graph(cmort_mult_ssrandom) + + +### Autocorrélation et Modélisation + +#autocorrélation +acf(cmort) + +#autocorrélation partielle +pacf(cmort) + + +### Analyse spectrale + +#périodogramme +p = periodogram(cmort) +#Trouver la période : +# Définir la fréquence où le spectre est maximum +# Période : 1/Freq +frequence = p$freq[which.max(p$spec)] +Periode = 1/frequence + +#Lissage par moyenne mobile +plot(cmort, ylab = "cmort", main = "Lissage par moyenne glissante") +lines(filter(cmort, rep(1/20, 20), side = 1), col = "#FFC000", lwd = 2) +lines(filter(cmort, rep(1/12, 12), side = 1), col = "red", lwd = 2) +lines(filter(cmort, rep(1/50, 50), side = 1), col = "indianred4", lwd = 2) +legend("topright", col = c("red", "#FFC000", "indianred4"), legend = c("1/12", "1/20", "1/100"), lwd = c(2,2,2)) + +#Lissage par noyaux +#Calcul de 3 noyaux +k1 <- kernel("daniell", 10) +k2 <- kernel("daniell", 5) +k3 <- kernel("daniell", 30) +#Application des 3 noyaux aux données +x1 <- kernapply(cmort, k1) +x2 <- kernapply(cmort, k2) +x3 <- kernapply(cmort, k3) +#Graphiques +plot(cmort, main = "Lissage par noyaux") +lines(x1, col = "#FFC000", lwd = 2) +lines(x2, col = "red", lwd = 2) +lines(x3, col = "indianred4", lwd = 2) +legend("topright", col = c("red", "#FFC000", "indianred4"), legend = c("5", "10", "30"), lwd = c(2,2,2)) + + +### Série temporelle multiple + +#Graphique +serie = cbind(lap[,10],cmort) +colnames(serie)[1] = "ozone" +plot(serie) +abline(v = 1970:1980, col ="#FFC000", lty = 2, lwd = 2) + +#Test de causalité de Granger +grangertest(cmort ~ lap[,4]) #Température +grangertest(cmort ~ lap[,5]) #Humidité relative +grangertest(cmort ~ lap[,6]) #Monoxyde de carbone +grangertest(cmort ~ lap[,7]) #Dioxyde de soufre +grangertest(cmort ~ lap[,8]) #Dioxyde d'azote +grangertest(cmort ~ lap[,9]) #Hydrocarbure +grangertest(cmort ~ lap[,10]) #Ozone +grangertest(cmort ~ lap[,11]) #Particule \ No newline at end of file diff --git a/illustrations.R b/illustrations.R new file mode 100644 index 0000000..cf33ed6 --- /dev/null +++ b/illustrations.R @@ -0,0 +1,182 @@ +library(tidyverse) +library(patchwork) + + +# Graphique test statistique ---- + +seq(100, 0) %>% + map(function(x){ + ci <- function(m, s, n, z = 1.34) + { + dev <- z * (s / sqrt(n)) + c(m - dev, m + dev) + } + + ztest <- function(m1, s1, n1, m2, s2, n2) + { + zscore <- abs(m2 - m1) / sqrt((s1^2 / n1) + (s2^2 / n2)) + 2 * (1 - pnorm(zscore)) + } + + tibble(m1 = x/10, + m2 = - x/10, + s = 10, + n = 20) -> df + + df %>% + ggplot() + + aes(x = m1) + + stat_function(fun = dnorm, args = list(mean = df$m1, sd = df$s), color = "blue", size = 1.5, n = 1000) + + stat_function(fun = dnorm, args = list(mean = df$m2, sd = df$s), color = "red", size = 1.5, n = 1000) + + geom_vline(xintercept = ci(df$m1, df$s, df$n), color = "blue") + + geom_vline(xintercept = ci(df$m2, df$s, df$n), color = "red") + + geom_text(x = -30, hjust = 0, y = .05, vjust = 1, label = str_c("p = ", ztest(df$m1, df$s, df$n, df$m2, df$s, df$n)), size = 4) + + scale_x_continuous(limits = c(-30 , 30)) + + scale_y_continuous(limits = c(0, .05)) + + theme_classic() + + xlab("x") -> p1 + + df %>% + ggplot() + + aes(x = m1) + + stat_function(fun = dnorm, args = list(mean = abs(df$m2 - df$m1), sd = sqrt(df$s^2 / df$n + df$s^2 / df$n))) + + geom_vline(xintercept = 0) + + geom_vline(xintercept = ci(abs(df$m2 - df$m1), sqrt(df$s^2 / df$n + df$s^2 / df$n), 1, 1.96)) + + scale_x_continuous(limits = c(-10, 40)) + + theme_classic() + + xlab("x") -> p2 + + p1 + p2 + plot_layout(ncol = 1, heights = c(8, 2)) +}) -> plots + +1:101 %>% + map(~ ggsave(plot = plots[[.]], + filename = str_c("anim/", str_pad(., width = 3, pad = "0"), ".png"), + unit = "cm", + dpi = 200, + width = 16, + height = 9)) +# Un temps ---- + +one <- tibble(color = rep(LETTERS[1:3], each = 10), + y = map((-1:1), ~rnorm(10, ., .1)) %>% unlist, + x = rnorm(30, 0, .1)) + +one %>% + ggplot() + + geom_segment(x = -1, y = 0, xend = 1, yend = 0, color = "darkgrey", arrow = arrow(), size = 2) + + geom_segment(jx = 0, y = -.1, xend = 0, yend = .1, color = "darkgrey", size = 2) + + aes(x = x, y = y, color = color) + + geom_point(size = 2) + + scale_x_continuous(limits = c(-1, 1)) + + scale_y_continuous(limits = c(-2, 2)) + + theme_void() + + theme(legend.position = "none", + plot.background = element_rect(fill = "transparent", colour = "transparent")) -> +oneplot + +ggsave(plot = oneplot, filename = "one.png", bg = "transparent") + +# Deux temps ---- + +two <- tibble(x = map(c(-.5, .5), ~rnorm(10, ., .1)) %>% unlist, + y = map(c(-1, 1), ~rnorm(10, ., .1)) %>% unlist, + colour = rep("A", 20)) + +two %>% + ggplot() + + geom_segment(x = -1, y = 0, xend = 1, yend = 0, color = "darkgrey", arrow = arrow(), size = 2) + + geom_segment(x = -.5, y = -.1, xend = -.5, yend = .1, color = "darkgrey", size = 2) + + geom_segment(x = .5, y = -.1, xend = .5, yend = .1, color = "darkgrey", size = 2) + + aes(x = x, y = y, colour = colour) + + geom_point(size = 2) + + scale_x_continuous(limits = c(-1, 1)) + + scale_y_continuous(limits = c(-2, 2)) + + theme_void() + + theme(legend.position = "none", + plot.background = element_rect(fill = "transparent", colour = "transparent")) -> +twoplot + +ggsave(plot = twoplot, filename = "two.png", bg = "transparent") + +# N temps ---- + +many <- tibble(x = map(-3:3, ~rnorm(10, ., .1)) %>% unlist, + y = runif(7, -1.5, 1.5) %>% map(~rnorm(10, ., .1)) %>% unlist, + colour = rep("A", 70)) + +many %>% + ggplot() + + geom_segment(x = -4, y = 0, xend = 4, yend = 0, color = "darkgrey", arrow = arrow(), size = 2) + + geom_segment(x = -3, y = -.1, xend = -3, yend = .1, color = "darkgrey", size = 2) + + geom_segment(x = -2, y = -.1, xend = -2, yend = .1, color = "darkgrey", size = 2) + + geom_segment(x = -1, y = -.1, xend = -1, yend = .1, color = "darkgrey", size = 2) + + geom_segment(x = 0, y = -.1, xend = 0, yend = .1, color = "darkgrey", size = 2) + + geom_segment(x = 1, y = -.1, xend = 1, yend = .1, color = "darkgrey", size = 2) + + geom_segment(x = 2, y = -.1, xend = 2, yend = .1, color = "darkgrey", size = 2) + + geom_segment(x = 3, y = -.1, xend = 3, yend = .1, color = "darkgrey", size = 2) + + aes(x = x, y = y, colour = colour) + + geom_point(size = 2) + + scale_x_continuous(limits = c(-4, 4)) + + scale_y_continuous(limits = c(-2, 2)) + + theme_void() + + theme(legend.position = "none", + plot.background = element_rect(fill = "transparent", colour = "transparent")) -> +manyplot + +ggsave(plot = manyplot, filename = "many.png", bg = "transparent") + +# Série temporelle ---- + +## Figure introduction +library(astsa) +data(flu) #chargement de la base "flu" du package astsa + +## Représentation graphique de la série temporelle +tsplot(flu,xlab = "Time", ylab = "Deaths per 10,000 people", + main="Monthly pneumonia and influenza deaths per 10,000 people \n in the United States for 11 years, 1968 to 1978") + +## Simulation série + +### Simulation des paramêtres temps, tendance, saisonalité, bruit +t = time(cmort) +m = 0.2*t+2.1 +s = cos(5*t) +epsilon = rnorm(length(t),0,0.5) + +### Simulation des séries temporelles comme modèle additif/multiplicatif/Hybride +simu.ts.additif = ts(m+s+epsilon,frequency = 52, start = c(1970, 1) ) +tsplot(simu.ts.additif) + +simu.ts.multiplictif = ts(m*s*epsilon,frequency = 52, start = c(1970, 1) ) +tsplot(simu.ts.multiplictif) + +simu.ts.hybride = ts(m*s+epsilon*100,frequency = 52, start = c(1970, 1) ) +tsplot(simu.ts.hybride) + +#### Courbes +plot(t,m, type = "l", main = "Tendance", xlab = "Temps", ylab = "") +plot(t,s, type = "l", main = "Saisonnalité", xlab = "Temps", ylab = "") +plot(t,epsilon, type = "l", main = "Bruit", xlab = "Temps", ylab = "") + +### Décomposition des signaux simulés +plot(decompose(simu.ts.additif, type="additive")) #décomposition trend/seasonnality/noise, modèle additif +plot(stl(simu.ts.additif, "periodic")) +plot(decompose(decompose(simu.ts.additif, type="additive")$random, type="additive")) + +## Simulation modèles AR et MA + +### Simulation AR(1) : X_t = 0.6X_{t???1} + ??_t +X=arima.sim(n = 2400, list(ar = 0.6),sd = 1) +plot(acf(X),lwd=2, main = "Acf AR(1)") +plot(pacf(X),lwd=2, main = "Pacf AR(1)") + +### Simulation AR(2) : Xt = 0.6X_{t???1} - 0.4X_{t???2} + ??t +X=arima.sim(n = 2400, list(ar = c(0.6,-0.4)),sd = 1) +plot(acf(X),lwd=2, main = "Acf AR(2)") +plot(pacf(X),lwd=2, main = "Pacf AR(2)") + +### Simulation MA(1) X_t = ??_t + 0.7??_{t???1} +X=arima.sim(n = 2400, list(ma = .7),sd = 1) +plot(acf(X),lwd=2, main = "Acf MA(1)") +plot(pacf(X),lwd=2, main = "Pacf MA(1)") \ No newline at end of file