@@ -0,0 +1,310 @@ | |||||
library(tidyverse) | |||||
# Un temps ---- | |||||
## Data | |||||
data(iris) | |||||
data(mtcars) | |||||
mtcars %>% | |||||
mutate_at(vars(am, cyl, vs, gear), factor) -> | |||||
mtcars | |||||
## Deux groupes | |||||
### Plots | |||||
boxplot(mpg ~ am, data = mtcars) | |||||
ggplot(mtcars) + | |||||
aes(x = am, y = mpg) + | |||||
geom_boxplot() + | |||||
geom_point(alpha = .5) | |||||
### Paramétrique | |||||
t.test(mpg ~ am, data = mtcars, var.equal = T) | |||||
oneway.test(mpg ~ am, data = mtcars, var.equal = T) | |||||
### Non-paramétrique | |||||
wilcox.test(mpg ~ am, data = mtcars) | |||||
## Plusieurs groupes | |||||
### Plots | |||||
iris %>% | |||||
ggplot() + | |||||
aes(x = Species, y = Sepal.Width) + | |||||
geom_violin() + | |||||
geom_point(position = "jitter", alpha = .5) | |||||
iris %>% | |||||
ggplot() + | |||||
aes(x = Petal.Length) + | |||||
geom_histogram(data = iris %>% select(-Species)) + | |||||
geom_histogram(aes(fill = Species)) + | |||||
facet_grid(~Species) | |||||
### Paramétrique | |||||
oneway.test(Sepal.Width ~ Species, data = iris) | |||||
### Non-paramétrique | |||||
kruskal.test(Sepal.Width ~ Species, data = iris) | |||||
## Multivarié | |||||
### Régression linéaire | |||||
# Y = a₁X₁ + a₂X₂ + _ + anXn + ε | |||||
# Y = Σ aiXi + ε, ε -> N | |||||
plot(mtcars) | |||||
fit <- lm(mpg ~ am, data = mtcars) | |||||
summary(fit) | |||||
plot(fit) | |||||
fit <- lm(mpg ~ cyl + disp + hp + drat + wt + vs + am, data = mtcars) | |||||
summary(fit) | |||||
plot(fit) | |||||
### Régression logistique | |||||
fit <- glm(am ~ mpg + drat + wt, data = mtcars, family = "binomial") | |||||
summary(fit) | |||||
plot(fit) | |||||
# Deux temps ---- | |||||
## Data | |||||
data(ChickWeight) | |||||
### Test apparié | |||||
#### Plots | |||||
ChickWeight %>% | |||||
filter(Time %in% c(0, 2)) %>% | |||||
ggplot() + | |||||
aes(x = Time, y = weight, group = Chick, color = Diet) + | |||||
geom_point() + | |||||
geom_line() | |||||
#### Test | |||||
ChickWeight %>% | |||||
group_by(Chick) %>% | |||||
summarise(maxTime = max(Time)) %>% | |||||
summarise(minTime = min(maxTime)) | |||||
t.test(ChickWeight$weight[ChickWeight$Time == 0], ChickWeight$weight[ChickWeight$Time == 2], paired = T) | |||||
### Test apparié à la main | |||||
ChickWeight %>% | |||||
filter(Time %in% c(0, 2)) %>% | |||||
group_by(Chick) %>% | |||||
summarise(weightDiff = last(weight) - first(weight), | |||||
Diet = first(Diet)) -> deuxTemps | |||||
#### Plots | |||||
deuxTemps %>% | |||||
ggplot() + | |||||
aes(x = weightDiff, fill = Diet) + | |||||
geom_density(alpha = .25) #, position = "fill") # position = "stack" "fill" | |||||
oneway.test(weightDiff ~ Diet, data = deuxTemps) | |||||
kruskal.test(weightDiff ~ Diet, data = deuxTemps) | |||||
# Survie ---- | |||||
library(survival) | |||||
## Data | |||||
data(lung) | |||||
## Bivarié | |||||
lung$Surv <- Surv(lung$time, lung$status) | |||||
survfit(Surv ~ 1, data = lung) -> fit | |||||
plot(fit) | |||||
survfit(Surv ~ sex, data = lung) -> fit | |||||
plot(fit, conf.int = T, col = c("blue", "red")) | |||||
legend("topright", legend = c("Hommes", "Femmes"), col = c("blue", "red"), lty = 1) | |||||
survdiff(Surv ~ sex, data = lung) | |||||
## Cox | |||||
coxph(Surv ~ sex + age + ph.ecog + ph.karno + meal.cal + wt.loss, data = lung) -> fit | |||||
cox.zph(fit) %>% plot | |||||
# N temps ---- | |||||
library(lme4) | |||||
library(lmerTest) | |||||
## Data | |||||
data(ChickWeight) | |||||
## Plots | |||||
ChickWeight %>% | |||||
ggplot() + | |||||
aes(x = Time, y = weight, group = Chick, color = Diet) %>% | |||||
geom_line()#alpha = .5) + | |||||
geom_smooth(aes(x = Time, y = weight, color = Diet)) | |||||
## Test | |||||
lmer(weight ~ Diet + (1|Time) + (1|Chick), data = ChickWeight) -> fit | |||||
summary(fit) | |||||
plot(fit) | |||||
# Trajectoires ---- | |||||
library(TraMineR) | |||||
library(cluster) | |||||
## Data | |||||
data(mvad) | |||||
## Description | |||||
mvad.seq <- seqdef(mvad, 17:86, xtstep = 12) | |||||
plot(mvad.seq, 1:20) | |||||
seqdplot(mvad.seq) | |||||
## Clustering | |||||
mvad.om <- seqdist(mvad.seq, method = "OM", sm = "TRATE") | |||||
clusterward <- agnes(mvad.om, diss = T, method = "ward") | |||||
mvad.cl <- cutree(clusterward, k = 4) | |||||
cl.lab <- factor(mvad.cl, labels = paste("Cluster", 1:4)) | |||||
seqdplot(mvad.seq, group = cl.lab) | |||||
seqfplot(mvad.seq, group = cl.lab) | |||||
seqmsplot(mvad.seq, group = cl.lab) | |||||
## Analyse | |||||
mvad.ent <- seqient(mvad.seq) | |||||
lm.ent <- lm(mvad.ent ~ male + funemp + gcse5eq, data = mvad) | |||||
summary(lm.ent) | |||||
# Séries temporelles ---- | |||||
library(astsa) | |||||
library(Rssa) | |||||
library(zoo) | |||||
library(TSA) | |||||
## Data | |||||
data(cmort) | |||||
## Description des données | |||||
is.ts(cmort) #on vérifie qu'il s'agit bien d'un objet série temporelle | |||||
start(cmort) #Première observation | |||||
end(cmort) #Dernière observation | |||||
frequency(cmort) #Fréquance des observations | |||||
deltat(cmort) #intervalle de temps entre deux observations | |||||
length(cmort) | |||||
head(cmort) #visualisation des premier éléments de l'objet cmort | |||||
tsplot(cmort) #visualisation graphique de la série | |||||
## Méthode de décomposition | |||||
MannKendall(cmort) #test de tendance : significatif donc il existe bien une tendance | |||||
### Modèle additif | |||||
D_additif1 = decompose(cmort, "additive") | |||||
plot(D_additif1) | |||||
### Modèle additif(autre méthode) | |||||
D_additif2 = stl(cmort, "periodic") | |||||
plot(D_additif2) | |||||
### Modèle multiplicatif | |||||
D_multiplicatif = decompose(cmort, "multiplicative") | |||||
plot(D_multiplicatif) | |||||
## Plots | |||||
#Jaune : signal recomposé | |||||
#Noir pointillé signal initial | |||||
graph = function(var){ | |||||
plot(var, lwd = 2, col = "#FFC000", main = substitute(var)) | |||||
lines(cmort,lty = 2) | |||||
} | |||||
#Reconstitution signal avec décomposition additive et bruit | |||||
cmort_ad1 = D_additif1$trend + D_additif1$seasonal + D_additif1$random | |||||
graph(cmort_ad1) | |||||
#Reconstitution signal avec décomposition additive sans bruit | |||||
cmort_ad1_ssrandom = D_additif1$trend + D_additif1$seasonal | |||||
graph(cmort_ad1_ssrandom) | |||||
#Reconstitution signal avec décomposition additive et bruit | |||||
cmort_ad2 = D_additif2$time.series[,1] + D_additif2$time.series[,2] + D_additif2$time.series[,3] | |||||
graph(cmort_ad2) | |||||
#Reconstitution signal avec décomposition additive sans bruit | |||||
cmort_ad2_ssrandom = D_additif2$time.series[,1] + D_additif2$time.series[,2] | |||||
graph(cmort_ad2_ssrandom) | |||||
#Reconstitution signal avec décomposition multiplicative et bruit | |||||
cmort_mult = D_multiplicatif$trend*D_multiplicatif$seasonal*D_multiplicatif$random | |||||
graph(cmort_mult) | |||||
#Reconstitution signal avec décomposition multiplicative sans bruit | |||||
cmort_mult_ssrandom = D_multiplicatif$trend*D_multiplicatif$seasonal | |||||
graph(cmort_mult_ssrandom) | |||||
### Autocorrélation et Modélisation | |||||
#autocorrélation | |||||
acf(cmort) | |||||
#autocorrélation partielle | |||||
pacf(cmort) | |||||
### Analyse spectrale | |||||
#périodogramme | |||||
p = periodogram(cmort) | |||||
#Trouver la période : | |||||
# Définir la fréquence où le spectre est maximum | |||||
# Période : 1/Freq | |||||
frequence = p$freq[which.max(p$spec)] | |||||
Periode = 1/frequence | |||||
#Lissage par moyenne mobile | |||||
plot(cmort, ylab = "cmort", main = "Lissage par moyenne glissante") | |||||
lines(filter(cmort, rep(1/20, 20), side = 1), col = "#FFC000", lwd = 2) | |||||
lines(filter(cmort, rep(1/12, 12), side = 1), col = "red", lwd = 2) | |||||
lines(filter(cmort, rep(1/50, 50), side = 1), col = "indianred4", lwd = 2) | |||||
legend("topright", col = c("red", "#FFC000", "indianred4"), legend = c("1/12", "1/20", "1/100"), lwd = c(2,2,2)) | |||||
#Lissage par noyaux | |||||
#Calcul de 3 noyaux | |||||
k1 <- kernel("daniell", 10) | |||||
k2 <- kernel("daniell", 5) | |||||
k3 <- kernel("daniell", 30) | |||||
#Application des 3 noyaux aux données | |||||
x1 <- kernapply(cmort, k1) | |||||
x2 <- kernapply(cmort, k2) | |||||
x3 <- kernapply(cmort, k3) | |||||
#Graphiques | |||||
plot(cmort, main = "Lissage par noyaux") | |||||
lines(x1, col = "#FFC000", lwd = 2) | |||||
lines(x2, col = "red", lwd = 2) | |||||
lines(x3, col = "indianred4", lwd = 2) | |||||
legend("topright", col = c("red", "#FFC000", "indianred4"), legend = c("5", "10", "30"), lwd = c(2,2,2)) | |||||
### Série temporelle multiple | |||||
#Graphique | |||||
serie = cbind(lap[,10],cmort) | |||||
colnames(serie)[1] = "ozone" | |||||
plot(serie) | |||||
abline(v = 1970:1980, col ="#FFC000", lty = 2, lwd = 2) | |||||
#Test de causalité de Granger | |||||
grangertest(cmort ~ lap[,4]) #Température | |||||
grangertest(cmort ~ lap[,5]) #Humidité relative | |||||
grangertest(cmort ~ lap[,6]) #Monoxyde de carbone | |||||
grangertest(cmort ~ lap[,7]) #Dioxyde de soufre | |||||
grangertest(cmort ~ lap[,8]) #Dioxyde d'azote | |||||
grangertest(cmort ~ lap[,9]) #Hydrocarbure | |||||
grangertest(cmort ~ lap[,10]) #Ozone | |||||
grangertest(cmort ~ lap[,11]) #Particule |
@@ -0,0 +1,182 @@ | |||||
library(tidyverse) | |||||
library(patchwork) | |||||
# Graphique test statistique ---- | |||||
seq(100, 0) %>% | |||||
map(function(x){ | |||||
ci <- function(m, s, n, z = 1.34) | |||||
{ | |||||
dev <- z * (s / sqrt(n)) | |||||
c(m - dev, m + dev) | |||||
} | |||||
ztest <- function(m1, s1, n1, m2, s2, n2) | |||||
{ | |||||
zscore <- abs(m2 - m1) / sqrt((s1^2 / n1) + (s2^2 / n2)) | |||||
2 * (1 - pnorm(zscore)) | |||||
} | |||||
tibble(m1 = x/10, | |||||
m2 = - x/10, | |||||
s = 10, | |||||
n = 20) -> df | |||||
df %>% | |||||
ggplot() + | |||||
aes(x = m1) + | |||||
stat_function(fun = dnorm, args = list(mean = df$m1, sd = df$s), color = "blue", size = 1.5, n = 1000) + | |||||
stat_function(fun = dnorm, args = list(mean = df$m2, sd = df$s), color = "red", size = 1.5, n = 1000) + | |||||
geom_vline(xintercept = ci(df$m1, df$s, df$n), color = "blue") + | |||||
geom_vline(xintercept = ci(df$m2, df$s, df$n), color = "red") + | |||||
geom_text(x = -30, hjust = 0, y = .05, vjust = 1, label = str_c("p = ", ztest(df$m1, df$s, df$n, df$m2, df$s, df$n)), size = 4) + | |||||
scale_x_continuous(limits = c(-30 , 30)) + | |||||
scale_y_continuous(limits = c(0, .05)) + | |||||
theme_classic() + | |||||
xlab("x") -> p1 | |||||
df %>% | |||||
ggplot() + | |||||
aes(x = m1) + | |||||
stat_function(fun = dnorm, args = list(mean = abs(df$m2 - df$m1), sd = sqrt(df$s^2 / df$n + df$s^2 / df$n))) + | |||||
geom_vline(xintercept = 0) + | |||||
geom_vline(xintercept = ci(abs(df$m2 - df$m1), sqrt(df$s^2 / df$n + df$s^2 / df$n), 1, 1.96)) + | |||||
scale_x_continuous(limits = c(-10, 40)) + | |||||
theme_classic() + | |||||
xlab("x") -> p2 | |||||
p1 + p2 + plot_layout(ncol = 1, heights = c(8, 2)) | |||||
}) -> plots | |||||
1:101 %>% | |||||
map(~ ggsave(plot = plots[[.]], | |||||
filename = str_c("anim/", str_pad(., width = 3, pad = "0"), ".png"), | |||||
unit = "cm", | |||||
dpi = 200, | |||||
width = 16, | |||||
height = 9)) | |||||
# Un temps ---- | |||||
one <- tibble(color = rep(LETTERS[1:3], each = 10), | |||||
y = map((-1:1), ~rnorm(10, ., .1)) %>% unlist, | |||||
x = rnorm(30, 0, .1)) | |||||
one %>% | |||||
ggplot() + | |||||
geom_segment(x = -1, y = 0, xend = 1, yend = 0, color = "darkgrey", arrow = arrow(), size = 2) + | |||||
geom_segment(jx = 0, y = -.1, xend = 0, yend = .1, color = "darkgrey", size = 2) + | |||||
aes(x = x, y = y, color = color) + | |||||
geom_point(size = 2) + | |||||
scale_x_continuous(limits = c(-1, 1)) + | |||||
scale_y_continuous(limits = c(-2, 2)) + | |||||
theme_void() + | |||||
theme(legend.position = "none", | |||||
plot.background = element_rect(fill = "transparent", colour = "transparent")) -> | |||||
oneplot | |||||
ggsave(plot = oneplot, filename = "one.png", bg = "transparent") | |||||
# Deux temps ---- | |||||
two <- tibble(x = map(c(-.5, .5), ~rnorm(10, ., .1)) %>% unlist, | |||||
y = map(c(-1, 1), ~rnorm(10, ., .1)) %>% unlist, | |||||
colour = rep("A", 20)) | |||||
two %>% | |||||
ggplot() + | |||||
geom_segment(x = -1, y = 0, xend = 1, yend = 0, color = "darkgrey", arrow = arrow(), size = 2) + | |||||
geom_segment(x = -.5, y = -.1, xend = -.5, yend = .1, color = "darkgrey", size = 2) + | |||||
geom_segment(x = .5, y = -.1, xend = .5, yend = .1, color = "darkgrey", size = 2) + | |||||
aes(x = x, y = y, colour = colour) + | |||||
geom_point(size = 2) + | |||||
scale_x_continuous(limits = c(-1, 1)) + | |||||
scale_y_continuous(limits = c(-2, 2)) + | |||||
theme_void() + | |||||
theme(legend.position = "none", | |||||
plot.background = element_rect(fill = "transparent", colour = "transparent")) -> | |||||
twoplot | |||||
ggsave(plot = twoplot, filename = "two.png", bg = "transparent") | |||||
# N temps ---- | |||||
many <- tibble(x = map(-3:3, ~rnorm(10, ., .1)) %>% unlist, | |||||
y = runif(7, -1.5, 1.5) %>% map(~rnorm(10, ., .1)) %>% unlist, | |||||
colour = rep("A", 70)) | |||||
many %>% | |||||
ggplot() + | |||||
geom_segment(x = -4, y = 0, xend = 4, yend = 0, color = "darkgrey", arrow = arrow(), size = 2) + | |||||
geom_segment(x = -3, y = -.1, xend = -3, yend = .1, color = "darkgrey", size = 2) + | |||||
geom_segment(x = -2, y = -.1, xend = -2, yend = .1, color = "darkgrey", size = 2) + | |||||
geom_segment(x = -1, y = -.1, xend = -1, yend = .1, color = "darkgrey", size = 2) + | |||||
geom_segment(x = 0, y = -.1, xend = 0, yend = .1, color = "darkgrey", size = 2) + | |||||
geom_segment(x = 1, y = -.1, xend = 1, yend = .1, color = "darkgrey", size = 2) + | |||||
geom_segment(x = 2, y = -.1, xend = 2, yend = .1, color = "darkgrey", size = 2) + | |||||
geom_segment(x = 3, y = -.1, xend = 3, yend = .1, color = "darkgrey", size = 2) + | |||||
aes(x = x, y = y, colour = colour) + | |||||
geom_point(size = 2) + | |||||
scale_x_continuous(limits = c(-4, 4)) + | |||||
scale_y_continuous(limits = c(-2, 2)) + | |||||
theme_void() + | |||||
theme(legend.position = "none", | |||||
plot.background = element_rect(fill = "transparent", colour = "transparent")) -> | |||||
manyplot | |||||
ggsave(plot = manyplot, filename = "many.png", bg = "transparent") | |||||
# Série temporelle ---- | |||||
## Figure introduction | |||||
library(astsa) | |||||
data(flu) #chargement de la base "flu" du package astsa | |||||
## Représentation graphique de la série temporelle | |||||
tsplot(flu,xlab = "Time", ylab = "Deaths per 10,000 people", | |||||
main="Monthly pneumonia and influenza deaths per 10,000 people \n in the United States for 11 years, 1968 to 1978") | |||||
## Simulation série | |||||
### Simulation des paramêtres temps, tendance, saisonalité, bruit | |||||
t = time(cmort) | |||||
m = 0.2*t+2.1 | |||||
s = cos(5*t) | |||||
epsilon = rnorm(length(t),0,0.5) | |||||
### Simulation des séries temporelles comme modèle additif/multiplicatif/Hybride | |||||
simu.ts.additif = ts(m+s+epsilon,frequency = 52, start = c(1970, 1) ) | |||||
tsplot(simu.ts.additif) | |||||
simu.ts.multiplictif = ts(m*s*epsilon,frequency = 52, start = c(1970, 1) ) | |||||
tsplot(simu.ts.multiplictif) | |||||
simu.ts.hybride = ts(m*s+epsilon*100,frequency = 52, start = c(1970, 1) ) | |||||
tsplot(simu.ts.hybride) | |||||
#### Courbes | |||||
plot(t,m, type = "l", main = "Tendance", xlab = "Temps", ylab = "") | |||||
plot(t,s, type = "l", main = "Saisonnalité", xlab = "Temps", ylab = "") | |||||
plot(t,epsilon, type = "l", main = "Bruit", xlab = "Temps", ylab = "") | |||||
### Décomposition des signaux simulés | |||||
plot(decompose(simu.ts.additif, type="additive")) #décomposition trend/seasonnality/noise, modèle additif | |||||
plot(stl(simu.ts.additif, "periodic")) | |||||
plot(decompose(decompose(simu.ts.additif, type="additive")$random, type="additive")) | |||||
## Simulation modèles AR et MA | |||||
### Simulation AR(1) : X_t = 0.6X_{t???1} + ??_t | |||||
X=arima.sim(n = 2400, list(ar = 0.6),sd = 1) | |||||
plot(acf(X),lwd=2, main = "Acf AR(1)") | |||||
plot(pacf(X),lwd=2, main = "Pacf AR(1)") | |||||
### Simulation AR(2) : Xt = 0.6X_{t???1} - 0.4X_{t???2} + ??t | |||||
X=arima.sim(n = 2400, list(ar = c(0.6,-0.4)),sd = 1) | |||||
plot(acf(X),lwd=2, main = "Acf AR(2)") | |||||
plot(pacf(X),lwd=2, main = "Pacf AR(2)") | |||||
### Simulation MA(1) X_t = ??_t + 0.7??_{t???1} | |||||
X=arima.sim(n = 2400, list(ma = .7),sd = 1) | |||||
plot(acf(X),lwd=2, main = "Acf MA(1)") | |||||
plot(pacf(X),lwd=2, main = "Pacf MA(1)") |