@@ -0,0 +1,310 @@ | |||
library(tidyverse) | |||
# Un temps ---- | |||
## Data | |||
data(iris) | |||
data(mtcars) | |||
mtcars %>% | |||
mutate_at(vars(am, cyl, vs, gear), factor) -> | |||
mtcars | |||
## Deux groupes | |||
### Plots | |||
boxplot(mpg ~ am, data = mtcars) | |||
ggplot(mtcars) + | |||
aes(x = am, y = mpg) + | |||
geom_boxplot() + | |||
geom_point(alpha = .5) | |||
### Paramétrique | |||
t.test(mpg ~ am, data = mtcars, var.equal = T) | |||
oneway.test(mpg ~ am, data = mtcars, var.equal = T) | |||
### Non-paramétrique | |||
wilcox.test(mpg ~ am, data = mtcars) | |||
## Plusieurs groupes | |||
### Plots | |||
iris %>% | |||
ggplot() + | |||
aes(x = Species, y = Sepal.Width) + | |||
geom_violin() + | |||
geom_point(position = "jitter", alpha = .5) | |||
iris %>% | |||
ggplot() + | |||
aes(x = Petal.Length) + | |||
geom_histogram(data = iris %>% select(-Species)) + | |||
geom_histogram(aes(fill = Species)) + | |||
facet_grid(~Species) | |||
### Paramétrique | |||
oneway.test(Sepal.Width ~ Species, data = iris) | |||
### Non-paramétrique | |||
kruskal.test(Sepal.Width ~ Species, data = iris) | |||
## Multivarié | |||
### Régression linéaire | |||
# Y = a₁X₁ + a₂X₂ + _ + anXn + ε | |||
# Y = Σ aiXi + ε, ε -> N | |||
plot(mtcars) | |||
fit <- lm(mpg ~ am, data = mtcars) | |||
summary(fit) | |||
plot(fit) | |||
fit <- lm(mpg ~ cyl + disp + hp + drat + wt + vs + am, data = mtcars) | |||
summary(fit) | |||
plot(fit) | |||
### Régression logistique | |||
fit <- glm(am ~ mpg + drat + wt, data = mtcars, family = "binomial") | |||
summary(fit) | |||
plot(fit) | |||
# Deux temps ---- | |||
## Data | |||
data(ChickWeight) | |||
### Test apparié | |||
#### Plots | |||
ChickWeight %>% | |||
filter(Time %in% c(0, 2)) %>% | |||
ggplot() + | |||
aes(x = Time, y = weight, group = Chick, color = Diet) + | |||
geom_point() + | |||
geom_line() | |||
#### Test | |||
ChickWeight %>% | |||
group_by(Chick) %>% | |||
summarise(maxTime = max(Time)) %>% | |||
summarise(minTime = min(maxTime)) | |||
t.test(ChickWeight$weight[ChickWeight$Time == 0], ChickWeight$weight[ChickWeight$Time == 2], paired = T) | |||
### Test apparié à la main | |||
ChickWeight %>% | |||
filter(Time %in% c(0, 2)) %>% | |||
group_by(Chick) %>% | |||
summarise(weightDiff = last(weight) - first(weight), | |||
Diet = first(Diet)) -> deuxTemps | |||
#### Plots | |||
deuxTemps %>% | |||
ggplot() + | |||
aes(x = weightDiff, fill = Diet) + | |||
geom_density(alpha = .25) #, position = "fill") # position = "stack" "fill" | |||
oneway.test(weightDiff ~ Diet, data = deuxTemps) | |||
kruskal.test(weightDiff ~ Diet, data = deuxTemps) | |||
# Survie ---- | |||
library(survival) | |||
## Data | |||
data(lung) | |||
## Bivarié | |||
lung$Surv <- Surv(lung$time, lung$status) | |||
survfit(Surv ~ 1, data = lung) -> fit | |||
plot(fit) | |||
survfit(Surv ~ sex, data = lung) -> fit | |||
plot(fit, conf.int = T, col = c("blue", "red")) | |||
legend("topright", legend = c("Hommes", "Femmes"), col = c("blue", "red"), lty = 1) | |||
survdiff(Surv ~ sex, data = lung) | |||
## Cox | |||
coxph(Surv ~ sex + age + ph.ecog + ph.karno + meal.cal + wt.loss, data = lung) -> fit | |||
cox.zph(fit) %>% plot | |||
# N temps ---- | |||
library(lme4) | |||
library(lmerTest) | |||
## Data | |||
data(ChickWeight) | |||
## Plots | |||
ChickWeight %>% | |||
ggplot() + | |||
aes(x = Time, y = weight, group = Chick, color = Diet) %>% | |||
geom_line()#alpha = .5) + | |||
geom_smooth(aes(x = Time, y = weight, color = Diet)) | |||
## Test | |||
lmer(weight ~ Diet + (1|Time) + (1|Chick), data = ChickWeight) -> fit | |||
summary(fit) | |||
plot(fit) | |||
# Trajectoires ---- | |||
library(TraMineR) | |||
library(cluster) | |||
## Data | |||
data(mvad) | |||
## Description | |||
mvad.seq <- seqdef(mvad, 17:86, xtstep = 12) | |||
plot(mvad.seq, 1:20) | |||
seqdplot(mvad.seq) | |||
## Clustering | |||
mvad.om <- seqdist(mvad.seq, method = "OM", sm = "TRATE") | |||
clusterward <- agnes(mvad.om, diss = T, method = "ward") | |||
mvad.cl <- cutree(clusterward, k = 4) | |||
cl.lab <- factor(mvad.cl, labels = paste("Cluster", 1:4)) | |||
seqdplot(mvad.seq, group = cl.lab) | |||
seqfplot(mvad.seq, group = cl.lab) | |||
seqmsplot(mvad.seq, group = cl.lab) | |||
## Analyse | |||
mvad.ent <- seqient(mvad.seq) | |||
lm.ent <- lm(mvad.ent ~ male + funemp + gcse5eq, data = mvad) | |||
summary(lm.ent) | |||
# Séries temporelles ---- | |||
library(astsa) | |||
library(Rssa) | |||
library(zoo) | |||
library(TSA) | |||
## Data | |||
data(cmort) | |||
## Description des données | |||
is.ts(cmort) #on vérifie qu'il s'agit bien d'un objet série temporelle | |||
start(cmort) #Première observation | |||
end(cmort) #Dernière observation | |||
frequency(cmort) #Fréquance des observations | |||
deltat(cmort) #intervalle de temps entre deux observations | |||
length(cmort) | |||
head(cmort) #visualisation des premier éléments de l'objet cmort | |||
tsplot(cmort) #visualisation graphique de la série | |||
## Méthode de décomposition | |||
MannKendall(cmort) #test de tendance : significatif donc il existe bien une tendance | |||
### Modèle additif | |||
D_additif1 = decompose(cmort, "additive") | |||
plot(D_additif1) | |||
### Modèle additif(autre méthode) | |||
D_additif2 = stl(cmort, "periodic") | |||
plot(D_additif2) | |||
### Modèle multiplicatif | |||
D_multiplicatif = decompose(cmort, "multiplicative") | |||
plot(D_multiplicatif) | |||
## Plots | |||
#Jaune : signal recomposé | |||
#Noir pointillé signal initial | |||
graph = function(var){ | |||
plot(var, lwd = 2, col = "#FFC000", main = substitute(var)) | |||
lines(cmort,lty = 2) | |||
} | |||
#Reconstitution signal avec décomposition additive et bruit | |||
cmort_ad1 = D_additif1$trend + D_additif1$seasonal + D_additif1$random | |||
graph(cmort_ad1) | |||
#Reconstitution signal avec décomposition additive sans bruit | |||
cmort_ad1_ssrandom = D_additif1$trend + D_additif1$seasonal | |||
graph(cmort_ad1_ssrandom) | |||
#Reconstitution signal avec décomposition additive et bruit | |||
cmort_ad2 = D_additif2$time.series[,1] + D_additif2$time.series[,2] + D_additif2$time.series[,3] | |||
graph(cmort_ad2) | |||
#Reconstitution signal avec décomposition additive sans bruit | |||
cmort_ad2_ssrandom = D_additif2$time.series[,1] + D_additif2$time.series[,2] | |||
graph(cmort_ad2_ssrandom) | |||
#Reconstitution signal avec décomposition multiplicative et bruit | |||
cmort_mult = D_multiplicatif$trend*D_multiplicatif$seasonal*D_multiplicatif$random | |||
graph(cmort_mult) | |||
#Reconstitution signal avec décomposition multiplicative sans bruit | |||
cmort_mult_ssrandom = D_multiplicatif$trend*D_multiplicatif$seasonal | |||
graph(cmort_mult_ssrandom) | |||
### Autocorrélation et Modélisation | |||
#autocorrélation | |||
acf(cmort) | |||
#autocorrélation partielle | |||
pacf(cmort) | |||
### Analyse spectrale | |||
#périodogramme | |||
p = periodogram(cmort) | |||
#Trouver la période : | |||
# Définir la fréquence où le spectre est maximum | |||
# Période : 1/Freq | |||
frequence = p$freq[which.max(p$spec)] | |||
Periode = 1/frequence | |||
#Lissage par moyenne mobile | |||
plot(cmort, ylab = "cmort", main = "Lissage par moyenne glissante") | |||
lines(filter(cmort, rep(1/20, 20), side = 1), col = "#FFC000", lwd = 2) | |||
lines(filter(cmort, rep(1/12, 12), side = 1), col = "red", lwd = 2) | |||
lines(filter(cmort, rep(1/50, 50), side = 1), col = "indianred4", lwd = 2) | |||
legend("topright", col = c("red", "#FFC000", "indianred4"), legend = c("1/12", "1/20", "1/100"), lwd = c(2,2,2)) | |||
#Lissage par noyaux | |||
#Calcul de 3 noyaux | |||
k1 <- kernel("daniell", 10) | |||
k2 <- kernel("daniell", 5) | |||
k3 <- kernel("daniell", 30) | |||
#Application des 3 noyaux aux données | |||
x1 <- kernapply(cmort, k1) | |||
x2 <- kernapply(cmort, k2) | |||
x3 <- kernapply(cmort, k3) | |||
#Graphiques | |||
plot(cmort, main = "Lissage par noyaux") | |||
lines(x1, col = "#FFC000", lwd = 2) | |||
lines(x2, col = "red", lwd = 2) | |||
lines(x3, col = "indianred4", lwd = 2) | |||
legend("topright", col = c("red", "#FFC000", "indianred4"), legend = c("5", "10", "30"), lwd = c(2,2,2)) | |||
### Série temporelle multiple | |||
#Graphique | |||
serie = cbind(lap[,10],cmort) | |||
colnames(serie)[1] = "ozone" | |||
plot(serie) | |||
abline(v = 1970:1980, col ="#FFC000", lty = 2, lwd = 2) | |||
#Test de causalité de Granger | |||
grangertest(cmort ~ lap[,4]) #Température | |||
grangertest(cmort ~ lap[,5]) #Humidité relative | |||
grangertest(cmort ~ lap[,6]) #Monoxyde de carbone | |||
grangertest(cmort ~ lap[,7]) #Dioxyde de soufre | |||
grangertest(cmort ~ lap[,8]) #Dioxyde d'azote | |||
grangertest(cmort ~ lap[,9]) #Hydrocarbure | |||
grangertest(cmort ~ lap[,10]) #Ozone | |||
grangertest(cmort ~ lap[,11]) #Particule |
@@ -0,0 +1,182 @@ | |||
library(tidyverse) | |||
library(patchwork) | |||
# Graphique test statistique ---- | |||
seq(100, 0) %>% | |||
map(function(x){ | |||
ci <- function(m, s, n, z = 1.34) | |||
{ | |||
dev <- z * (s / sqrt(n)) | |||
c(m - dev, m + dev) | |||
} | |||
ztest <- function(m1, s1, n1, m2, s2, n2) | |||
{ | |||
zscore <- abs(m2 - m1) / sqrt((s1^2 / n1) + (s2^2 / n2)) | |||
2 * (1 - pnorm(zscore)) | |||
} | |||
tibble(m1 = x/10, | |||
m2 = - x/10, | |||
s = 10, | |||
n = 20) -> df | |||
df %>% | |||
ggplot() + | |||
aes(x = m1) + | |||
stat_function(fun = dnorm, args = list(mean = df$m1, sd = df$s), color = "blue", size = 1.5, n = 1000) + | |||
stat_function(fun = dnorm, args = list(mean = df$m2, sd = df$s), color = "red", size = 1.5, n = 1000) + | |||
geom_vline(xintercept = ci(df$m1, df$s, df$n), color = "blue") + | |||
geom_vline(xintercept = ci(df$m2, df$s, df$n), color = "red") + | |||
geom_text(x = -30, hjust = 0, y = .05, vjust = 1, label = str_c("p = ", ztest(df$m1, df$s, df$n, df$m2, df$s, df$n)), size = 4) + | |||
scale_x_continuous(limits = c(-30 , 30)) + | |||
scale_y_continuous(limits = c(0, .05)) + | |||
theme_classic() + | |||
xlab("x") -> p1 | |||
df %>% | |||
ggplot() + | |||
aes(x = m1) + | |||
stat_function(fun = dnorm, args = list(mean = abs(df$m2 - df$m1), sd = sqrt(df$s^2 / df$n + df$s^2 / df$n))) + | |||
geom_vline(xintercept = 0) + | |||
geom_vline(xintercept = ci(abs(df$m2 - df$m1), sqrt(df$s^2 / df$n + df$s^2 / df$n), 1, 1.96)) + | |||
scale_x_continuous(limits = c(-10, 40)) + | |||
theme_classic() + | |||
xlab("x") -> p2 | |||
p1 + p2 + plot_layout(ncol = 1, heights = c(8, 2)) | |||
}) -> plots | |||
1:101 %>% | |||
map(~ ggsave(plot = plots[[.]], | |||
filename = str_c("anim/", str_pad(., width = 3, pad = "0"), ".png"), | |||
unit = "cm", | |||
dpi = 200, | |||
width = 16, | |||
height = 9)) | |||
# Un temps ---- | |||
one <- tibble(color = rep(LETTERS[1:3], each = 10), | |||
y = map((-1:1), ~rnorm(10, ., .1)) %>% unlist, | |||
x = rnorm(30, 0, .1)) | |||
one %>% | |||
ggplot() + | |||
geom_segment(x = -1, y = 0, xend = 1, yend = 0, color = "darkgrey", arrow = arrow(), size = 2) + | |||
geom_segment(jx = 0, y = -.1, xend = 0, yend = .1, color = "darkgrey", size = 2) + | |||
aes(x = x, y = y, color = color) + | |||
geom_point(size = 2) + | |||
scale_x_continuous(limits = c(-1, 1)) + | |||
scale_y_continuous(limits = c(-2, 2)) + | |||
theme_void() + | |||
theme(legend.position = "none", | |||
plot.background = element_rect(fill = "transparent", colour = "transparent")) -> | |||
oneplot | |||
ggsave(plot = oneplot, filename = "one.png", bg = "transparent") | |||
# Deux temps ---- | |||
two <- tibble(x = map(c(-.5, .5), ~rnorm(10, ., .1)) %>% unlist, | |||
y = map(c(-1, 1), ~rnorm(10, ., .1)) %>% unlist, | |||
colour = rep("A", 20)) | |||
two %>% | |||
ggplot() + | |||
geom_segment(x = -1, y = 0, xend = 1, yend = 0, color = "darkgrey", arrow = arrow(), size = 2) + | |||
geom_segment(x = -.5, y = -.1, xend = -.5, yend = .1, color = "darkgrey", size = 2) + | |||
geom_segment(x = .5, y = -.1, xend = .5, yend = .1, color = "darkgrey", size = 2) + | |||
aes(x = x, y = y, colour = colour) + | |||
geom_point(size = 2) + | |||
scale_x_continuous(limits = c(-1, 1)) + | |||
scale_y_continuous(limits = c(-2, 2)) + | |||
theme_void() + | |||
theme(legend.position = "none", | |||
plot.background = element_rect(fill = "transparent", colour = "transparent")) -> | |||
twoplot | |||
ggsave(plot = twoplot, filename = "two.png", bg = "transparent") | |||
# N temps ---- | |||
many <- tibble(x = map(-3:3, ~rnorm(10, ., .1)) %>% unlist, | |||
y = runif(7, -1.5, 1.5) %>% map(~rnorm(10, ., .1)) %>% unlist, | |||
colour = rep("A", 70)) | |||
many %>% | |||
ggplot() + | |||
geom_segment(x = -4, y = 0, xend = 4, yend = 0, color = "darkgrey", arrow = arrow(), size = 2) + | |||
geom_segment(x = -3, y = -.1, xend = -3, yend = .1, color = "darkgrey", size = 2) + | |||
geom_segment(x = -2, y = -.1, xend = -2, yend = .1, color = "darkgrey", size = 2) + | |||
geom_segment(x = -1, y = -.1, xend = -1, yend = .1, color = "darkgrey", size = 2) + | |||
geom_segment(x = 0, y = -.1, xend = 0, yend = .1, color = "darkgrey", size = 2) + | |||
geom_segment(x = 1, y = -.1, xend = 1, yend = .1, color = "darkgrey", size = 2) + | |||
geom_segment(x = 2, y = -.1, xend = 2, yend = .1, color = "darkgrey", size = 2) + | |||
geom_segment(x = 3, y = -.1, xend = 3, yend = .1, color = "darkgrey", size = 2) + | |||
aes(x = x, y = y, colour = colour) + | |||
geom_point(size = 2) + | |||
scale_x_continuous(limits = c(-4, 4)) + | |||
scale_y_continuous(limits = c(-2, 2)) + | |||
theme_void() + | |||
theme(legend.position = "none", | |||
plot.background = element_rect(fill = "transparent", colour = "transparent")) -> | |||
manyplot | |||
ggsave(plot = manyplot, filename = "many.png", bg = "transparent") | |||
# Série temporelle ---- | |||
## Figure introduction | |||
library(astsa) | |||
data(flu) #chargement de la base "flu" du package astsa | |||
## Représentation graphique de la série temporelle | |||
tsplot(flu,xlab = "Time", ylab = "Deaths per 10,000 people", | |||
main="Monthly pneumonia and influenza deaths per 10,000 people \n in the United States for 11 years, 1968 to 1978") | |||
## Simulation série | |||
### Simulation des paramêtres temps, tendance, saisonalité, bruit | |||
t = time(cmort) | |||
m = 0.2*t+2.1 | |||
s = cos(5*t) | |||
epsilon = rnorm(length(t),0,0.5) | |||
### Simulation des séries temporelles comme modèle additif/multiplicatif/Hybride | |||
simu.ts.additif = ts(m+s+epsilon,frequency = 52, start = c(1970, 1) ) | |||
tsplot(simu.ts.additif) | |||
simu.ts.multiplictif = ts(m*s*epsilon,frequency = 52, start = c(1970, 1) ) | |||
tsplot(simu.ts.multiplictif) | |||
simu.ts.hybride = ts(m*s+epsilon*100,frequency = 52, start = c(1970, 1) ) | |||
tsplot(simu.ts.hybride) | |||
#### Courbes | |||
plot(t,m, type = "l", main = "Tendance", xlab = "Temps", ylab = "") | |||
plot(t,s, type = "l", main = "Saisonnalité", xlab = "Temps", ylab = "") | |||
plot(t,epsilon, type = "l", main = "Bruit", xlab = "Temps", ylab = "") | |||
### Décomposition des signaux simulés | |||
plot(decompose(simu.ts.additif, type="additive")) #décomposition trend/seasonnality/noise, modèle additif | |||
plot(stl(simu.ts.additif, "periodic")) | |||
plot(decompose(decompose(simu.ts.additif, type="additive")$random, type="additive")) | |||
## Simulation modèles AR et MA | |||
### Simulation AR(1) : X_t = 0.6X_{t???1} + ??_t | |||
X=arima.sim(n = 2400, list(ar = 0.6),sd = 1) | |||
plot(acf(X),lwd=2, main = "Acf AR(1)") | |||
plot(pacf(X),lwd=2, main = "Pacf AR(1)") | |||
### Simulation AR(2) : Xt = 0.6X_{t???1} - 0.4X_{t???2} + ??t | |||
X=arima.sim(n = 2400, list(ar = c(0.6,-0.4)),sd = 1) | |||
plot(acf(X),lwd=2, main = "Acf AR(2)") | |||
plot(pacf(X),lwd=2, main = "Pacf AR(2)") | |||
### Simulation MA(1) X_t = ??_t + 0.7??_{t???1} | |||
X=arima.sim(n = 2400, list(ma = .7),sd = 1) | |||
plot(acf(X),lwd=2, main = "Acf MA(1)") | |||
plot(pacf(X),lwd=2, main = "Pacf MA(1)") |