library(tidyverse)

# Un temps ----

## Data
data(iris)
data(mtcars)

mtcars %>%
  mutate_at(vars(am, cyl, vs, gear), factor) ->
mtcars

## Deux groupes

### Plots

boxplot(mpg ~ am, data = mtcars)

ggplot(mtcars) +
  aes(x = am, y = mpg) +
  geom_boxplot() +
  geom_point(alpha = .5)

### Paramétrique
t.test(mpg ~ am, data = mtcars, var.equal = T)
oneway.test(mpg ~ am, data = mtcars, var.equal = T)

### Non-paramétrique
wilcox.test(mpg ~ am, data = mtcars)

## Plusieurs groupes

### Plots
iris %>%
  ggplot() +
  aes(x = Species, y = Sepal.Width) +
  geom_violin() +
  geom_point(position = "jitter", alpha = .5)

iris %>%
  ggplot() +
  aes(x = Petal.Length) +
  geom_histogram(data = iris %>% select(-Species)) +
  geom_histogram(aes(fill = Species)) +
  facet_grid(~Species)

### Paramétrique
oneway.test(Sepal.Width ~ Species, data = iris)

### Non-paramétrique
kruskal.test(Sepal.Width ~ Species, data = iris)

## Multivarié

### Régression linéaire

# Y = a₁X₁ + a₂X₂ + _ + anXn + ε
# Y = Σ aiXi + ε, ε -> N
plot(mtcars)

fit <- lm(mpg ~ am, data = mtcars)
summary(fit)
plot(fit)

fit <- lm(mpg ~ cyl + disp + hp + drat + wt + vs + am, data = mtcars)
summary(fit)
plot(fit)

### Régression logistique
fit <- glm(am ~ mpg + drat + wt, data = mtcars, family = "binomial")
summary(fit)
plot(fit)

# Deux temps ----

## Data
data(ChickWeight)

### Test apparié

#### Plots
ChickWeight %>%
  filter(Time %in% c(0, 2)) %>%
  ggplot() +
  aes(x = Time, y = weight, group = Chick, color = Diet) +
  geom_point() +
  geom_line()

#### Test
ChickWeight %>%
  group_by(Chick) %>%
  summarise(maxTime = max(Time)) %>%
  summarise(minTime = min(maxTime))

t.test(ChickWeight$weight[ChickWeight$Time == 0], ChickWeight$weight[ChickWeight$Time == 2], paired = T)

### Test apparié à la main
ChickWeight %>%
  filter(Time %in% c(0, 2)) %>%
  group_by(Chick) %>%
  summarise(weightDiff = last(weight) - first(weight),
            Diet = first(Diet)) -> deuxTemps

#### Plots
deuxTemps %>%
  ggplot() +
  aes(x = weightDiff, fill = Diet) +
  geom_density(alpha = .25) #, position = "fill") # position = "stack" "fill"

oneway.test(weightDiff ~ Diet, data = deuxTemps)
kruskal.test(weightDiff ~ Diet, data = deuxTemps)

# Survie ----
library(survival)

## Data
data(lung)

## Bivarié
lung$Surv <- Surv(lung$time, lung$status)
survfit(Surv ~ 1, data = lung) -> fit

plot(fit)

survfit(Surv ~ sex, data = lung) -> fit
plot(fit, conf.int = T, col = c("blue", "red"))
legend("topright", legend = c("Hommes", "Femmes"), col = c("blue", "red"), lty = 1)

survdiff(Surv ~ sex, data = lung)

## Cox
coxph(Surv ~ sex + age + ph.ecog + ph.karno + meal.cal + wt.loss, data = lung) -> fit
cox.zph(fit) %>% plot

# N temps ----
library(lme4)
library(lmerTest)

## Data
data(ChickWeight)

## Plots
ChickWeight %>%
  ggplot() +
  aes(x = Time, y = weight, group = Chick, color = Diet) %>%
  geom_line()#alpha = .5) +
  geom_smooth(aes(x = Time, y = weight, color = Diet))

## Test
lmer(weight ~ Diet + (1|Time) + (1|Chick), data = ChickWeight) -> fit

summary(fit)
plot(fit)

# Trajectoires ----
library(TraMineR)
library(cluster)

## Data
data(mvad)

## Description
mvad.seq <- seqdef(mvad, 17:86, xtstep = 12)
plot(mvad.seq, 1:20)
seqdplot(mvad.seq)

## Clustering
mvad.om <- seqdist(mvad.seq, method = "OM", sm = "TRATE")

clusterward <- agnes(mvad.om, diss = T, method = "ward")
mvad.cl <- cutree(clusterward, k = 4)
cl.lab <- factor(mvad.cl, labels = paste("Cluster", 1:4))
seqdplot(mvad.seq, group = cl.lab)
seqfplot(mvad.seq, group = cl.lab)
seqmsplot(mvad.seq, group = cl.lab)

## Analyse
mvad.ent <- seqient(mvad.seq)
lm.ent <- lm(mvad.ent ~ male + funemp + gcse5eq, data = mvad)
summary(lm.ent)

# Séries temporelles ----
library(astsa)
library(Rssa)
library(zoo)
library(TSA)

## Data
data(cmort)

## Description des données
is.ts(cmort) #on vérifie qu'il s'agit bien d'un objet série temporelle
start(cmort) #Première observation
end(cmort) #Dernière observation
frequency(cmort) #Fréquance des observations
deltat(cmort) #intervalle de temps entre deux observations

length(cmort)
head(cmort) #visualisation des premier éléments de l'objet cmort

tsplot(cmort) #visualisation graphique de la série


## Méthode de décomposition 
MannKendall(cmort) #test de tendance : significatif donc il existe bien une tendance

### Modèle additif
D_additif1 = decompose(cmort, "additive")
plot(D_additif1)

### Modèle additif(autre méthode)
D_additif2 = stl(cmort, "periodic")
plot(D_additif2)

### Modèle multiplicatif
D_multiplicatif = decompose(cmort, "multiplicative")
plot(D_multiplicatif)

## Plots

#Jaune : signal recomposé
#Noir pointillé signal initial 
graph = function(var){
  plot(var, lwd = 2, col = "#FFC000", main = substitute(var))
  lines(cmort,lty = 2)
}

#Reconstitution signal avec décomposition additive et bruit
cmort_ad1 = D_additif1$trend + D_additif1$seasonal  + D_additif1$random
graph(cmort_ad1)
#Reconstitution signal avec décomposition additive sans bruit
cmort_ad1_ssrandom = D_additif1$trend + D_additif1$seasonal  
graph(cmort_ad1_ssrandom)


#Reconstitution signal avec décomposition additive et bruit
cmort_ad2 = D_additif2$time.series[,1] + D_additif2$time.series[,2]  + D_additif2$time.series[,3]
graph(cmort_ad2)
#Reconstitution signal avec décomposition additive sans bruit
cmort_ad2_ssrandom = D_additif2$time.series[,1] + D_additif2$time.series[,2]  
graph(cmort_ad2_ssrandom)

#Reconstitution signal avec décomposition multiplicative et bruit
cmort_mult = D_multiplicatif$trend*D_multiplicatif$seasonal*D_multiplicatif$random
graph(cmort_mult)
#Reconstitution signal avec décomposition multiplicative sans bruit
cmort_mult_ssrandom = D_multiplicatif$trend*D_multiplicatif$seasonal
graph(cmort_mult_ssrandom)


### Autocorrélation et Modélisation 

#autocorrélation
acf(cmort)

#autocorrélation partielle
pacf(cmort)


### Analyse spectrale 

#périodogramme 
p = periodogram(cmort)
#Trouver la période : 
# Définir la fréquence où le spectre est maximum
# Période : 1/Freq
frequence = p$freq[which.max(p$spec)]
Periode = 1/frequence

#Lissage par moyenne mobile 
plot(cmort, ylab = "cmort", main = "Lissage par moyenne glissante")
lines(filter(cmort, rep(1/20, 20), side = 1), col = "#FFC000", lwd = 2)
lines(filter(cmort, rep(1/12, 12), side = 1), col = "red", lwd = 2)
lines(filter(cmort, rep(1/50, 50), side = 1), col = "indianred4", lwd = 2)
legend("topright", col = c("red", "#FFC000", "indianred4"), legend = c("1/12", "1/20", "1/100"), lwd = c(2,2,2))

#Lissage par noyaux
#Calcul de 3 noyaux
k1 <- kernel("daniell", 10)
k2 <- kernel("daniell", 5)
k3 <- kernel("daniell", 30)
#Application des 3 noyaux aux données
x1 <- kernapply(cmort, k1)
x2 <- kernapply(cmort, k2)
x3 <- kernapply(cmort, k3)
#Graphiques
plot(cmort, main = "Lissage par noyaux")
lines(x1, col = "#FFC000", lwd = 2)
lines(x2, col = "red", lwd = 2)
lines(x3, col = "indianred4", lwd = 2)
legend("topright", col = c("red", "#FFC000", "indianred4"), legend = c("5", "10", "30"), lwd = c(2,2,2))


### Série temporelle multiple 

#Graphique 
serie = cbind(lap[,10],cmort)
colnames(serie)[1] = "ozone"
plot(serie)
abline(v = 1970:1980, col ="#FFC000", lty = 2, lwd = 2)

#Test de causalité de Granger
grangertest(cmort ~ lap[,4]) #Température
grangertest(cmort ~ lap[,5]) #Humidité relative
grangertest(cmort ~ lap[,6]) #Monoxyde de carbone
grangertest(cmort ~ lap[,7]) #Dioxyde de soufre
grangertest(cmort ~ lap[,8]) #Dioxyde d'azote
grangertest(cmort ~ lap[,9]) #Hydrocarbure
grangertest(cmort ~ lap[,10]) #Ozone
grangertest(cmort ~ lap[,11]) #Particule