Antoine Neuraz před 5 roky
rodič
revize
27aa1e5527
10 změnil soubory, kde provedl 426 přidání a 201 odebrání
  1. +10
    -0
      courses/07_time_text.Rmd
  2. +25
    -49
      courses/07_time_text.html
  3. +26
    -28
      courses/bar-chart-race.Rmd
  4. binární
      courses/bar_race.gif
  5. +106
    -0
      courses/lab01-correction.Rmd
  6. +127
    -0
      courses/lab7-temporal_data.Rmd
  7. +132
    -0
      courses/lab7-temporal_data.html
  8. binární
      courses/lab7-temporal_data_files/figure-html/unnamed-chunk-2-1.png
  9. binární
      courses/lab7-temporal_data_files/figure-html/unnamed-chunk-3-1.png
  10. +0
    -124
      courses/line-charts.Rmd

+ 10
- 0
courses/07_time_text.Rmd Zobrazit soubor

@@ -20,6 +20,16 @@ library(dplyr)
library(stringi)
```

class: center, middle, title

# Visualisation de données textuelles et temporelles

### 2019-2020

## Dr. Antoine Neuraz

### AHU Informatique médicale
#### Hôpital Necker-Enfants malades, </br> Université de Paris

---
class: center, full


+ 25
- 49
courses/07_time_text.html Zobrazit soubor

@@ -1,8 +1,8 @@
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
<html>
<head>
<title>Cours 07 - Données temporelles et textuelles</title>
<meta charset="utf-8" />
<meta charset="utf-8">
<meta name="author" content="Antoine Neuraz" />
<link href="libs/remark-css-0.0.1/default.css" rel="stylesheet" />
<link rel="stylesheet" href="css/my_style.css" type="text/css" />
@@ -162,11 +162,29 @@ class: full, center
[interactive](https://visual.ly/community/interactive-graphic/business/house-hunting)

---
class: center
## Gapminder

&lt;iframe width="800" height="400" src="https://www.youtube.com/embed/BPt8ElTQMIg" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen&gt;&lt;/iframe&gt;

---
## divide and conquer

![](img/walmart.png)

---
## Streamgraph

![](img/streamgraph.png)

---
class: center
## Eventflow

&lt;iframe width="800" height="400" src="https://www.youtube.com/embed/ZN1BefRmBMc" frameborder="0" allow="accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture" allowfullscreen&gt;&lt;/iframe&gt;

[Eventflow](https://hcil.umd.edu/eventflow/)
</textarea>
<style data-target="print-only">@media screen {.remark-slide-container{display:block;}.remark-slide-scaler{box-shadow:none;}}</style>
<script src="https://remarkjs.com/downloads/remark-latest.min.js"></script>
<script src="addons/macros.js"></script>
<script>var slideshow = remark.create({
@@ -177,57 +195,16 @@ class: full, center
if (window.HTMLWidgets) slideshow.on('afterShowSlide', function (slide) {
window.dispatchEvent(new Event('resize'));
});
(function(d) {
var s = d.createElement("style"), r = d.querySelector(".remark-slide-scaler");
(function() {
var d = document, s = d.createElement("style"), r = d.querySelector(".remark-slide-scaler");
if (!r) return;
s.type = "text/css"; s.innerHTML = "@page {size: " + r.style.width + " " + r.style.height +"; }";
d.head.appendChild(s);
})(document);

(function(d) {
var el = d.getElementsByClassName("remark-slides-area");
if (!el) return;
var slide, slides = slideshow.getSlides(), els = el[0].children;
for (var i = 1; i < slides.length; i++) {
slide = slides[i];
if (slide.properties.continued === "true" || slide.properties.count === "false") {
els[i - 1].className += ' has-continuation';
}
}
var s = d.createElement("style");
s.type = "text/css"; s.innerHTML = "@media print { .has-continuation { display: none; } }";
d.head.appendChild(s);
})(document);
// delete the temporary CSS (for displaying all slides initially) when the user
// starts to view slides
(function() {
var deleted = false;
slideshow.on('beforeShowSlide', function(slide) {
if (deleted) return;
var sheets = document.styleSheets, node;
for (var i = 0; i < sheets.length; i++) {
node = sheets[i].ownerNode;
if (node.dataset["target"] !== "print-only") continue;
node.parentNode.removeChild(node);
}
deleted = true;
});
})();</script>

<script>
(function() {
var links = document.getElementsByTagName('a');
for (var i = 0; i < links.length; i++) {
if (/^(https?:)?\/\//.test(links[i].getAttribute('href'))) {
links[i].target = '_blank';
}
}
})();
</script>

<script>
slideshow._releaseMath = function(el) {
var i, text, code, codes = el.getElementsByTagName('code');
var i, text, code, codes = document.getElementsByTagName('code');
for (i = 0; i < codes.length;) {
code = codes[i];
if (code.parentNode.tagName !== 'PRE' && code.childElementCount === 0) {
@@ -241,8 +218,7 @@ slideshow._releaseMath = function(el) {
}
i++;
}
};
slideshow._releaseMath(document);
})();
</script>
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>


+ 26
- 28
courses/bar-chart-race.Rmd Zobrazit soubor

@@ -26,13 +26,12 @@ library(gganimate)
Load data
```{r load_data}
# load data
data("base_pop")
data("us_city_populations")

# create the ranks based on census populations
base_pop<- base_pop %>%
group_by( annee) %>%
arrange(desc(population)) %>%
mutate(rang = 1:n()) %>%
# Sort the census records by decreasing population.
# `Rank` is not computed here any more: the following chunks read the `Rank`
# column as it comes with the dataset.
us_city_populations <- us_city_populations %>%
  group_by(Year) %>%
  arrange(desc(Population)) %>%
  ungroup()

```
@@ -42,8 +41,8 @@ Extract the list of top n cities
```{r extract_top_n}
n_cities = 10

top_cities <- base_pop %>% filter(rang <= n_cities) %>%
select(ville, dep, region) %>% distinct()
top_cities <-us_city_populations %>% filter(Rank <= n_cities) %>%
select(City, State, Region) %>% distinct()

```

@@ -51,31 +50,31 @@ top_cities <- base_pop %>% filter(rang <= n_cities) %>%
Create all missing dates
```{r, combine_dates}
# create a data frame with all the years between min and max Year
all_years <- data.frame(annee = seq(min(base_pop$annee), max(base_pop$annee), 1))
all_years <- data.frame(Year = seq(min(us_city_populations$Year), max(us_city_populations$Year), 1))

# combine top_cities and all_years
all_combos <- merge(top_cities, all_years, all = T)

# combine all_combos with the original dataset
res_interp <- merge(base_pop, all_combos, all.y = T)
res_interp <- merge(us_city_populations, all_combos, all.y = T)
```

Interpolate the populations when missing (linear interpolation here)
Interpolate the populations when missing (linear interpolation here)
```{r, interpolate}
res_interp <- res_interp %>%
group_by(ville) %>%
mutate(population=approx(annee,population,annee)$y)
group_by(City) %>%
mutate(Population=approx(Year,Population,Year)$y)
```


```{r, filter_for_plot}
# filter the top ten cities per year
to_plot <- res_interp %>%
group_by(annee) %>%
arrange(-population) %>%
mutate(rang=row_number()) %>%
filter(rang<=n_cities) %>%
rename(`Région` = region)
group_by(Year) %>%
arrange(-Population) %>%
mutate(Rank=row_number()) %>%
filter(Rank<=n_cities) %>%
rename(`Région` = Region)

```

@@ -84,10 +83,10 @@ Make the plot
```{r, make_plot}
p <- to_plot %>%

ggplot(aes(x = -rang,y = population, group =ville)) +
geom_tile(aes(y = population / 2, height = population, fill = `Région`), width = 0.9) +
geom_text(aes(label = ville), hjust = "right", colour = "white", fontface="bold", nudge_y = -10000) +
geom_text(aes(label = scales::comma(population,big.mark = ' ')), hjust = "left", nudge_y = 10000, colour = "grey90") +
ggplot(aes(x = -Rank,y = Population, group =City)) +
geom_tile(aes(y = Population / 2, height = Population, fill = `Région`), width = 0.9) +
geom_text(aes(label = City), hjust = "right", colour = "white", fontface="bold", nudge_y = -100000) +
geom_text(aes(label = scales::comma(Population,big.mark = ' ')), hjust = "left", nudge_y = 100000, colour = "grey90") +
coord_flip(clip="off") +
hrbrthemes::scale_fill_ipsum() +
scale_x_discrete("") +
@@ -104,17 +103,16 @@ p <- to_plot %>%
legend.text = element_text(size = 15),
legend.background = element_blank()) +
# gganimate code to transition by year:
transition_time(annee) +
transition_time(Year) +
ease_aes('cubic-in-out') +
labs(title='Evolution des plus grandes villes de France',
subtitle='Population en {round(frame_time,0)}',
caption='Source: INSEE Base populations historiques 1876-2015')
labs(title='Evolution des plus grandes villes US',
subtitle='Population en {round(frame_time,0)}')

```

```{r, animate}
animate(p, nframes = 350, fps = 25, end_pause = 30, width = 1200, height = 1200, start_pause = 15 )
# Render the animation built above and save the result as bar_race.gif.
# (The previous call ended with a stray trailing comma, i.e. an empty argument.)
animate(p, nframes = 300, fps = 25, end_pause = 30, width = 1200, height = 1200, start_pause = 15)
anim_save("bar_race.gif", animation = last_animation())
```



binární
courses/bar_race.gif Zobrazit soubor

Před Za
Šířka: 1200  |  Výška: 1200  |  Velikost: 6.4MB

+ 106
- 0
courses/lab01-correction.Rmd Zobrazit soubor

@@ -0,0 +1,106 @@
---
title: "correction"
author: "Antoine Neuraz"
date: "18/11/2019"
output: html_document
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
# Every chunk below calls ggplot(); attach ggplot2 here so the document knits
# from a fresh R session.
library(ggplot2)
```

## Ouvrir le dataset "mtcars"

## représenter le "Gross horsepower" en fonction du nombre de cylindres

```{r}
# Gross horsepower (hp) against the number of cylinders, with the cylinder
# count treated as a category.
data("mtcars")

ggplot(mtcars, aes(x = as.factor(cyl), y = hp)) +
  geom_jitter(width = 0.2)
```


## utiliser l'encodage multiple sur le nombre de cylindres

```{r}
# Multiple encoding: the cylinder count drives the x position, the point size
# and the point colour at once, so the legend is hidden.
ggplot(
  mtcars,
  aes(x = as.factor(cyl), y = hp, size = cyl, color = cyl)
) +
  geom_jitter(width = 0.2, alpha = 0.6) +
  theme_minimal() +
  theme(legend.position = "none")
```

## ajouter l'information du nombre de carburateurs

```{r}
# Carburetor count on the y axis; horsepower is carried by both the point size
# and the point colour.
ggplot(
  mtcars,
  aes(x = as.factor(cyl), y = carb, size = hp, color = hp)
) +
  geom_jitter(width = 0.2, alpha = 0.6) +
  theme_minimal()
# Options tried and left disabled:
#facet_grid(~as.factor(carb))
#theme(legend.position = "none")
```

## Peaufiner le plot (axes, titres, thème)

```{r}
# Same plot as the previous chunk, with explicit axis titles.
ggplot(
  mtcars,
  aes(x = as.factor(cyl), y = carb, size = hp, color = hp)
) +
  geom_jitter(width = 0.2, alpha = 0.6) +
  theme_minimal() +
  labs(x = "Cylinders", y = "Carburators")
```

## représenter la distribution du nombre de miles per gallon en histogramme

```{r}
# Histogram of mpg with the square-root rule for the number of bins.
# `bins` is a count and sqrt(nrow(mtcars)) is generally not a whole number,
# so round it up before handing it to geom_histogram().
ggplot(mtcars, aes(x = mpg)) +
  geom_histogram(bins = ceiling(sqrt(nrow(mtcars))))
```

## représenter la distribution du nombre de miles per gallon en boxplot

```{r}
# A constant x puts the whole mpg column into a single box.
ggplot(mtcars, aes(x = 1, y = mpg)) +
  geom_boxplot()
```

## représenter la distribution du nombre de miles per gallon en fonction du nombre de cylindres

```{r}
# One violin of mpg per cylinder count.
ggplot(mtcars, aes(x = as.factor(cyl), y = mpg)) +
  geom_violin(fill = "grey70")
```

## ajouter les points par dessus la distribution

```{r}
# Violins as in the previous chunk, with the individual cars drawn on top as
# jittered points coloured by cylinder count.
ggplot(mtcars, aes(x = as.factor(cyl), y = mpg)) +
  geom_violin(fill = "grey70") +
  geom_jitter(aes(color = cyl), width = 0.15)
```


## peaufiner le plot (axes, titres, thème)




+ 127
- 0
courses/lab7-temporal_data.Rmd Zobrazit soubor

@@ -0,0 +1,127 @@
---
title: "Lab 07 - Données temporelles et textuelles"
author: "Antoine Neuraz"
date: "22/11/2019"
output:
  xaringan::moon_reader:
    css: ['default','css/my_style.css']
    lib_dir: libs
    seal: false
    # Options under `nature` are handed to remark.js.
    nature:
      ratio: '4:3'
      countIncrementalSlides: false
      # NOTE(review): as a `nature` entry this key is only forwarded to
      # remark.js; if a standalone HTML file was intended, the output-level
      # option is presumably `self_contained` — confirm.
      self-contained: true
      beforeInit: "addons/macros.js"
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, fig.asp = .7, fig.width = 12)
library(vizoR)
library(ggplot2)
library(gghighlight)
library(dplyr)
library(ggTimeSeries)
```

## TODO

#### 1. charger le dataset `us_city_populations` de la librairie `vizoR`

#### 2. tracer un line chart de l'évolution de la population des villes US

#### 3. Mettez en évidence les 5 plus grandes villes (hint: package gghighlight)
[introduction gghighlight](https://cran.r-project.org/web/packages/gghighlight/vignettes/gghighlight.html)

#### 4. Appliquez les principes de design de Tufte

##### 5. BONUS: affichez le nom des villes directement à la fin de la ligne

#### 6. Réalisez un streamgraph des 5 plus grandes villes US (hint: package ggTimeSeries)

---

## TODO 2

#### Trouver une 3e visualisation pertinente pour montrer l'évolution de la population des villes US.

---

```{r}

data("us_city_populations")

n_cities <- 5

# Alternative kept for reference: restrict the data to the cities that were
# ranked in the top `n_cities` in at least one year.
# top_cities <-
#   us_city_populations %>%
#   filter(Rank <= n_cities) %>%
#   select(City, State, Region) %>%
#   distinct()
#
# to_plot <- filter(us_city_populations, City %in% top_cities$City)

#to_plot <- us_city_populations

# Rank of each city in the most recent year of the dataset.
last_ranks <- us_city_populations %>%
  filter(Year == max(Year)) %>%
  select(City, last_rank = Rank)

# Attach that final rank to every row of the city.
to_plot <- us_city_populations %>%
  left_join(last_ranks, by = "City")

# Latest row of every city, narrowed down to the `n_cities` cities with the
# smallest final rank; these feed the secondary axis of the line chart.
right_axis <- to_plot %>%
  group_by(City) %>%
  top_n(1, Year) %>%
  ungroup() %>%
  top_n(n_cities, -last_rank)

# Break positions (final populations) and their labels (city names).
ends <- right_axis$Population
labels <- right_axis$City

```

---
class: full
```{r, echo = FALSE}
# Line chart of every city's population over time. gghighlight() keeps the
# colour only for cities whose final rank is within `n_cities`; the others are
# drawn in "grey20". The secondary y axis puts each name in `labels` at the
# corresponding value in `ends`.
ggplot(
  to_plot,
  aes(x = Year, y = Population, group = City, color = City)
) +
  geom_line(size = 1) +
  #geom_text(data = subset(to_plot, Year == 2010), aes(x=Inf, y = Population, label=City), hjust = 1) +
  scale_x_continuous("", expand = c(0, 0)) +
  scale_y_continuous(
    "",
    labels = scales::comma_format(big.mark = " "),
    sec.axis = sec_axis(~ ., breaks = ends, labels = labels)
  ) +
  scale_color_viridis_d() +
  theme_elegant_dark() +
  theme(
    legend.position = "none",
    plot.margin = unit(c(1, 3, 1, 1), "lines"),
    axis.line.y = element_blank(),
    axis.line.x = element_blank(),
    axis.ticks.x = element_line(),
    panel.grid.major.y = element_line(color = "grey30", size = 0.2)
  ) +
  gghighlight(
    max(last_rank) <= n_cities,
    use_direct_label = FALSE,
    label_key = City,
    unhighlighted_colour = "grey20"
  )
```
---
class: full
```{r, echo = FALSE}
library(ggTimeSeries)
# Streamgraph restricted to the cities listed in `labels`.
ggplot(
  filter(to_plot, City %in% labels),
  aes(x = Year, y = Population, group = City, fill = City)
) +
  stat_steamgraph() +
  scale_y_continuous("", labels = scales::comma_format(big.mark = " ")) +
  scale_fill_viridis_d() +
  theme_elegant_dark() +
  theme(
    plot.margin = unit(c(1, 3, 1, 1), "lines"),
    axis.line.y = element_blank(),
    axis.line.x = element_blank(),
    axis.ticks.x = element_line(),
    panel.grid.major.y = element_line(color = "grey30", size = 0.2)
  )

```
---

![](bar_race.gif)


+ 132
- 0
courses/lab7-temporal_data.html Zobrazit soubor

@@ -0,0 +1,132 @@
<!DOCTYPE html>
<html>
<head>
<title>Lab 07 - Données temporelles et textuelles</title>
<meta charset="utf-8">
<meta name="author" content="Antoine Neuraz" />
<link href="libs/remark-css-0.0.1/default.css" rel="stylesheet" />
<link rel="stylesheet" href="css/my_style.css" type="text/css" />
</head>
<body>
<textarea id="source">




## TODO

#### 1. charger le dataset `us_city_populations` de la librairie `vizoR`

#### 2. tracer un line chart de l'évolution de la population des villes US

#### 3. Mettez en évidence les 5 plus grandes villes (hint: package gghighlight)
[introduction gghighlight](https://cran.r-project.org/web/packages/gghighlight/vignettes/gghighlight.html)

#### 4. Appliquez les principes de design de Tufte

##### 5. BONUS: affichez le nom des villes directement à la fin de la ligne

#### 6. Réalisez un streamgraph des 5 plus grandes villes US (hint: package ggTimeSeries)

---

## TODO 2

#### Trouver une 3e visualization pertinente pour montrer l'évolution de la population des villes US.

---


```r
data("us_city_populations")

n_cities = 5

# top_cities &lt;-
# us_city_populations %&gt;%
# filter(Rank &lt;= n_cities) %&gt;%
# select(City, State, Region) %&gt;%
# distinct()
#
# to_plot &lt;- filter(us_city_populations, City %in% top_cities$City)

#to_plot &lt;- us_city_populations

last_ranks &lt;- us_city_populations %&gt;%
filter(Year == max(Year)) %&gt;%
mutate(last_rank = Rank) %&gt;%
select(City, last_rank)

to_plot &lt;- left_join(us_city_populations, last_ranks, by= 'City')

right_axis &lt;- to_plot %&gt;%
group_by(City) %&gt;%
top_n(1, Year) %&gt;%
ungroup() %&gt;%
top_n(n_cities, -last_rank)

ends &lt;- right_axis %&gt;%
pull(Population)

labels &lt;- right_axis %&gt;%
pull(City)
```

---
class: full
![](lab7-temporal_data_files/figure-html/unnamed-chunk-2-1.png)&lt;!-- --&gt;
---
class: full
![](lab7-temporal_data_files/figure-html/unnamed-chunk-3-1.png)&lt;!-- --&gt;
---

![](bar_race.gif)
</textarea>
<script src="https://remarkjs.com/downloads/remark-latest.min.js"></script>
<script src="addons/macros.js"></script>
<script>var slideshow = remark.create({
// Options handed to remark.create() for this deck.
"ratio": "4:3",
"countIncrementalSlides": false,
"self-contained": true
});
// When HTMLWidgets is present, dispatch a window 'resize' event after each
// slide is shown.
if (window.HTMLWidgets) slideshow.on('afterShowSlide', function (slide) {
window.dispatchEvent(new Event('resize'));
});
// Append a <style> with an @page rule whose size is copied from the inline
// width/height of the first .remark-slide-scaler element; does nothing when no
// such element exists.
(function() {
var d = document, s = d.createElement("style"), r = d.querySelector(".remark-slide-scaler");
if (!r) return;
s.type = "text/css"; s.innerHTML = "@page {size: " + r.style.width + " " + r.style.height +"; }";
d.head.appendChild(s);
})();</script>

<script>
// Unwrap <code> elements whose text is a math expression: the element is
// replaced by its own inner HTML so the delimiters end up as plain page text.
(function() {
var i, text, code, codes = document.getElementsByTagName('code');
for (i = 0; i < codes.length;) {
code = codes[i];
// Only consider code that is not directly inside <pre> and has no child
// elements.
if (code.parentNode.tagName !== 'PRE' && code.childElementCount === 0) {
text = code.textContent;
// Recognised forms: \( ... \), \[ ... \], $$ ... $$ and
// \begin{...} ... \end{...}, each spanning the whole text.
if (/^\\\((.|\s)+\\\)$/.test(text) || /^\\\[(.|\s)+\\\]$/.test(text) ||
/^\$\$(.|\s)+\$\$$/.test(text) ||
/^\\begin\{([^}]+)\}(.|\s)+\\end\{[^}]+\}$/.test(text)) {
code.outerHTML = code.innerHTML; // remove <code></code>
// i is deliberately not advanced here — presumably because `codes` is a
// live collection and the next element has moved into index i.
continue;
}
}
i++;
}
})();
</script>
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
// Create a <script> element pointing at MathJax and append it to <head>.
(function () {
var script = document.createElement('script');
script.type = 'text/javascript';
script.src = 'https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-MML-AM_CHTML';
// Unless the page is opened from file:, strip the http(s): scheme so the URL
// becomes protocol-relative.
if (location.protocol !== 'file:' && /^https?:/.test(script.src))
script.src = script.src.replace(/^https?:/, '');
document.getElementsByTagName('head')[0].appendChild(script);
})();
</script>
</body>
</html>

binární
courses/lab7-temporal_data_files/figure-html/unnamed-chunk-2-1.png Zobrazit soubor

Před Za
Šířka: 864  |  Výška: 604  |  Velikost: 69KB

binární
courses/lab7-temporal_data_files/figure-html/unnamed-chunk-3-1.png Zobrazit soubor

Před Za
Šířka: 864  |  Výška: 604  |  Velikost: 42KB

+ 0
- 124
courses/line-charts.Rmd Zobrazit soubor

@@ -1,124 +0,0 @@
---
title: "Line charts for population data"
output: rmarkdown::html_vignette
# The index entry is what the vignette listing shows; keep it equal to `title`
# (it previously still read "bar-chart-race", copied from another vignette).
vignette: >
  %\VignetteIndexEntry{Line charts for population data}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(vizoR)
library(ggplot2)
library(gghighlight)
library(dplyr)
```


```{r}

data("us_city_populations")

n_cities <- 5

# Cities that were ranked in the top `n_cities` in at least one year.
top_cities <- us_city_populations %>%
  filter(Rank <= n_cities) %>%
  select(City, State, Region) %>%
  distinct()

# Plot every city; gghighlight() in the plotting chunk picks out the top ones.
# The restriction to `top_cities` used to be assigned and then immediately
# overwritten, so it is kept only as a commented-out alternative.
# to_plot <- filter(us_city_populations, City %in% top_cities$City)
to_plot <- us_city_populations

# Rank of each city in the most recent year of the dataset.
last_ranks <- to_plot %>%
  filter(Year == max(Year)) %>%
  mutate(last_rank = Rank) %>%
  select(City, last_rank)

to_plot <- left_join(to_plot, last_ranks, by = "City")

# Latest row of every city, narrowed down to the `n_cities` cities with the
# smallest final rank; used for the secondary axis.
right_axis <- to_plot %>%
  group_by(City) %>%
  top_n(1, Year) %>%
  ungroup() %>%
  top_n(n_cities, -last_rank)

# Break positions (final populations) and their labels (city names).
ends <- right_axis %>%
  pull(Population)

labels <- right_axis %>%
  pull(City)

```

```{r}
# Population of every city over time, coloured by region. gghighlight() keeps
# only the cities whose final rank is within `n_cities` highlighted, and the
# secondary y axis puts each name in `labels` at the matching value in `ends`.
ggplot(
  to_plot,
  aes(x = Year, y = Population, group = City, color = Region)
) +
  geom_line(size = 1) +
  #geom_text(data = subset(to_plot, Year == 2010), aes(x=Inf, y = Population, label=City), hjust = 1) +
  scale_x_continuous("", expand = c(0, 0)) +
  scale_y_continuous(
    "",
    labels = scales::comma_format(big.mark = " "),
    sec.axis = sec_axis(~ ., breaks = ends, labels = labels)
  ) +
  theme_elegant() +
  theme(
    legend.position = "bottom",
    plot.margin = unit(c(1, 3, 1, 1), "lines"),
    axis.line.y = element_blank(),
    axis.line.x = element_blank(),
    axis.ticks.x = element_line(),
    panel.grid.major.y = element_line(color = "grey90", size = 0.2)
  ) +
  gghighlight(
    max(last_rank) <= n_cities,
    use_direct_label = FALSE,
    label_key = City
  )
```

```{r}

data("base_pop")

n_cities <- 10

# Create the ranks based on census populations. After sorting by decreasing
# population, the rows of each year are in rank order, so the grouped
# row_number() is the per-year rank. row_number() replaces `1:n()`, which
# yields c(1, 0) for an empty group.
base_pop <- base_pop %>%
  group_by(annee) %>%
  arrange(desc(population)) %>%
  mutate(rang = row_number()) %>%
  ungroup()

# Cities that were ranked in the top `n_cities` in at least one year.
top_cities <- base_pop %>%
  filter(rang <= n_cities) %>%
  select(ville, dep, region) %>%
  distinct()

to_plot <- filter(base_pop, ville %in% top_cities$ville)
#to_plot <- base_pop

# Rank of each retained city in the most recent year of the dataset.
last_ranks <- to_plot %>%
  filter(annee == max(annee)) %>%
  mutate(last_rank = rang) %>%
  select(ville, last_rank)

to_plot <- left_join(to_plot, last_ranks, by = "ville")

# Latest row of every city, narrowed down to the `n_cities` cities with the
# smallest final rank; used for the secondary axis.
right_axis <- to_plot %>%
  group_by(ville) %>%
  top_n(1, annee) %>%
  ungroup() %>%
  top_n(n_cities, -last_rank)

# Break positions (final populations) and their labels (city names).
ends <- right_axis %>%
  pull(population)

labels <- right_axis %>%
  pull(ville)

```

```{r}
# Population of the retained cities over time, coloured by region.
# gghighlight() keeps only the cities whose final rank is within `n_cities`
# highlighted, and the secondary y axis puts each name in `labels` at the
# matching value in `ends`.
ggplot(
  to_plot,
  aes(x = annee, y = population, group = ville, color = region)
) +
  geom_line(size = 1) +
  #geom_text(data = subset(to_plot, Year == 2010), aes(x=Inf, y = Population, label=City), hjust = 1) +
  scale_x_continuous("", expand = c(0, 0)) +
  scale_y_continuous(
    "",
    labels = scales::comma_format(big.mark = " "),
    sec.axis = sec_axis(~ ., breaks = ends, labels = labels)
  ) +
  theme_elegant() +
  theme(
    legend.position = "bottom",
    plot.margin = unit(c(1, 3, 1, 1), "lines"),
    axis.line.y = element_blank(),
    axis.line.x = element_blank(),
    axis.ticks.x = element_line(),
    panel.grid.major.y = element_line(color = "grey90", size = 0.2)
  ) +
  gghighlight(
    max(last_rank) <= n_cities,
    use_direct_label = FALSE,
    label_key = ville
  )
```

Načítá se…
Zrušit
Uložit