|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244 |
- ---
- title: "Lab 07 - Données temporelles et textuelles"
- author: "Antoine Neuraz"
- date: "22/11/2019"
- output:
- xaringan::moon_reader:
- css: ['default','css/my_style.css']
- lib_dir: libs
- seal: false
- nature:
- ratio: '4:3'
- countIncrementalSlides: false
- self-contained: true
- beforeInit: "addons/macros.js"
- ---
-
- ```{r setup, include=FALSE}
- knitr::opts_chunk$set(echo = TRUE, fig.asp = .7, fig.width = 12)
- library(vizoR)
- library(ggplot2)
- library(gghighlight)
- library(dplyr)
- library(ggTimeSeries)
- library(hrbrthemes)
- library(gganimate)
- ```
-
- ## TODO
-
- #### 1. Chargez le dataset `us_city_populations` de la librairie `vizoR`
-
- #### 2. Tracez un line chart de l'évolution de la population des villes US
-
- #### 3. Mettez en évidence les 5 plus grandes villes (hint: package gghighlight)
- [introduction gghighlight](https://cran.r-project.org/web/packages/gghighlight/vignettes/gghighlight.html)
-
- #### 4. Appliquez les principes de design de Tufte
-
- #### 5. BONUS : affichez le nom des villes directement à la fin de la ligne
-
- #### 6. Réalisez un streamgraph des 5 plus grandes villes US (hint: package ggTimeSeries)
-
- ---
-
- ## TODO 2
-
- #### Trouvez une 3e visualisation pertinente pour montrer l'évolution de la population des villes US.
-
- ---
-
- ```{r}
- # Load the city population table shipped with vizoR.
- data("us_city_populations")
- 
- # Number of best-ranked cities to label (also used by the highlight step later).
- n_cities <- 5
- 
- # Rank of every city in the most recent year of the dataset.
- last_ranks <- us_city_populations %>%
-   filter(Year == max(Year)) %>%
-   select(City, last_rank = Rank)
- 
- # Attach that final rank to every yearly observation of the city.
- to_plot <- us_city_populations %>%
-   left_join(last_ranks, by = "City")
- 
- # Latest observation of each city, restricted to the n_cities best final ranks.
- right_axis <- to_plot %>%
-   group_by(City) %>%
-   top_n(1, Year) %>%
-   ungroup() %>%
-   top_n(n_cities, -last_rank)
- 
- # Break positions (population) and texts (city names) for the right-hand axis.
- ends <- right_axis[["Population"]]
- 
- labels <- right_axis[["City"]]
-
- ```
-
- ---
- class: full
- ```{r, echo = TRUE}
- # Line chart of population over time, one line per city. The secondary y axis
- # places the city names in `labels` at the population values in `ends`.
- p <- ggplot(
-   to_plot,
-   aes(x = Year, y = Population, group = City, color = City)
- ) +
-   geom_line(size = 1) +
-   scale_x_continuous("", expand = c(0, 0)) +
-   scale_y_continuous(
-     "",
-     labels = scales::comma_format(big.mark = " "),
-     sec.axis = sec_axis(~ ., breaks = ends, labels = labels)
-   ) +
-   scale_color_viridis_d() +
-   theme_elegant_dark() +
-   theme(
-     legend.position = "none",
-     plot.margin = unit(c(1, 3, 1, 1), "lines"),
-     axis.line.y = element_blank(),
-     axis.line.x = element_blank(),
-     axis.ticks.x = element_line(),
-     panel.grid.major.y = element_line(color = "grey30", size = .2)
-   ) +
-   # Highlight predicate: cities whose final rank is within the top n_cities.
-   gghighlight(
-     max(last_rank) <= n_cities,
-     use_direct_label = FALSE,
-     label_key = City,
-     unhighlighted_colour = "grey20"
-   )
-
- ```
- ---
- class: full
- ```{r, echo = TRUE}
- # Print the plot object `p` so it is rendered on this slide.
- p
- ```
- ---
-
- class: full
- ```{r, echo = TRUE}
- library(ggTimeSeries)
- 
- # Streamgraph of population over time, limited to the cities listed in `labels`.
- p <- to_plot %>%
-   filter(City %in% labels) %>%
-   ggplot(aes(x = Year, y = Population, group = City, fill = City)) +
-   scale_y_continuous("", labels = scales::comma_format(big.mark = " ")) +
-   stat_steamgraph() +
-   theme_elegant_dark() +
-   scale_fill_viridis_d() +
-   theme(
-     plot.margin = unit(c(1, 3, 1, 1), "lines"),
-     axis.line.y = element_blank(),
-     axis.line.x = element_blank(),
-     axis.ticks.x = element_line(),
-     panel.grid.major.y = element_line(color = "grey30", size = .2)
-   )
-
- ```
-
- ---
- class: full
- ```{r}
- # Print the plot object `p` so it is rendered on this slide.
- p
- ```
- ---
- class: inverse, center, middle
- # Barchart race
-
- ---
- ## Load data
- ```{r load_data}
- # Reload the city population table shipped with vizoR.
- data("us_city_populations")
- 
- # Rank threshold used to select cities for the bar chart race.
- n_cities <- 10
- 
- # Distinct cities (with state and region) that have at least one row with
- # Rank <= n_cities.
- top_cities <- us_city_populations %>%
-   filter(Rank <= n_cities) %>%
-   distinct(City, State, Region)
-
- ```
-
- ---
- ## Create all missing dates
- ```{r, combine_dates}
- # Every year between the first and the last year present in the dataset.
- all_years <- data.frame(
-   Year = seq(min(us_city_populations$Year), max(us_city_populations$Year), 1)
- )
- 
- # Cross join: one row per (top city, year). The two tables share no column,
- # so `by = NULL` states the Cartesian product explicitly.
- all_combos <- merge(top_cities, all_years, by = NULL)
- 
- # Keep every (city, year) combination; columns that only exist in the original
- # dataset are NA for combinations that have no matching row.
- # `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
- res_interp <- merge(us_city_populations, all_combos, all.y = TRUE)
- ```
-
- ## Interpolate the Populations when missing (linear interpolation here)
- ```{r, interpolate}
- # Linearly interpolate Population over Year within each city. With approx()'s
- # default rule, years outside the range of a city's known values stay NA.
- res_interp <- res_interp %>%
-   group_by(City) %>%
-   mutate(Population = approx(x = Year, y = Population, xout = Year)$y) %>%
-   # Drop the per-city grouping so it does not leak into later steps.
-   ungroup()
- ```
-
- ---
- ## Filter data
- ```{r, filter_for_plot}
- # Sort by decreasing population, number the rows within each year, and keep
- # the rows whose within-year rank is at most n_cities.
- to_plot <- res_interp %>%
-   group_by(Year) %>%
-   arrange(desc(Population)) %>%
-   mutate(Rank = row_number()) %>%
-   filter(Rank <= n_cities)
-
- ```
-
- ---
- ## Ease transitions
-
- ```{r}
- # Shift each bar part-way towards its next rank during the rows that precede
- # a rank change, producing a fractional `trans_rank` used as the bar position.
- to_plot_trans <- to_plot %>%
-   group_by(City) %>%
-   arrange(Year) %>%
-   mutate(
-     lag_rank = lag(Rank, 1),
-     # Direction of the rank change versus the city's previous row:
-     #  1 = rank number went up, -1 = rank number went down,
-     #  0 = unchanged or no previous row.
-     # A single case_when() is used because two successive ifelse() assignments
-     # to `change` let the second overwrite the first, so 1 was never produced.
-     change = case_when(
-       Rank > lag_rank ~ 1,
-       Rank < lag_rank ~ -1,
-       TRUE ~ 0
-     )
-   ) %>%
-   # `default = 0` treats rows past the end of a city's series as "no change";
-   # without it the last rows get an NA offset and drop out of the plot.
-   # Later assignments take precedence over earlier ones, as in the original.
-   mutate(
-     transition = ifelse(lead(change, 1, default = 0) == -1, -.9, 0),
-     transition = ifelse(lead(change, 2, default = 0) == -1, -.5, transition),
-     transition = ifelse(lead(change, 3, default = 0) == -1, -.3, transition),
-     transition = ifelse(lead(change, 1, default = 0) == 1, .9, transition),
-     transition = ifelse(lead(change, 2, default = 0) == 1, .5, transition),
-     transition = ifelse(lead(change, 3, default = 0) == 1, .3, transition)
-   ) %>%
-   mutate(trans_rank = Rank + transition) %>%
-   # Return an ungrouped table; the plot sets its own grouping aesthetic.
-   ungroup()
-
- ```
-
-
- ---
- ## Make the plot
- .small[
- ```{r, make_plot}
- # Animated horizontal bar chart: bars are tiles positioned at -trans_rank,
- # with the city name and the formatted population drawn as text on each bar.
- p <- to_plot_trans %>%
-   ggplot(aes(x = -trans_rank, y = Population, group = City)) +
-   # Tile centred at Population / 2 with height Population, i.e. spanning 0..Population.
-   geom_tile(
-     aes(y = Population / 2, height = Population, fill = Region),
-     width = 0.9
-   ) +
-   geom_text(
-     aes(label = City),
-     hjust = "right", colour = "white",
-     fontface = "bold", nudge_y = -100000
-   ) +
-   geom_text(
-     aes(label = scales::comma(Population, big.mark = " ")),
-     hjust = "left", nudge_y = 100000, colour = "grey90"
-   ) +
-   coord_flip(clip = "off") +
-   hrbrthemes::scale_fill_ipsum() +
-   scale_x_discrete("") +
-   scale_y_continuous("", labels = scales::comma_format(big.mark = " ")) +
-   theme_elegant_dark(base_size = 20) +
-   theme(
-     panel.grid.minor.x = element_blank(),
-     axis.line = element_blank(),
-     panel.grid.major = element_line(color = "lightgrey", size = .2),
-     legend.position = c(0.6, 0.2),
-     plot.margin = margin(1, 1, 1, 2, "cm"),
-     plot.title = element_text(hjust = 0),
-     axis.text.y = element_blank(),
-     legend.text = element_text(size = 15),
-     legend.background = element_blank()
-   ) +
-   # gganimate: animate over Year with cubic easing; the subtitle shows the
-   # rounded frame time.
-   transition_time(Year) +
-   ease_aes("cubic-in-out") +
-   labs(
-     title = "Evolution des plus grandes villes US",
-     subtitle = "Population en {round(frame_time,0)}"
-   )
-
- ```
- ]
- ---
- ```{r, animate, eval = FALSE}
- # Render the animated plot `p` (400 frames, 25 fps, end_pause = 30, width = 1200).
- # This chunk is not evaluated when knitting (eval = FALSE); run it manually.
- animate(p, nframes = 400, fps = 25, end_pause = 30, width = 1200)
- # Save the animation returned by last_animation() as bar_race.gif, the file
- # embedded in the slide below.
- anim_save("bar_race.gif", animation = last_animation())
- ```
-
- ![:scale 80%](bar_race.gif)
-
|