You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

245 line
6.5KB

  1. ---
  2. title: "Lab 07 - Données temporelles et textuelles"
  3. author: "Antoine Neuraz"
  4. date: "22/11/2019"
  5. output:
  6. xaringan::moon_reader:
  7. css: ['default','css/my_style.css']
  8. lib_dir: libs
  9. seal: false
  10. nature:
  11. ratio: '4:3'
  12. countIncrementalSlides: false
  13. self-contained: true
  14. beforeInit: "addons/macros.js"
  15. ---
  16. ```{r setup, include=FALSE}
  17. knitr::opts_chunk$set(echo = TRUE, fig.asp = .7, fig.width = 12)
  18. library(vizoR)
  19. library(ggplot2)
  20. library(gghighlight)
  21. library(dplyr)
  22. library(ggTimeSeries)
  23. library(hrbrthemes)
  24. library(gganimate)
  25. ```
  26. ## TODO
  27. #### 1. charger le dataset `us_city_populations` de la librairie `vizoR`
  28. #### 2. tracer un line chart de l'évolution de la population des villes US
  29. #### 3. Mettez en évidence les 5 plus grandes villes (hint: package gghighlight)
  30. [introduction gghighlight](https://cran.r-project.org/web/packages/gghighlight/vignettes/gghighlight.html)
  31. #### 4. Appliquez les principes de design de Tufte
  32. ##### 5. BONUS: affichez le nom des villes directement à la fin de la ligne
  33. #### 6. Réalisez un streamgraph des 5 plus grandes villes US (hint: package ggTimeSeries)
  34. ---
  35. ## TODO 2
  36. #### Trouver une 3e visualisation pertinente pour montrer l'évolution de la population des villes US.
  37. ---
  38. ```{r}
  39. # Tag every city with its Rank in the most recent Year of the dataset.
  40. data("us_city_populations")
  41. n_cities <- 5
  42. last_ranks <- us_city_populations %>%
  43.   filter(Year == max(Year)) %>%
  44.   select(City, last_rank = Rank)
  45. to_plot <- left_join(us_city_populations, last_ranks, by = "City")
  46. # Latest row of each city, narrowed to the n_cities smallest last_rank values.
  47. right_axis <- to_plot %>%
  48.   group_by(City) %>%
  49.   top_n(1, Year) %>%
  50.   ungroup() %>%
  51.   top_n(n_cities, -last_rank)
  52. # Break positions and label text used for the plot's secondary axis.
  53. ends <- pull(right_axis, Population)
  54. labels <- pull(right_axis, City)
  55. ```
  56. ---
  57. class: full
  58. ```{r, echo = TRUE}
  59. # One line per city; the secondary y axis carries the names in `labels`.
  60. p <- ggplot(to_plot, aes(x = Year, y = Population, group = City, color = City)) +
  61.   geom_line(size = 1) +
  62.   scale_x_continuous("", expand = c(0, 0)) +
  63.   scale_y_continuous(
  64.     "",
  65.     labels = scales::comma_format(big.mark = " "),
  66.     sec.axis = sec_axis(~ ., breaks = ends, labels = labels)
  67.   ) +
  68.   scale_color_viridis_d() +
  69.   theme_elegant_dark() +
  70.   theme(
  71.     legend.position = "none",
  72.     plot.margin = unit(c(1, 3, 1, 1), "lines"),
  73.     axis.line.x = element_blank(), axis.line.y = element_blank(),
  74.     axis.ticks.x = element_line(),
  75.     panel.grid.major.y = element_line(color = "grey30", size = 0.2)
  76.   ) +
  77.   # Cities whose last_rank exceeds n_cities are drawn in grey20.
  78.   gghighlight(max(last_rank) <= n_cities, use_direct_label = FALSE,
  79.               label_key = City, unhighlighted_colour = "grey20")
  80. ```
  81. ---
  82. class: full
  83. ```{r, echo = TRUE}
  84. p # draw the line chart assembled in the previous chunk
  85. ```
  86. ---
  87. class: full
  88. ```{r, echo = TRUE}
  89. library(ggTimeSeries)
  90. # Streamgraph restricted to the cities named in `labels`.
  91. p <- ggplot(filter(to_plot, City %in% labels),
  92.             aes(x = Year, y = Population, group = City, fill = City)) +
  93.   scale_y_continuous("", labels = scales::comma_format(big.mark = " ")) +
  94.   stat_steamgraph() +
  95.   theme_elegant_dark() +
  96.   scale_fill_viridis_d() +
  97.   theme(plot.margin = unit(c(1, 3, 1, 1), "lines"),
  98.         axis.line.x = element_blank(), axis.line.y = element_blank(),
  99.         axis.ticks.x = element_line(),
  100.         panel.grid.major.y = element_line(color = "grey30", size = 0.2))
  101. ```
  102. ---
  103. class: full
  104. ```{r}
  105. p # draw the streamgraph assembled in the previous chunk
  106. ```
  107. ---
  108. class: inverse, center, middle
  109. # Barchart race
  110. ---
  111. ## Load data
  112. ```{r load_data}
  113. data("us_city_populations")
  114. n_cities <- 10 # number of bars kept per Year in the bar chart race
  115. top_cities <- us_city_populations %>%
  116.   filter(Rank <= n_cities) %>% select(City, State, Region) %>% distinct()
  117. ```
  118. ---
  119. ## Create all missing dates
  120. ```{r, combine_dates}
  121. # One row per year between the first and last Year of the dataset.
  122. all_years <- data.frame(Year = seq(min(us_city_populations$Year),
  123.                                    max(us_city_populations$Year), by = 1))
  124. # Cross every top city with every year (no shared columns => Cartesian product).
  125. all_combos <- merge(top_cities, all_years, all = TRUE)
  126. # Keep every city/year combination; years without an observation get NA.
  127. res_interp <- merge(us_city_populations, all_combos, all.y = TRUE)
  128. ```
  129. ## Interpolate the Populations when missing (linear interpolation here)
  130. ```{r, interpolate}
  131. res_interp <- res_interp %>%
  132.   group_by(City) %>% # interpolate each City's series separately
  133.   mutate(Population = approx(x = Year, y = Population, xout = Year)$y)
  134. ```
  135. ---
  136. ## Filter data
  137. ```{r, filter_for_plot}
  138. to_plot <- res_interp %>%
  139.   group_by(Year) %>%
  140.   arrange(desc(Population)) %>% # largest population first
  141.   mutate(Rank = row_number()) %>% # position within each Year
  142.   filter(Rank <= n_cities)
  143. ```
  144. ---
  145. ## Ease transitions
  146. ```{r}
  147. to_plot_trans <- to_plot %>%
  148.   group_by(City) %>%
  149.   arrange(Year) %>%
  150.   # change = +1 / -1 / 0 vs the city's previous row (one step, so +1 is not overwritten).
  151.   mutate(lag_rank = lag(Rank, 1), change = sign(Rank - lag_rank)) %>%
  152.   # Nudge the rank toward an upcoming change; default = 0 avoids NA on the last rows.
  153.   mutate(transition = ifelse(lead(change, 1, default = 0) == -1, -.9, 0),
  154.          transition = ifelse(lead(change, 2, default = 0) == -1, -.5, transition),
  155.          transition = ifelse(lead(change, 3, default = 0) == -1, -.3, transition),
  156.          transition = ifelse(lead(change, 1, default = 0) == 1, .9, transition),
  157.          transition = ifelse(lead(change, 2, default = 0) == 1, .5, transition),
  158.          transition = ifelse(lead(change, 3, default = 0) == 1, .3, transition)) %>%
  159.   mutate(trans_rank = Rank + transition)
  160. ```
  161. ---
  162. ## Make the plot
  163. .small[
  164. ```{r, make_plot}
  165. p <- to_plot_trans %>% # bars are positioned with the eased ranks in to_plot_trans
  166. ggplot(aes(x = -trans_rank,y = Population, group =City)) + # negated rank: rank 1 gets the largest x
  167. geom_tile(aes(y = Population / 2, height = Population, fill = Region), # tile centred at half its height, so it spans 0..Population
  168. width = 0.9) +
  169. geom_text(aes(label = City), # city name, right-aligned 100000 below the bar's end value
  170. hjust = "right", colour = "white",
  171. fontface="bold", nudge_y = -100000) +
  172. geom_text(aes(label = scales::comma(Population,big.mark = ' ')), # formatted population, left-aligned 100000 past the bar's end value
  173. hjust = "left", nudge_y = 100000, colour = "grey90") +
  174. coord_flip(clip="off") + # horizontal bars; clip = "off" disables clipping to the panel
  175. hrbrthemes::scale_fill_ipsum() +
  176. scale_x_discrete("") + # empty title for the rank axis
  177. scale_y_continuous("",labels=scales::comma_format(big.mark = " ")) +
  178. theme_elegant_dark(base_size = 20) +
  179. theme(
  180. panel.grid.minor.x=element_blank(),
  181. axis.line = element_blank(),
  182. panel.grid.major= element_line(color='lightgrey', size=.2),
  183. legend.position = c(0.6, 0.2),
  184. plot.margin = margin(1,1,1,2,"cm"),
  185. plot.title = element_text(hjust = 0),
  186. axis.text.y=element_blank(), # hide the vertical axis tick labels (city names come from geom_text)
  187. legend.text = element_text(size = 15),
  188. legend.background = element_blank()) +
  189. # gganimate: animate over Year and ease the aesthetics between states
  190. transition_time(Year) +
  191. ease_aes('cubic-in-out') +
  192. labs(title='Evolution des plus grandes villes US',
  193. subtitle='Population en {round(frame_time,0)}') # NOTE(review): frame_time is presumably filled in by gganimate for each frame - confirm
  194. ```
  195. ]
  196. ---
  197. ```{r, animate, eval = FALSE}
  198. animate(plot = p, nframes = 400, fps = 25, end_pause = 30, width = 1200)
  199. anim_save(filename = "bar_race.gif", animation = last_animation()) # save the animation rendered just above
  200. ```
  201. ![:scale 80%](bar_race.gif)