Você não pode selecionar mais de 25 tópicos. Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

lab7-temporal_data.html 9.5KB

5 anos atrás
5 anos atrás
5 anos atrás
5 anos atrás
5 anos atrás
5 anos atrás
5 anos atrás
5 anos atrás
5 anos atrás
5 anos atrás
5 anos atrás
5 anos atrás
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. <!DOCTYPE html>
  2. <html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang="">
  3. <head>
  4. <title>Lab 07 - Données temporelles et textuelles</title>
  5. <meta charset="utf-8" />
  6. <meta name="author" content="Antoine Neuraz" />
  7. <link href="libs/remark-css-0.0.1/default.css" rel="stylesheet" />
  8. <link rel="stylesheet" href="css/my_style.css" type="text/css" />
  9. </head>
  10. <body>
  11. <textarea id="source">
  12. ## TODO
  13. #### 1. charger le dataset `us_city_populations` de la librairie `vizoR`
  14. #### 2. tracer un line chart de l'évolution de la population des villes US
  15. #### 3. Mettez en évidence les 5 plus grandes villes (hint: package gghighlight)
  16. [introduction gghighlight](https://cran.r-project.org/web/packages/gghighlight/vignettes/gghighlight.html)
  17. #### 4. Appliquez les principes de design de Tufte
  18. ##### 5. BONUS: affichez le nom des villes directement à la fin de la ligne
  19. #### 6. Réalisez un streamgraph des 5 plus grandes villes US (hint: package ggTimeSeries)
  20. ---
  21. ## TODO 2
  22. #### Trouver une 3e visualisation pertinente pour montrer l'évolution de la population des villes US.
  23. ---
  24. ```r
  25. data("us_city_populations")
  26. n_cities = 5
  27. last_ranks &lt;- us_city_populations %&gt;%
  28. filter(Year == max(Year)) %&gt;%
  29. mutate(last_rank = Rank) %&gt;%
  30. select(City, last_rank)
  31. to_plot &lt;- left_join(us_city_populations, last_ranks, by= 'City')
  32. right_axis &lt;- to_plot %&gt;%
  33. group_by(City) %&gt;%
  34. top_n(1, Year) %&gt;%
  35. ungroup() %&gt;%
  36. top_n(n_cities, -last_rank)
  37. ends &lt;- right_axis %&gt;%
  38. pull(Population)
  39. labels &lt;- right_axis %&gt;%
  40. pull(City)
  41. ```
  42. ---
  43. class: full
  44. ```r
  45. p &lt;- ggplot(to_plot, aes(x=Year, y = Population,
  46. group = City, color = City)) +
  47. geom_line(size=1) +
  48. scale_x_continuous("", expand=c(0,0))+
  49. scale_y_continuous("",
  50. labels=scales::comma_format(big.mark = " "),
  51. sec.axis = sec_axis(~ .,
  52. breaks = ends,
  53. labels = labels ))+
  54. scale_color_viridis_d()+
  55. theme_elegant_dark()+
  56. theme(legend.position = "none",
  57. plot.margin = unit(c(1,3,1,1), "lines"),
  58. axis.line.y = element_blank(),
  59. axis.line.x = element_blank(),
  60. axis.ticks.x = element_line(),
  61. panel.grid.major.y = element_line(color= 'grey30', size = .2) ) +
  62. gghighlight(max(last_rank) &lt;= n_cities,
  63. use_direct_label = FALSE,
  64. label_key = City,
  65. unhighlighted_colour = "grey20")
  66. ```
  67. ---
  68. class: full
  69. ```r
  70. p
  71. ```
  72. ![](lab7-temporal_data_files/figure-html/unnamed-chunk-3-1.png)&lt;!-- --&gt;
  73. ---
  74. class: full
  75. ```r
  76. library(ggTimeSeries)
  77. p &lt;- to_plot %&gt;% filter(City %in% labels) %&gt;%
  78. ggplot(aes(x = Year, y = Population, group = City, fill = City)) +
  79. scale_y_continuous("", labels = scales::comma_format(big.mark = " "))+
  80. stat_steamgraph() +
  81. theme_elegant_dark() +
  82. scale_fill_viridis_d() +
  83. theme(plot.margin = unit(c(1,3,1,1), "lines"),
  84. axis.line.y = element_blank(),
  85. axis.line.x = element_blank(),
  86. axis.ticks.x = element_line(),
  87. panel.grid.major.y = element_line(color= 'grey30', size = .2) )
  88. ```
  89. ---
  90. class: full
  91. ```r
  92. p
  93. ```
  94. ![](lab7-temporal_data_files/figure-html/unnamed-chunk-5-1.png)&lt;!-- --&gt;
  95. ---
  96. class: inverse, center, middle
  97. # Barchart race
  98. ---
  99. ## Load data
  100. ```r
  101. data("us_city_populations")
  102. n_cities = 10
  103. top_cities &lt;-us_city_populations %&gt;% filter(Rank &lt;= n_cities) %&gt;%
  104. select(City, State, Region) %&gt;% distinct()
  105. ```
  106. ---
  107. ## Create all missing dates
  108. ```r
  109. # create a data frame with all the years between min and max Year
  110. all_years &lt;- data.frame(Year = seq(min(us_city_populations$Year),
  111. max(us_city_populations$Year), 1))
  112. # combine top_cities and all_years
  113. all_combos &lt;- merge(top_cities, all_years, all = T)
  114. # combine all_combos with the original dataset
  115. res_interp &lt;- merge(us_city_populations, all_combos, all.y = T)
  116. ```
  117. ## Interpolate the Populations when missing (linear interpolation here)
  118. ```r
  119. res_interp &lt;- res_interp %&gt;%
  120. group_by(City) %&gt;%
  121. mutate(Population=approx(Year,Population,Year)$y)
  122. ```
  123. ---
  124. ## Filter data
  125. ```r
  126. to_plot &lt;- res_interp %&gt;%
  127. group_by(Year) %&gt;%
  128. arrange(-Population) %&gt;%
  129. mutate(Rank=row_number()) %&gt;%
  130. filter(Rank&lt;=n_cities)
  131. ```
  132. ---
  133. ## Ease transitions
  134. ```r
  135. to_plot_trans &lt;- to_plot %&gt;%
  136. group_by(City) %&gt;%
  137. arrange(Year) %&gt;%
  138. mutate(lag_rank = lag(Rank, 1),
  139. change = ifelse(Rank &gt; lag(Rank, 1), 1, # nested ifelse: a second "change =" would overwrite the 1 values
  140. ifelse(Rank &lt; lag(Rank, 1), -1, 0))) %&gt;%
  141. mutate(transition = ifelse(lead(change, 1, default = 0) == -1, -.9, 0), # default = 0 avoids NA on the last rows of each city
  142. transition = ifelse(lead(change, 2, default = 0) == -1, -.5, transition),
  143. transition = ifelse(lead(change, 3, default = 0) == -1, -.3, transition),
  144. transition = ifelse(lead(change, 1, default = 0) == 1, .9, transition),
  145. transition = ifelse(lead(change, 2, default = 0) == 1, .5, transition),
  146. transition = ifelse(lead(change, 3, default = 0) == 1, .3, transition)) %&gt;%
  147. mutate(trans_rank = Rank + transition) # fractional rank used as the bar position
  148. ```
  149. ---
  150. ## Make the plot
  151. .small[
  152. ```r
  153. p &lt;- to_plot_trans %&gt;%
  154. ggplot(aes(x = -trans_rank,y = Population, group =City)) +
  155. geom_tile(aes(y = Population / 2, height = Population, fill = Region),
  156. width = 0.9) +
  157. geom_text(aes(label = City),
  158. hjust = "right", colour = "white",
  159. fontface="bold", nudge_y = -100000) +
  160. geom_text(aes(label = scales::comma(Population,big.mark = ' ')),
  161. hjust = "left", nudge_y = 100000, colour = "grey90") +
  162. coord_flip(clip="off") +
  163. hrbrthemes::scale_fill_ipsum() +
  164. scale_x_discrete("") +
  165. scale_y_continuous("",labels=scales::comma_format(big.mark = " ")) +
  166. theme_elegant_dark(base_size = 20) +
  167. theme(
  168. panel.grid.minor.x=element_blank(),
  169. axis.line = element_blank(),
  170. panel.grid.major= element_line(color='lightgrey', size=.2),
  171. legend.position = c(0.6, 0.2),
  172. plot.margin = margin(1,1,1,2,"cm"),
  173. plot.title = element_text(hjust = 0),
  174. axis.text.y=element_blank(),
  175. legend.text = element_text(size = 15),
  176. legend.background = element_blank()) +
  177. # gganimate code to transition by year:
  178. transition_time(Year) +
  179. ease_aes('cubic-in-out') +
  180. labs(title='Evolution des plus grandes villes US',
  181. subtitle='Population en {round(frame_time,0)}')
  182. ```
  183. ]
  184. ---
  185. ```r
  186. animate(p, nframes = 400, fps = 25, end_pause = 30, width = 1200)
  187. anim_save("bar_race.gif", animation = last_animation())
  188. ```
  189. ![:scale 80%](bar_race.gif)
  190. </textarea>
  191. <style data-target="print-only">@media screen {.remark-slide-container{display:block;}.remark-slide-scaler{box-shadow:none;}}</style>
  192. <script src="https://remarkjs.com/downloads/remark-latest.min.js"></script>
  193. <script src="addons/macros.js"></script>
  194. <script>var slideshow = remark.create({
  195. "ratio": "4:3",
  196. "countIncrementalSlides": false,
  197. "self-contained": true
  198. });
  199. if (window.HTMLWidgets) slideshow.on('afterShowSlide', function (slide) {
  200. window.dispatchEvent(new Event('resize'));
  201. });
  202. (function(d) {
  203. var s = d.createElement("style"), r = d.querySelector(".remark-slide-scaler");
  204. if (!r) return;
  205. s.type = "text/css"; s.innerHTML = "@page {size: " + r.style.width + " " + r.style.height +"; }";
  206. d.head.appendChild(s);
  207. })(document);
  208. (function(d) {
  209. var el = d.getElementsByClassName("remark-slides-area");
  210. if (!el) return;
  211. var slide, slides = slideshow.getSlides(), els = el[0].children;
  212. for (var i = 1; i < slides.length; i++) {
  213. slide = slides[i];
  214. if (slide.properties.continued === "true" || slide.properties.count === "false") {
  215. els[i - 1].className += ' has-continuation';
  216. }
  217. }
  218. var s = d.createElement("style");
  219. s.type = "text/css"; s.innerHTML = "@media print { .has-continuation { display: none; } }";
  220. d.head.appendChild(s);
  221. })(document);
  222. // delete the temporary CSS (for displaying all slides initially) when the user
  223. // starts to view slides
  224. (function() {
  225. var deleted = false;
  226. slideshow.on('beforeShowSlide', function(slide) {
  227. if (deleted) return;
  228. var sheets = document.styleSheets, node;
  229. for (var i = 0; i < sheets.length; i++) {
  230. node = sheets[i].ownerNode;
  231. if (node.dataset["target"] !== "print-only") continue;
  232. node.parentNode.removeChild(node);
  233. }
  234. deleted = true;
  235. });
  236. })();</script>
  237. <script>
  238. (function() {
  239. var links = document.getElementsByTagName('a');
  240. for (var i = 0; i < links.length; i++) {
  241. if (/^(https?:)?\/\//.test(links[i].getAttribute('href'))) {
  242. links[i].target = '_blank';
  243. }
  244. }
  245. })();
  246. </script>
  247. <script>
  248. slideshow._releaseMath = function(el) {
  249. var i, text, code, codes = el.getElementsByTagName('code');
  250. for (i = 0; i < codes.length;) {
  251. code = codes[i];
  252. if (code.parentNode.tagName !== 'PRE' && code.childElementCount === 0) {
  253. text = code.textContent;
  254. if (/^\\\((.|\s)+\\\)$/.test(text) || /^\\\[(.|\s)+\\\]$/.test(text) ||
  255. /^\$\$(.|\s)+\$\$$/.test(text) ||
  256. /^\\begin\{([^}]+)\}(.|\s)+\\end\{[^}]+\}$/.test(text)) {
  257. code.outerHTML = code.innerHTML; // remove <code></code>
  258. continue;
  259. }
  260. }
  261. i++;
  262. }
  263. };
  264. slideshow._releaseMath(document);
  265. </script>
  266. <!-- dynamically load mathjax for compatibility with self-contained -->
  267. <script>
  268. (function () {
  269. var script = document.createElement('script');
  270. script.type = 'text/javascript';
  271. script.src = 'https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-MML-AM_CHTML';
  272. if (location.protocol !== 'file:' && /^https?:/.test(script.src))
  273. script.src = script.src.replace(/^https?:/, '');
  274. document.getElementsByTagName('head')[0].appendChild(script);
  275. })();
  276. </script>
  277. </body>
  278. </html>