A graphTV clone
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

preprocess.R 890B

12345678910111213141516171819202122232425262728
  # preprocess.R
  #
  # Builds one row per rated TV episode and caches the result as "imdb.rds".
  #
  # Inputs, read from the working directory:
  #   basics.tsv   - titles (tconst, titleType, primaryTitle, startYear, ...)
  #   episodes.tsv - episode -> series links (tconst, parentTconst,
  #                  seasonNumber, episodeNumber)
  #   ratings.tsv  - ratings (tconst, averageRating, numVotes)
  # NOTE(review): the column names match the IMDb dataset dumps; the exact
  # files these were renamed from are not visible here - confirm.
  #
  # Output: imdb.rds, sorted by seriesTitle, season, episode.

  library(tidyverse)

  # quote = "" turns off quote handling. These dumps are presumably unquoted
  # TSVs whose titles may contain literal double quotes; with readr's default
  # quote character such a title can swallow the following rows.
  # The default `na` handling is kept on purpose, so the "\N" missing-value
  # markers stay as plain strings, exactly as before.
  basics <- read_tsv("basics.tsv", quote = "")
  episodes <- read_tsv("episodes.tsv", quote = "")
  ratings <- read_tsv("ratings.tsv", quote = "")

  # Series-level titles (regular series and mini-series).
  tvseries <- basics %>%
    filter(titleType %in% c("tvSeries", "tvMiniSeries")) %>%
    select(-titleType)

  # Episode-level titles. startYear is dropped so that the series' startYear
  # is the only one left after the joins below.
  tvepisodes <- basics %>%
    filter(titleType == "tvEpisode") %>%
    select(-titleType, -startYear)

  final <- tvseries %>%
    # Attach every episode to its parent series. `episodes` has its own
    # tconst column (the episode id), which the join suffixes as tconst.y.
    inner_join(episodes, by = c("tconst" = "parentTconst")) %>%
    select(
      id = tconst,
      seriesTitle = primaryTitle,
      eptconst = tconst.y,
      season = seasonNumber,
      episode = episodeNumber,
      startYear
    ) %>%
    # "\\N" is the two-character string \N, the marker for a missing season.
    filter(season != "\\N") %>%
    # Keep only episodes that have a rating and an episode-level title row.
    inner_join(ratings, by = c("eptconst" = "tconst")) %>%
    inner_join(tvepisodes, by = c("eptconst" = "tconst")) %>%
    select(-eptconst, episodeTitle = primaryTitle) %>%
    # across() replaces the superseded mutate_at(vars(...), ...).
    mutate(across(c(season, episode, averageRating, numVotes), as.numeric)) %>%
    arrange(seriesTitle, season, episode)

  saveRDS(final, "imdb.rds")