# Build a per-episode ratings table for TV series from three TSV inputs
# (basics.tsv, episodes.tsv, ratings.tsv) and store it as imdb.rds.
library(tidyverse)

# Load raw inputs ----
basics <- read_tsv("basics.tsv")
episodes <- read_tsv("episodes.tsv")
ratings <- read_tsv("ratings.tsv")

# Split titles by type ----
# Series-level rows; titleType is dropped once it has served as the filter.
tvseries <- basics %>%
  filter(titleType %in% c("tvSeries", "tvMiniSeries")) %>%
  select(-titleType)

# Episode-level rows; startYear is dropped so it cannot collide with the
# series-level startYear that is carried through the pipeline below.
tvepisodes <- basics %>%
  filter(titleType == "tvEpisode") %>%
  select(-titleType, -startYear)

# Assemble one row per rated episode ----
final <- tvseries %>%
  # Attach every series to its episodes. Both tables carry a `tconst` column:
  # the series id stays `tconst` (it is the join key) and the episode id is
  # picked up below as `tconst.y`.
  inner_join(episodes, by = c("tconst" = "parentTconst")) %>%
  select(
    id = tconst,
    seriesTitle = primaryTitle,
    eptconst = tconst.y,
    season = seasonNumber,
    episode = episodeNumber,
    startYear
  ) %>%
  # Discard rows whose season is the literal string \N.
  # NOTE(review): \N appears to be the missing-value marker in these files;
  # confirm against the data source.
  filter(season != "\\N") %>%
  # Inner joins: only episodes that have a ratings row AND an episode-level
  # row in `basics` are kept.
  inner_join(ratings, by = c("eptconst" = "tconst")) %>%
  inner_join(tvepisodes, by = c("eptconst" = "tconst")) %>%
  # Drop the helper key and expose the episode's primaryTitle as episodeTitle.
  select(-eptconst, episodeTitle = primaryTitle) %>%
  # across() replaces the superseded mutate_at(vars(...), ...) form.
  mutate(across(c(season, episode, averageRating, numVotes), as.numeric)) %>%
  arrange(seriesTitle, season, episode)

# Persist the result ----
saveRDS(final, "imdb.rds")