Browse Source

R preprocessing

master
Maxime Wack 6 years ago
parent
commit
576a275474
1 changed files with 31 additions and 0 deletions
  1. +31
    -0
      preprocess.R

+ 31
- 0
preprocess.R View File

@@ -0,0 +1,31 @@
library(tidyverse)
library(RSQLite)

# Connect to the SQLite database file that the final table is written to.
imdb <- dbConnect(drv = SQLite(), dbname = "imdb.db")

# Load the three tab-separated input tables (paths are relative to the
# current working directory).
basics <- read_tsv(file = "basics.tsv")
episodes <- read_tsv(file = "episodes.tsv")
ratings <- read_tsv(file = "ratings.tsv")

# Series-level rows only. titleType is constant once filtered, so it is
# dropped from the result.
tvseries <- basics %>%
  filter(titleType == "tvSeries") %>%
  select(-titleType)

# Episode-level rows only, without titleType and startYear.
# NOTE(review): startYear is presumably removed so that this table shares
# only the tconst column with the series table it is joined to later --
# confirm.
tvepisodes <- basics %>%
  filter(titleType == "tvEpisode") %>%
  select(-c(titleType, startYear))

# Assemble the output table: one row per episode that has a season number,
# a rating and an episode-level title, labelled with its series title.
#
# Every join names its key explicitly. Without `by`, inner_join() joins on
# all column names the two tables happen to share, so an extra column in
# any input file would silently change which rows match.
final <- tvseries %>%
  # Series -> episodes. The episode id coming from `episodes` has the same
  # name as the series id and is therefore exposed as tconst.y.
  inner_join(episodes, by = c("tconst" = "parentTconst")) %>%
  # Drop the series id, keep the episode id as tconst, and give the
  # remaining columns their output names.
  select(
    -tconst,
    seriesTitle = primaryTitle,
    tconst = tconst.y,
    season = seasonNumber,
    episode = episodeNumber
  ) %>%
  # Discard episodes whose season is the literal string \N (presumably the
  # source's marker for a missing value -- confirm).
  filter(season != "\\N") %>%
  inner_join(ratings, by = "tconst") %>%
  inner_join(tvepisodes, by = "tconst") %>%
  # The episode id was only needed as a join key.
  select(-tconst, episodeTitle = primaryTitle) %>%
  # NOTE(review): if season/episode were read as text (the "\\N" comparison
  # above suggests so), this ordering is lexicographic ("10" sorts before
  # "2") -- confirm whether numeric order is wanted.
  arrange(seriesTitle, season, episode)

# Store the assembled table in the database under the name "imdb", then
# release the connection. `finally` runs the disconnect whether or not the
# write succeeds, so the connection is never left open.
# NOTE(review): dbWriteTable() is called without overwrite/append, so per the
# DBI documentation it fails when a table named "imdb" already exists --
# confirm that the script is meant to run against a fresh database.
invisible(tryCatch(
  dbWriteTable(conn = imdb, name = "imdb", value = final),
  finally = dbDisconnect(imdb)
))

Loading…
Cancel
Save