|
- library(tidyverse)
- library(shiny)
- library(miniUI)
- library(broom)
-
- tibble(x = runif(20, 0, 10),
- y = x^2 - 2*x + rnorm(20, 0, 5)) -> df
-
- df %>%
- ggplot() +
- aes(x = x, y = y) +
- geom_point() -> g
-
- df %>%
- sample_n(10) -> train
-
- df %>%
- anti_join(train) -> test
-
- train %>%
- ggplot() +
- aes(x = x, y = y) +
- geom_point() -> gtrain
-
- gtrain +
- geom_point(data = test, aes(color = "red")) +
- theme(legend.position = "none") -> g
-
- ggsave(filename = "set.png", plot = g)
-
- powers <- function(df, n)
- {
- for (i in 1:n)
- df[[str_c("x", i)]] <- df$x^i
-
- df
- }
-
- LM <- function(df, n)
- {
- str_c("y ~ ", str_c("x", 1:n, collapse = " + ")) %>% as.formula -> form
-
- df <- powers(df, n)
-
- lm(form, data = df)
- }
-
- PREDICT <- function(train, test, n)
- {
- model <- LM(train, n)
-
- test <- powers(test, n)
-
- mean((test$y - predict(model, test))^2)
- }
-
- Fggfun <- function(model)
- {
- model %>%
- pull(estimate) -> est
- Vectorize(
- function(x)
- {
- sum(x^(0:(length(est) - 1)) * est)
- })
- }
-
- 1:9 %>% map(~LM(train, .) %>% tidy %>% Fggfun) -> ggfuns
-
- 1:9 %>%
- map(function(n)
- {
- LM(train, n) %>%
- tidy %>%
- Fggfun %>%
- stat_function(fun = .)
- }) -> regs
-
- 1:9 %>%
- map(function(n)
- {
- (gtrain + regs[n]) %>%
- ggsave(filename = str_c("train_", n, ".png"))
- })
-
- 1:9 %>%
- map(function(n)
- {
- (g + regs[n]) %>%
- ggsave(filename = str_c("all_", n, ".png"))
- })
-
- (
- 1:9 %>%
- map_dbl(~LM(train, .) %>% glance %>% pull(r.squared)) %>%
- tibble(x = 1:9, y = .) %>%
- ggplot() +
- aes(x = x, y = y) +
- geom_point() +
- geom_line() +
- scale_x_continuous(breaks = 0:10)
- ) %>%
- ggsave(filename = "rsquared.png", plot = .)
-
- (
- 1:9 %>%
- map_dbl(~PREDICT(train, test, .)) %>%
- tibble(x = 1:9, y = .) %>%
- ggplot() +
- aes(x = x, y = y) +
- geom_point() +
- geom_line() +
- scale_x_continuous(breaks = 0:10)
- ) %>%
- ggsave(filename = "error.png", plot = .)
-
-
- # Bootstrap ----
-
- x <- rnorm(30)
- hist(x)
- summary(x)
- t.test(x)
-
- bootstrap <- function(x, b)
- {
- 1:b %>%
- map_dbl(function(y)
- {
- sample(x, replace = T) %>%
- mean
- }) -> bootstraped
-
- hist(bootstraped)
- abline(v = mean(bootstraped))
- abline(v = quantile(bootstraped, probs = c(.025, .975)), lty = 2)
-
- list(mean = mean(bootstraped), IC = quantile(bootstraped, probs = c(.025,.975)))
- }
-
- bootstrap(x, 10000)
-
- # CV ----
-
- cvpart <- function(df, k)
- {
- n <- nrow(df)
- rep(1:k, length.out = n) %>%
- sample -> parts
-
- df %>%
- split(parts) -> tests
-
- tests %>%
- map(~anti_join(df, .)) -> trains
-
- transpose(list(train = trains, test = tests))
- }
-
- cvdegree <- function(df, k, N)
- {
- df %>%
- cvpart(k) -> parts
-
- 1:N %>%
- map_dbl(function(n)
- {
- parts %>%
- map_dbl(function(df)
- {
- PREDICT(df$train, df$test, n)
- }) %>%
- mean
- }) %>%
- which.min
- }
-
- tibble(x = runif(200, 0, 10),
- y = x^2 - 2*x + rnorm(200, 0, 5)) -> df
-
- df %>%
- ggplot() +
- aes(x = x, y = y) +
- geom_point() -> g
|