|
|
@@ -113,3 +113,73 @@ ggsave(filename = "rsquared.png", plot = .) |
|
|
|
scale_x_continuous(breaks = 0:10) |
|
|
|
) %>% |
|
|
|
ggsave(filename = "error.png", plot = .) |
|
|
|
|
|
|
|
|
|
|
|
# Bootstrap ----
|
|
|
|
|
|
|
# Draw a small standard-normal sample and inspect it: histogram, summary
# statistics, and a one-sample t-test (whose output includes a classical
# confidence interval for the mean).
x <- rnorm(30)
hist(x)
summary(x)
t.test(x)
|
|
|
|
|
|
|
#' Bootstrap the mean of a sample
#'
#' Draws `b` resamples of `x` with replacement and computes the mean of each.
#' As a side effect, plots a histogram of those means with a solid vertical
#' line at their mean and dashed lines at their 2.5% and 97.5% quantiles.
#'
#' @param x Vector of observations to resample (must not be empty).
#' @param b Number of bootstrap replicates (a single number >= 1).
#' @return A list with `mean` (mean of the bootstrap means) and `IC` (the
#'   2.5% and 97.5% quantiles of the bootstrap means).
bootstrap <- function(x, b) {
  stopifnot(
    length(x) > 0,
    is.numeric(b), length(b) == 1, !is.na(b), b >= 1
  )

  n_obs <- length(x)

  # seq_len() rather than 1:b, which would count down (1, 0) for b = 0.
  bootstraped <- vapply(
    seq_len(b),
    function(i) {
      # Resample by position: sample(x) would treat a single number x as
      # 1:x, so a length-one sample would not be resampled as itself.
      mean(x[sample.int(n_obs, replace = TRUE)])
    },
    numeric(1)
  )

  centre <- mean(bootstraped)
  interval <- quantile(bootstraped, probs = c(.025, .975))

  hist(bootstraped)
  abline(v = centre)
  abline(v = interval, lty = 2)

  list(mean = centre, IC = interval)
}
|
|
|
|
|
|
|
# Bootstrap the mean of the sample `x` with 10,000 replicates.
bootstrap(x, 10000)
|
|
|
|
|
|
|
# CV ----
|
|
|
|
|
|
|
#' Partition a data frame into k cross-validation folds
#'
#' Assigns every row a fold label from 1 to `k` (labels are balanced, then
#' shuffled) and builds one train/test pair per fold.
#'
#' @param df Data frame (or tibble) to partition.
#' @param k Number of folds (a single number >= 1).
#' @return A list with one element per fold, named by fold label. Each element
#'   is a list with `train` (the rows outside the fold) and `test` (the rows
#'   in the fold).
cvpart <- function(df, k) {
  stopifnot(is.numeric(k), length(k) == 1, !is.na(k), k >= 1)

  n <- nrow(df)

  # Repeat 1..k to length n so fold sizes differ by at most one, then shuffle.
  parts <- sample(rep(seq_len(k), length.out = n))

  # Split row positions rather than rows: selecting the training set by
  # position keeps rows whose values duplicate a test row, which a
  # value-based anti-join would silently drop.
  lapply(
    split(seq_len(n), parts),
    function(rows) {
      list(
        train = df[-rows, , drop = FALSE],
        test = df[rows, , drop = FALSE]
      )
    }
  )
}
|
|
|
|
|
|
|
#' Choose a degree by k-fold cross-validation
#'
#' Partitions `df` once with `cvpart()`, then for every degree from 1 to `N`
#' calls `PREDICT(train, test, degree)` on each fold and averages the results.
#' The degree with the smallest average is returned.
#'
#' `PREDICT` is defined outside this section; it is presumably a function
#' returning a single numeric test error for a model of the given degree --
#' TODO confirm against its definition.
#'
#' @param df Data frame to cross-validate on.
#' @param k Number of folds, passed to `cvpart()`.
#' @param N Largest degree to try (a single number >= 1).
#' @return The index (equal to the degree) with the smallest mean `PREDICT`
#'   value across folds.
cvdegree <- function(df, k, N) {
  stopifnot(is.numeric(N), length(N) == 1, !is.na(N), N >= 1)

  # The same folds are reused for every degree so the scores are comparable.
  folds <- cvpart(df, k)

  # seq_len() rather than 1:N, which would count down (1, 0) for N = 0.
  cv_score <- vapply(
    seq_len(N),
    function(degree) {
      fold_score <- vapply(
        folds,
        function(fold) PREDICT(fold$train, fold$test, degree),
        numeric(1)
      )
      mean(fold_score)
    },
    numeric(1)
  )

  which.min(cv_score)
}
|
|
|
|
|
|
|
# Simulate 200 points from a noisy quadratic: x uniform on [0, 10],
# y = x^2 - 2x plus normal noise with mean 0 and standard deviation 5.
df <- tibble(
  x = runif(200, 0, 10),
  y = x^2 - 2 * x + rnorm(200, 0, 5)
)

# Scatter plot of the simulated data, stored in `g`.
g <- ggplot(df) +
  aes(x = x, y = y) +
  geom_point()
|
|
|
|