|
|
@@ -0,0 +1,115 @@ |
|
|
|
library(tidyverse) |
|
|
|
library(shiny) |
|
|
|
library(miniUI) |
|
|
|
library(broom) |
|
|
|
|
|
|
|
tibble(x = runif(20, 0, 10), |
|
|
|
y = x^2 - 2*x + rnorm(20, 0, 5)) -> df |
|
|
|
|
|
|
|
df %>% |
|
|
|
ggplot() + |
|
|
|
aes(x = x, y = y) + |
|
|
|
geom_point() -> g |
|
|
|
|
|
|
|
df %>% |
|
|
|
sample_n(10) -> train |
|
|
|
|
|
|
|
df %>% |
|
|
|
anti_join(train) -> test |
|
|
|
|
|
|
|
train %>% |
|
|
|
ggplot() + |
|
|
|
aes(x = x, y = y) + |
|
|
|
geom_point() -> gtrain |
|
|
|
|
|
|
|
gtrain + |
|
|
|
geom_point(data = test, aes(color = "red")) + |
|
|
|
theme(legend.position = "none") -> g |
|
|
|
|
|
|
|
ggsave(filename = "set.png", plot = g) |
|
|
|
|
|
|
|
powers <- function(df, n) |
|
|
|
{ |
|
|
|
for (i in 1:n) |
|
|
|
df[[str_c("x", i)]] <- df$x^i |
|
|
|
|
|
|
|
df |
|
|
|
} |
|
|
|
|
|
|
|
LM <- function(df, n) |
|
|
|
{ |
|
|
|
str_c("y ~ ", str_c("x", 1:n, collapse = " + ")) %>% as.formula -> form |
|
|
|
|
|
|
|
df <- powers(df, n) |
|
|
|
|
|
|
|
lm(form, data = df) |
|
|
|
} |
|
|
|
|
|
|
|
PREDICT <- function(train, test, n) |
|
|
|
{ |
|
|
|
model <- LM(train, n) |
|
|
|
|
|
|
|
test <- powers(test, n) |
|
|
|
|
|
|
|
mean((test$y - predict(model, test))^2) |
|
|
|
} |
|
|
|
|
|
|
|
Fggfun <- function(model) |
|
|
|
{ |
|
|
|
model %>% |
|
|
|
pull(estimate) -> est |
|
|
|
Vectorize( |
|
|
|
function(x) |
|
|
|
{ |
|
|
|
sum(x^(0:(length(est) - 1)) * est) |
|
|
|
}) |
|
|
|
} |
|
|
|
|
|
|
|
1:9 %>% map(~LM(train, .) %>% tidy %>% Fggfun) -> ggfuns |
|
|
|
|
|
|
|
1:9 %>% |
|
|
|
map(function(n) |
|
|
|
{ |
|
|
|
LM(train, n) %>% |
|
|
|
tidy %>% |
|
|
|
Fggfun %>% |
|
|
|
stat_function(fun = .) |
|
|
|
}) -> regs |
|
|
|
|
|
|
|
1:9 %>% |
|
|
|
map(function(n) |
|
|
|
{ |
|
|
|
(gtrain + regs[n]) %>% |
|
|
|
ggsave(filename = str_c("train_", n, ".png")) |
|
|
|
}) |
|
|
|
|
|
|
|
1:9 %>% |
|
|
|
map(function(n) |
|
|
|
{ |
|
|
|
(g + regs[n]) %>% |
|
|
|
ggsave(filename = str_c("all_", n, ".png")) |
|
|
|
}) |
|
|
|
|
|
|
|
( |
|
|
|
1:9 %>% |
|
|
|
map_dbl(~LM(train, .) %>% glance %>% pull(r.squared)) %>% |
|
|
|
tibble(x = 1:9, y = .) %>% |
|
|
|
ggplot() + |
|
|
|
aes(x = x, y = y) + |
|
|
|
geom_point() + |
|
|
|
geom_line() + |
|
|
|
scale_x_continuous(breaks = 0:10) |
|
|
|
) %>% |
|
|
|
ggsave(filename = "rsquared.png", plot = .) |
|
|
|
|
|
|
|
( |
|
|
|
1:9 %>% |
|
|
|
map_dbl(~PREDICT(train, test, .)) %>% |
|
|
|
tibble(x = 1:9, y = .) %>% |
|
|
|
ggplot() + |
|
|
|
aes(x = x, y = y) + |
|
|
|
geom_point() + |
|
|
|
geom_line() + |
|
|
|
scale_x_continuous(breaks = 0:10) |
|
|
|
) %>% |
|
|
|
ggsave(filename = "error.png", plot = .) |