|
|
@@ -1,43 +1,17 @@ |
|
|
|
library(tidyverse) |
|
|
|
library(RSQLite) |
|
|
|
library(ipapi) |
|
|
|
library(leaflet) |
|
|
|
|
|
|
|
# Command-line interface: <dbfile> <logfile>
#   dbfile  - path of the SQLite database to open
#   logfile - path of the space-delimited access log to import
cmdargs <- commandArgs(trailingOnly = TRUE)

# Fail early with a readable message instead of passing NA paths downstream.
if (length(cmdargs) < 2) {
  stop("Expected two arguments: <dbfile> <logfile>", call. = FALSE)
}

dbfile <- cmdargs[1]
logfile <- cmdargs[2]

# Connection shared by every later step of the script.
db <- dbConnect(SQLite(), dbfile)
|
|
|
|
|
|
|
# Parse the access log and append the retained requests to the `access` table.
# Rows with a non-missing `user` field and rows from the two listed IP
# addresses are dropped before writing.
logfile %>%
  read_delim(
    delim = " ",
    col_names = c(
      "ip", "null1", "user", "timestamp", "zone", "req",
      "status", "size", "referer", "agent", "null2"
    )
  ) %>%
  # "-" marks an empty field. Only character columns are touched because
  # na_if() refuses to compare a numeric column with a string in current dplyr.
  mutate(across(where(is.character), ~ na_if(.x, "-"))) %>%
  filter(is.na(user)) %>%
  filter(!ip %in% c("192.168.0.254", "164.2.255.244")) %>%
  # The leading "[" in the format matches the bracket kept in the field.
  # NOTE(review): the `zone` column is not used, so the time is parsed in the
  # session time zone - confirm that is intended.
  mutate(timestamp = as.POSIXct(timestamp, format = "[%d/%b/%Y:%H:%M:%S")) %>%
  separate(req, into = c("method", "url", "version"), sep = " ") %>%
  select(ip, timestamp, url, status, referer, agent) %>%
  dbWriteTable(conn = db, name = "access", value = ., append = TRUE)
|
|
|
|
|
|
|
# IP addresses that already have a row in `geoip`.
# Stays empty when the table has not been created yet (e.g. first run), so the
# default below is actually used instead of the script aborting.
known_ips <- character(0)

if (dbExistsTable(db, "geoip")) {
  # The geolocation step of this script stores the address in column `ip`
  # (renamed from `query`), so that is the column read back here.
  known_ips <- db %>%
    tbl("geoip") %>%
    distinct(ip) %>%
    pull(ip)
}
|
|
|
|
|
|
|
# Addresses seen in `access` that have no entry in `geoip` yet.
ips <- db %>%
  tbl("access") %>%
  distinct(ip) %>%
  pull(ip) %>%
  setdiff(known_ips)

# Access rows joined with their location, pulled into memory for plotting.
# NOTE(review): this runs before the new addresses in `ips` are geolocated,
# so their rows have missing city/country/lat/lon until the next run.
# NOTE(review): requires the `geoip` table to exist - confirm for first runs.
geoaccess <- db %>%
  tbl("access") %>%
  left_join(tbl(db, "geoip"), by = "ip") %>%
  collect() %>%
  mutate(
    # Convert the stored numeric timestamp back to a date-time.
    timestamp = as.POSIXct(timestamp, origin = "1970-01-01"),
    across(c(city, country, agent), factor)
  )
|
|
|
|
|
|
|
# Look up every address that is not in `geoip` yet and append the result.
# Skipped entirely when there is nothing new, so no lookup request is made.
if (length(ips) > 0) {
  ips %>%
    geolocate() %>%
    # Store the looked-up address under `ip` so it can be matched against
    # the `ip` column of `access`.
    select(ip = query, city, country, lat, lon) %>%
    dbWriteTable(conn = db, name = "geoip", value = ., append = TRUE)
}
|
|
|
# Clustered map with one marker per distinct (ip, lon, lat) among the
# retained requests.
geoaccess %>%
  filter(
    status != 404,
    # Case-insensitive so agents such as "YandexBot" are excluded as well.
    # NOTE(review): rows with a missing agent are dropped too, because the
    # condition evaluates to NA for them - confirm that is intended.
    !str_detect(agent, regex("bot", ignore_case = TRUE))
  ) %>%
  # Addresses without coordinates cannot be placed on the map.
  filter(!is.na(lon), !is.na(lat)) %>%
  distinct(ip, lon, lat) %>%
  leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addMarkers(~lon, ~lat, clusterOptions = markerClusterOptions())