@@ -0,0 +1,43 @@ | |||
# Ingest a web-server access log into SQLite and geolocate new client IPs.
#
# Usage: Rscript <this script> <dbfile> <logfile>
#   dbfile   path of the SQLite database to write to
#   logfile  space-delimited access log with 11 fields per line
#
# Side effects:
#   * appends the anonymous, non-ignored requests to table "access"
#   * appends one row per not-yet-known IP to table "geoip" (the lookup is
#     done by ipapi::geolocate())
library(tidyverse)
library(RSQLite)
library(ipapi)

cmdargs <- commandArgs(trailingOnly = TRUE)
if (length(cmdargs) < 2) {
  stop("usage: Rscript <script> <dbfile> <logfile>", call. = FALSE)
}
dbfile <- cmdargs[1]
logfile <- cmdargs[2]

# Field names for the 11 space-separated columns of a log line.
log_columns <- c(
  "ip", "null1", "user", "timestamp", "zone", "req",
  "status", "size", "referer", "agent", "null2"
)

# Client addresses whose requests are never recorded.
ignored_ips <- c("192.168.0.254", "164.2.255.244")

db <- dbConnect(SQLite(), dbfile)

tryCatch(
  {
    access <- logfile %>%
      read_delim(delim = " ", col_names = log_columns) %>%
      # "-" marks an empty field. Only character columns can contain it, and
      # restricting na_if() to them avoids comparing "-" against numeric
      # columns (which recent dplyr versions may reject as a type mismatch).
      mutate_if(is.character, na_if, "-") %>%
      # Keep only requests without an authenticated user.
      filter(is.na(user)) %>%
      filter(!ip %in% ignored_ips) %>%
      # NOTE(review): the "zone" field is not used, so the time is interpreted
      # in the session time zone -- confirm this matches the server's zone.
      mutate(
        timestamp = as.POSIXct(timestamp, format = "[%d/%b/%Y:%H:%M:%S")
      ) %>%
      separate(req, into = c("method", "url", "version"), sep = " ") %>%
      select(ip, timestamp, url, status, referer, agent)

    dbWriteTable(conn = db, name = "access", value = access, append = TRUE)

    # IPs that already have a geolocation row. The table is written below with
    # the address in column "ip" (renamed from "query"), so that is the column
    # to read back. On the very first run the table does not exist yet.
    known_ips <- if (dbExistsTable(db, "geoip")) {
      db %>%
        tbl("geoip") %>%
        distinct(ip) %>%
        pull(ip)
    } else {
      character(0)
    }

    # Every IP seen so far that has not been geolocated yet.
    ips <- db %>%
      tbl("access") %>%
      distinct(ip) %>%
      pull(ip) %>%
      setdiff(known_ips)

    if (length(ips) > 0) {
      geo <- ips %>%
        geolocate() %>%
        select(ip = query, city, country, lat, lon)
      dbWriteTable(conn = db, name = "geoip", value = geo, append = TRUE)
    }
  },
  # Close the connection whether or not the ingest succeeded.
  finally = dbDisconnect(db)
)
@@ -1,43 +1,17 @@ | |||
# NOTE(review): this span follows a diff header "@@ -1,43 +1,17 @@" and appears
# to show removed and added lines interleaved without +/- markers; every line
# also carries trailing " | |||" residue. As written it is not a runnable R
# script -- TODO confirm against the real file before relying on it.
# Presumably the removed lines are the log-ingest steps (read_delim ...
# dbWriteTable into "access" / "geoip") and the added lines are the leaflet
# map steps (src_sqlite ... addMarkers); verify.
library(tidyverse) | |||
library(RSQLite) | |||
library(ipapi) | |||
library(leaflet) | |||
commandArgs(trailingOnly = T) -> cmdargs | |||
dbfile <- cmdargs[1] | |||
logfile <- cmdargs[2] | |||
dbConnect(SQLite(), dbfile) -> db | |||
# Read the space-delimited log, turn "-" into NA, keep rows with no user and
# not from the two listed IPs, parse the timestamp, split the request into
# method/url/version and append the selected columns to table "access".
logfile %>% | |||
read_delim(delim = " ", col_names = c("ip", "null1", "user", "timestamp", "zone", "req", "status", "size", "referer", "agent", "null2")) %>% | |||
mutate_all(na_if, "-") %>% | |||
filter(user %>% is.na) %>% | |||
filter(!ip %in% c("192.168.0.254", "164.2.255.244")) %>% | |||
mutate(timestamp = timestamp %>% as.POSIXct(format = "[%d/%b/%Y:%H:%M:%S")) %>% | |||
separate(req, into = c("method", "url", "version"), sep = " ") %>% | |||
select(ip, timestamp, url, status, referer, agent) %>% | |||
dbWriteTable(conn = db, name = "access", value = ., append = T) | |||
known_ips <- character(0) | |||
# NOTE(review): reads column "query" from "geoip", while the write further
# down stores the address as "ip" (select(ip = query, ...)) -- confirm which
# column the table really has.
db %>% | |||
tbl("geoip") %>% | |||
select(query) %>% | |||
collect %>% | |||
pull(query) -> known_ips | |||
# NOTE(review): here the already-open connection is piped into
# src_sqlite("access.db") and a left_join sits in the middle of the
# unique-IP pipeline; this looks like two different pipelines merged by the
# diff view.
db %>% | |||
src_sqlite("access.db") %>% | |||
tbl("access") %>% | |||
select(ip) %>% | |||
left_join(db %>% tbl("geoip")) %>% | |||
collect %>% | |||
pull(ip) %>% | |||
unique %>% | |||
setdiff(known_ips) -> ips | |||
# NOTE(review): the next pipeline starts with mutate() and has no data
# argument, so `geoaccess` cannot be built from these lines alone.
mutate(timestamp = timestamp %>% as.POSIXct(origin = "1970-01-01")) %>% | |||
mutate_at(vars(city, country, agent), factor) -> geoaccess | |||
# Geolocate the IPs collected in `ips` and append them to table "geoip".
if (length(ips) > 0) | |||
{ | |||
ips %>% | |||
geolocate %>% | |||
select(ip = query, city, country, lat, lon) %>% | |||
dbWriteTable(conn = db, name = "geoip", value = ., append = T) | |||
} | |||
# Map one clustered marker per distinct (ip, lon, lat) among rows whose
# status is not 404 and whose agent does not contain "bot".
geoaccess %>% | |||
filter(status != 404, | |||
!agent %>% str_detect("bot")) %>% | |||
distinct(ip, lon, lat) %>% | |||
leaflet %>% | |||
addProviderTiles(providers$CartoDB.Positron) %>% | |||
addMarkers(~lon, ~lat, clusterOptions = markerClusterOptions()) |
@@ -1,17 +0,0 @@ | |||
# Draw an interactive map of visitor locations from the access database.
#
# Reads tables "access" and "geoip" from the SQLite file "access.db" in the
# working directory, joins them on the client IP and shows one clustered
# marker per distinct (ip, lon, lat).
library(tidyverse)
library(leaflet)

# Keep the database handle in a variable: it is needed twice, once for each
# table of the join.
# NOTE(review): src_sqlite() is flagged as deprecated in newer dplyr releases;
# a DBI connection would be the replacement -- confirm before upgrading.
db <- src_sqlite("access.db")

geoaccess <- db %>%
  tbl("access") %>%
  # "ip" is the only column this script relies on in both tables; naming it
  # makes the join key explicit.
  left_join(tbl(db, "geoip"), by = "ip") %>%
  collect() %>%
  # Convert the numeric timestamp (seconds since the epoch) back to POSIXct.
  mutate(timestamp = as.POSIXct(timestamp, origin = "1970-01-01")) %>%
  mutate_at(vars(city, country, agent), factor)

geoaccess %>%
  # Drop "not found" responses and crawlers. filter() also discards rows
  # where the condition is NA, i.e. rows with a missing status or agent.
  filter(status != 404, !str_detect(agent, "bot")) %>%
  distinct(ip, lon, lat) %>%
  leaflet() %>%
  addProviderTiles(providers$CartoDB.Positron) %>%
  addMarkers(~lon, ~lat, clusterOptions = markerClusterOptions())