# Review preamble. This text sits before the first "diff --git" header, where
# patch tools treat it as leading commentary rather than patch content.
#
# Purpose: unified git diff touching three paths:
#   * feed_db.R  - new file, 43 lines (log ingestion + geolocation lookup)
#   * geoip.R    - rewritten from 43 lines down to 17 (leaflet map script)
#   * viz.R      - deleted, 17 lines
#
# Formatting: the patch had been collapsed onto two physical lines, which made
# it unparseable. One record per line has been restored; the line counts agree
# with every hunk header below. Indentation inside the R code was lost in the
# collapse, so 2-space pipeline indentation is reconstructed here.
# TODO confirm against the real files: removed/context lines may not match
# byte-for-byte, in which case whitespace-tolerant application is needed.
#
# NOTE(review), R payload, left untouched so the patch stays a faithful record:
#   * feed_db.R selects/pulls column `query` from table "geoip", but the same
#     script writes that table with the column renamed (`ip = query`), so the
#     read looks like it should use `ip` - confirm against the real schema.
#   * feed_db.R reads tbl("geoip") before anything in the script creates it;
#     presumably the table must already exist on first run - verify.
#   * The new geoip.R references `db` inside left_join(), but nothing in that
#     17-line file defines `db`.
#   * `T` is used where `TRUE` is safer, and the connection opened with
#     dbConnect() is never closed in the script.
#
diff --git a/feed_db.R b/feed_db.R
new file mode 100644
index 0000000..45ca41f
--- /dev/null
+++ b/feed_db.R
@@ -0,0 +1,43 @@
+library(tidyverse)
+library(RSQLite)
+library(ipapi)
+
+commandArgs(trailingOnly = T) -> cmdargs
+dbfile <- cmdargs[1]
+logfile <- cmdargs[2]
+
+dbConnect(SQLite(), dbfile) -> db
+
+logfile %>%
+  read_delim(delim = " ", col_names = c("ip", "null1", "user", "timestamp", "zone", "req", "status", "size", "referer", "agent", "null2")) %>%
+  mutate_all(na_if, "-") %>%
+  filter(user %>% is.na) %>%
+  filter(!ip %in% c("192.168.0.254", "164.2.255.244")) %>%
+  mutate(timestamp = timestamp %>% as.POSIXct(format = "[%d/%b/%Y:%H:%M:%S")) %>%
+  separate(req, into = c("method", "url", "version"), sep = " ") %>%
+  select(ip, timestamp, url, status, referer, agent) %>%
+  dbWriteTable(conn = db, name = "access", value = ., append = T)
+
+known_ips <- character(0)
+
+db %>%
+  tbl("geoip") %>%
+  select(query) %>%
+  collect %>%
+  pull(query) -> known_ips
+
+db %>%
+  tbl("access") %>%
+  select(ip) %>%
+  collect %>%
+  pull(ip) %>%
+  unique %>%
+  setdiff(known_ips) -> ips
+
+if (length(ips) > 0)
+{
+  ips %>%
+    geolocate %>%
+    select(ip = query, city, country, lat, lon) %>%
+    dbWriteTable(conn = db, name = "geoip", value = ., append = T)
+}
diff --git a/geoip.R b/geoip.R
index 45ca41f..3bae6d1 100644
--- a/geoip.R
+++ b/geoip.R
@@ -1,43 +1,17 @@
 library(tidyverse)
-library(RSQLite)
-library(ipapi)
+library(leaflet)
 
-commandArgs(trailingOnly = T) -> cmdargs
-dbfile <- cmdargs[1]
-logfile <- cmdargs[2]
-
-dbConnect(SQLite(), dbfile) -> db
-
-logfile %>%
-  read_delim(delim = " ", col_names = c("ip", "null1", "user", "timestamp", "zone", "req", "status", "size", "referer", "agent", "null2")) %>%
-  mutate_all(na_if, "-") %>%
-  filter(user %>% is.na) %>%
-  filter(!ip %in% c("192.168.0.254", "164.2.255.244")) %>%
-  mutate(timestamp = timestamp %>% as.POSIXct(format = "[%d/%b/%Y:%H:%M:%S")) %>%
-  separate(req, into = c("method", "url", "version"), sep = " ") %>%
-  select(ip, timestamp, url, status, referer, agent) %>%
-  dbWriteTable(conn = db, name = "access", value = ., append = T)
-
-known_ips <- character(0)
-
-db %>%
-  tbl("geoip") %>%
-  select(query) %>%
-  collect %>%
-  pull(query) -> known_ips
-
-db %>%
+src_sqlite("access.db") %>%
   tbl("access") %>%
-  select(ip) %>%
+  left_join(db %>% tbl("geoip")) %>%
   collect %>%
-  pull(ip) %>%
-  unique %>%
-  setdiff(known_ips) -> ips
+  mutate(timestamp = timestamp %>% as.POSIXct(origin = "1970-01-01")) %>%
+  mutate_at(vars(city, country, agent), factor) -> geoaccess
 
-if (length(ips) > 0)
-{
-  ips %>%
-    geolocate %>%
-    select(ip = query, city, country, lat, lon) %>%
-    dbWriteTable(conn = db, name = "geoip", value = ., append = T)
-}
+geoaccess %>%
+  filter(status != 404,
+         !agent %>% str_detect("bot")) %>%
+  distinct(ip, lon, lat) %>%
+  leaflet %>%
+  addProviderTiles(providers$CartoDB.Positron) %>%
+  addMarkers(~lon, ~lat, clusterOptions = markerClusterOptions())
diff --git a/viz.R b/viz.R
deleted file mode 100644
index 3bae6d1..0000000
--- a/viz.R
+++ /dev/null
@@ -1,17 +0,0 @@
-library(tidyverse)
-library(leaflet)
-
-src_sqlite("access.db") %>%
-  tbl("access") %>%
-  left_join(db %>% tbl("geoip")) %>%
-  collect %>%
-  mutate(timestamp = timestamp %>% as.POSIXct(origin = "1970-01-01")) %>%
-  mutate_at(vars(city, country, agent), factor) -> geoaccess
-
-geoaccess %>%
-  filter(status != 404,
-         !agent %>% str_detect("bot")) %>%
-  distinct(ip, lon, lat) %>%
-  leaflet %>%
-  addProviderTiles(providers$CartoDB.Positron) %>%
-  addMarkers(~lon, ~lat, clusterOptions = markerClusterOptions())