# Import a space-delimited web-server access log into a SQLite database and
# geolocate any client IPs that have not been looked up before.
#
# Usage: Rscript <script> <dbfile> <logfile>
#   dbfile   path to the SQLite database file
#   logfile  path to the access log to import
#
# Tables written:
#   access  one row per kept request (ip, timestamp, url, status, referer,
#           agent); rows are appended on every run
#   geoip   one row per geolocated IP (ip, city, country, lat, lon)

library(tidyverse)
library(RSQLite)
library(ipapi)

cmdargs <- commandArgs(trailingOnly = TRUE)
if (length(cmdargs) < 2) {
  stop("usage: Rscript <script> <dbfile> <logfile>", call. = FALSE)
}
dbfile <- cmdargs[1]
logfile <- cmdargs[2]

db <- dbConnect(SQLite(), dbfile)

# Column names assigned to the fields of each log line, in file order.
log_columns <- c(
  "ip", "null1", "user", "timestamp", "zone", "req",
  "status", "size", "referer", "agent", "null2"
)

# Client addresses whose requests are never imported.
ignored_ips <- c("192.168.0.254", "164.2.255.244")

# Import the log ----

access_rows <- logfile %>%
  read_delim(delim = " ", col_names = log_columns) %>%
  # "-" marks an absent value. Restrict the replacement to character columns:
  # na_if() refuses to compare a numeric column (e.g. status) against a string.
  mutate(across(where(is.character), function(x) na_if(x, "-"))) %>%
  # Keep only requests whose user field was empty.
  filter(is.na(user)) %>%
  filter(!ip %in% ignored_ips) %>%
  # The zone field is a separate column and is not used here, so the time is
  # interpreted in the session's time zone.
  # NOTE(review): confirm that ignoring the log's UTC offset is intended.
  mutate(timestamp = as.POSIXct(timestamp, format = "[%d/%b/%Y:%H:%M:%S")) %>%
  separate(req, into = c("method", "url", "version"), sep = " ") %>%
  select(ip, timestamp, url, status, referer, agent)

dbWriteTable(conn = db, name = "access", value = access_rows, append = TRUE)

# Geolocate IPs not seen before ----

# On a fresh database the geoip table does not exist yet; treat that as
# "no IPs known" instead of failing on the missing table.
known_ips <- character(0)
if (dbExistsTable(db, "geoip")) {
  known_ips <- db %>%
    tbl("geoip") %>%
    select(ip) %>%
    collect() %>%
    pull(ip)
}

# De-duplicate inside SQLite so only distinct addresses are transferred.
ips <- db %>%
  tbl("access") %>%
  distinct(ip) %>%
  pull(ip) %>%
  setdiff(known_ips)

if (length(ips) > 0) {
  geo_rows <- ips %>%
    geolocate() %>%
    select(ip = query, city, country, lat, lon)

  dbWriteTable(conn = db, name = "geoip", value = geo_rows, append = TRUE)
}

# Release the database handle once all writes are done.
dbDisconnect(db)