|
12345678910111213141516171819202122232425262728293031323334353637383940414243 |
# Import a web-server access log into SQLite and geolocate unseen client IPs.
#
# Usage: Rscript <script> <dbfile> <logfile>
#   dbfile   path to the SQLite database holding the "access" and "geoip" tables
#   logfile  space-delimited access log whose rows are appended to "access"

library(tidyverse)
library(RSQLite)
library(ipapi)

cmdargs <- commandArgs(trailingOnly = TRUE)
if (length(cmdargs) < 2) {
  # Without this check missing arguments silently become NA paths.
  stop("usage: Rscript <script> <dbfile> <logfile>", call. = FALSE)
}
dbfile <- cmdargs[1]
logfile <- cmdargs[2]

# Client addresses whose requests are never recorded.
ignored_ips <- c("192.168.0.254", "164.2.255.244")

# Field names assigned to the space-separated columns of the log.
log_columns <- c(
  "ip", "null1", "user", "timestamp", "zone", "req",
  "status", "size", "referer", "agent", "null2"
)

db <- dbConnect(SQLite(), dbfile)

# ---- Parse the log and append it to the "access" table ----

access_rows <- logfile %>%
  read_delim(delim = " ", col_names = log_columns) %>%
  # "-" marks an empty field. Only character columns can contain it, and
  # na_if() in dplyr >= 1.1 refuses a character `y` for numeric columns,
  # so the replacement is limited to character columns.
  mutate(across(where(is.character), ~ na_if(.x, "-"))) %>%
  # Keep only rows with no user recorded, from addresses that are not ignored.
  filter(is.na(user)) %>%
  filter(!ip %in% ignored_ips) %>%
  # The offset part of the timestamp ends up in `zone`, which is not used:
  # the time is interpreted in the session's time zone.
  mutate(timestamp = as.POSIXct(timestamp, format = "[%d/%b/%Y:%H:%M:%S")) %>%
  separate(req, into = c("method", "url", "version"), sep = " ") %>%
  select(ip, timestamp, url, status, referer, agent)

dbWriteTable(conn = db, name = "access", value = access_rows, append = TRUE)

# ---- Geolocate client IPs that are not yet in the "geoip" table ----

# "geoip" is only created by the dbWriteTable() call further down, so on a
# fresh database it does not exist yet and must not be queried.
known_ips <- if (dbExistsTable(db, "geoip")) {
  db %>%
    tbl("geoip") %>%
    pull(ip)
} else {
  character(0)
}

ips <- db %>%
  tbl("access") %>%
  distinct(ip) %>% # de-duplicate inside SQLite instead of loading every row
  pull(ip) %>%
  setdiff(known_ips)

if (length(ips) > 0) {
  # NOTE(review): assumes every lookup result carries city/country/lat/lon;
  # confirm how geolocate() reports failed lookups.
  ips %>%
    geolocate() %>%
    select(ip = query, city, country, lat, lon) %>%
    dbWriteTable(conn = db, name = "geoip", value = ., append = TRUE)
}

dbDisconnect(db)
|