Add analytics db
alex wennerberg alex@alexwennerberg.com
Sat, 02 Jan 2021 13:07:21 -0800
M
db.go
→
db.go
@@ -24,6 +24,25 @@ }
createTablesIfDNE() } +func getAnalyticsDB() *sql.DB { + db, err := sql.Open("sqlite3", c.AnalyticsDBFile) + _, err = db.Exec(`CREATE TABLE IF NOT EXISTS log ( + id INTEGER PRIMARY KEY NOT NULL, + timestamp TEXT NOT NULL, + protocol TEXT NOT NULL, + request_ip TEXT, + request_user TEXT, + status INTEGER, + destination_host TEXT, + path TEXT, + method TEXT +);`) + if err != nil { + log.Fatal(err) + } + return db +} + type File struct { // also folders Creator string Name string // includes folder
M
go.sum
→
go.sum
@@ -27,6 +27,8 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= github.com/gin-gonic/gin v1.6.3/go.mod h1:75u5sXoLsGZoRN5Sgbi1eraJ4GU3++wFwWzhwvtwp4M= +github.com/go-co-op/gocron v0.5.0 h1:QBxhIsODdq6u9+Cu3JehdhznE2IzuEokOddpLnxVrtg= +github.com/go-co-op/gocron v0.5.0/go.mod h1:6Btk4lVj3bnFAgbVfr76W8impTyhYrEi1pV5Pt4Tp/M= github.com/go-playground/assert/v2 v2.0.1/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8= github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA=
M
log.go
→
log.go
@@ -1,6 +1,8 @@
package main import ( + "bufio" + "database/sql" "fmt" gmi "git.sr.ht/~adnano/go-gemini" "github.com/gorilla/handlers"@@ -9,7 +11,10 @@ "log"
"net" "net/http" "net/url" + "os" + "regexp" "strconv" + "strings" "time" "unicode/utf8" )@@ -17,6 +22,8 @@
// Copy pasted from gorilla handler library, modified slightly const lowerhex = "0123456789abcdef" + +const apacheTS = "02/Jan/2006:15:04:05 -0700" func logFormatter(writer io.Writer, params handlers.LogFormatterParams) { buf := buildCommonLogLine(params.Request, params.URL, params.TimeStamp, params.StatusCode, params.Size)@@ -64,7 +71,7 @@ buf = append(buf, host...)
buf = append(buf, " - "...) buf = append(buf, username...) buf = append(buf, " ["...) - buf = append(buf, ts.Format("02/Jan/2006:15:04:05 -0700")...) + buf = append(buf, ts.Format(apacheTS)...) buf = append(buf, `] `...) buf = append(buf, desthost...) buf = append(buf, ` "`...)@@ -155,9 +162,113 @@ if err != nil {
host = ipAddr } line := fmt.Sprintf("gemini %s - [%s] %s %s\n", host, - time.Now().Format("02/Jan/2006:15:04:05 -0700"), + time.Now().Format(apacheTS), r.URL.Host, r.URL.Path) buf := []byte(line) log.Writer().Write(buf) } + +// notall fields set for both protocols +type LogLine struct { + Timestamp time.Time + Protocol string // gemini or http + ReqIP string // maybe rename here + ReqUser string + Status int + DestHost string + Method string + Path string +} + +func (ll *LogLine) insertInto(db *sql.DB) { + _, err := db.Exec(`insert into log (timestamp, protocol, request_ip, request_user, status, destination_host, path, method) +values (?, ?, ?, ?, ?, ?, ?, ?)`, ll.Timestamp.Format(time.RFC3339), ll.Protocol, ll.ReqIP, ll.ReqUser, ll.Status, ll.DestHost, ll.Path, ll.Method) + if err != nil { + fmt.Println(err) + } +} + +const httpLogRegex = `^(.*?) - (.*?) \[(.*?)\] (.*?) \"(.*) (.*) .*\" (\d*)` +const geminiLogRegex = `^gemini (.*?) - \[(.*?)\] (.*?) (.*)` + +var rxHttp *regexp.Regexp = regexp.MustCompile(httpLogRegex) +var rxGemini *regexp.Regexp = regexp.MustCompile(geminiLogRegex) + +func lineToLogLine(line string) (*LogLine, error) { + result := LogLine{} + var ts string + if strings.HasPrefix(line, "gemini") { + matches := rxGemini.FindStringSubmatch(line) + if len(matches) < 5 { + return nil, nil // TODO better error + } else { + result.ReqIP = matches[1] + ts = matches[2] + result.Timestamp, _ = time.Parse(apacheTS, ts) + result.DestHost = matches[3] + result.Path = matches[4] + result.Protocol = "gemini" + // etc + } + } else { + matches := rxHttp.FindStringSubmatch(line) + if len(matches) < 8 { + return nil, nil + } else { + result.ReqIP = matches[1] + result.ReqUser = matches[2] + ts = matches[3] + result.Timestamp, _ = time.Parse(apacheTS, ts) + result.DestHost = matches[4] + result.Method = matches[5] + result.Path = matches[6] + result.Status, _ = strconv.Atoi(matches[7]) + result.Protocol = "http" + } + } + return &result, nil +} + +func dumpLogs() { + fmt.Println("Writing missing logs to database") + db := getAnalyticsDB() + var maxTime string + row := db.QueryRow(`SELECT timestamp from log order by timestamp desc limit 1`) + err := row.Scan(&maxTime) + if err != nil { + // not perfect -- squashes errors + } + + file, err := os.Open(c.LogFile) + if err != nil { + log.Fatal(err) + } + defer file.Close() + + scanner := bufio.NewScanner(file) + counter := 0 + for scanner.Scan() { + text := scanner.Text() + ll, _ := lineToLogLine(text) + if ll == nil { + continue + } + if maxTime != "" { + max, err := time.Parse(time.RFC3339, maxTime) // ineff + if !ll.Timestamp.After(max) || err != nil { + // NOTE -- possible bug if two requests in the same second while we are reading -- skips 1 log + continue + } + } + ll.insertInto(db) + counter += 1 + } + fmt.Printf("Wrote %d logs\n", counter) +} + +func rotateLogs() { + // TODO write + // move log to log.1 + // delete log.1 +}
M
main.go
→
main.go
@@ -3,11 +3,13 @@
import ( "flag" "fmt" + "github.com/go-co-op/gocron" "github.com/gorilla/sessions" "io" "log" "os" "sync" + "time" ) var c Config // global var to hold static configuration@@ -46,8 +48,15 @@
cookie := generateCookieKeyIfDNE() SessionStore = sessions.NewCookieStore(cookie) + // handle background tasks + s1 := gocron.NewScheduler(time.UTC) + if c.AnalyticsDBFile != "" { + s1.Every(1).Day().Do(dumpLogs) // TODO Dont do on start? + } + switch args[0] { case "serve": + s1.StartAsync() wg := new(sync.WaitGroup) wg.Add(2) go func() {@@ -61,5 +70,7 @@ }()
wg.Wait() case "admin": runAdminCommand() + case "dumplogs": + dumpLogs() } }