all repos — flounder @ 874f2c2176ec383701dcb731e2a1349d81f83795

A small site builder for the Gemini protocol

log.go (view raw)

  1package main
  2
  3import (
  4	"bufio"
  5	"database/sql"
  6	"fmt"
  7	gmi "git.sr.ht/~adnano/go-gemini"
  8	"github.com/gorilla/handlers"
  9	"io"
 10	"log"
 11	"net"
 12	"net/http"
 13	"net/url"
 14	"os"
 15	"regexp"
 16	"strconv"
 17	"strings"
 18	"time"
 19	"unicode/utf8"
 20)
 21
 22// Copy pasted from gorilla handler library, modified slightly
 23
 24const lowerhex = "0123456789abcdef"
 25
 26const apacheTS = "02/Jan/2006:15:04:05 -0700"
 27
 28func logFormatter(writer io.Writer, params handlers.LogFormatterParams) {
 29	buf := buildCommonLogLine(params.Request, params.URL, params.TimeStamp, params.StatusCode, params.Size)
 30	buf = append(buf, '\n')
 31	writer.Write(buf)
 32}
 33
 34// buildCommonLogLine builds a log entry for req in Apache Common Log Format.
 35// ts is the timestamp with which the entry should be logged.
 36// status and size are used to provide the response HTTP status and size.
 37func buildCommonLogLine(req *http.Request, url url.URL, ts time.Time, status int, size int) []byte {
 38	user := getAuthUser(req)
 39	username := "-"
 40	if user.Username != "" {
 41		username = user.Username
 42	}
 43
 44	// Get forwarded IP address
 45	ipAddr := req.Header.Get("X-Forwarded-For") // TODO make so it works with many servers
 46	if ipAddr == "" {
 47		ipAddr = req.RemoteAddr
 48	}
 49	referer := req.Header.Get("Referer")
 50
 51	host, _, err := net.SplitHostPort(ipAddr)
 52	if err != nil {
 53		host = ipAddr
 54	}
 55
 56	uri := req.RequestURI
 57
 58	// Requests using the CONNECT method over HTTP/2.0 must use
 59	// the authority field (aka r.Host) to identify the target.
 60	// Refer: https://httpwg.github.io/specs/rfc7540.html#CONNECT
 61	if req.ProtoMajor == 2 && req.Method == "CONNECT" {
 62		uri = req.Host
 63	}
 64	if uri == "" {
 65		uri = url.RequestURI()
 66	}
 67
 68	desthost := req.Host
 69
 70	buf := make([]byte, 0, 3*(len(host)+len(desthost)+len(username)+len(req.Method)+len(uri)+len(req.Proto)+len(referer)+50)/2)
 71	buf = append(buf, host...)
 72	buf = append(buf, " - "...)
 73	buf = append(buf, username...)
 74	buf = append(buf, " ["...)
 75	buf = append(buf, ts.Format(apacheTS)...)
 76	buf = append(buf, `] `...)
 77	buf = append(buf, desthost...)
 78	buf = append(buf, ` "`...)
 79	buf = append(buf, req.Method...)
 80	buf = append(buf, " "...)
 81	buf = appendQuoted(buf, uri)
 82	buf = append(buf, " "...)
 83	buf = append(buf, req.Proto...)
 84	buf = append(buf, `" - `...)
 85	buf = append(buf, referer...)
 86	buf = append(buf, " - "...)
 87	buf = append(buf, strconv.Itoa(status)...)
 88	buf = append(buf, " "...)
 89	buf = append(buf, strconv.Itoa(size)...)
 90	return buf
 91}
 92
 93func appendQuoted(buf []byte, s string) []byte {
 94	var runeTmp [utf8.UTFMax]byte
 95	for width := 0; len(s) > 0; s = s[width:] {
 96		r := rune(s[0])
 97		width = 1
 98		if r >= utf8.RuneSelf {
 99			r, width = utf8.DecodeRuneInString(s)
100		}
101		if width == 1 && r == utf8.RuneError {
102			buf = append(buf, `\x`...)
103			buf = append(buf, lowerhex[s[0]>>4])
104			buf = append(buf, lowerhex[s[0]&0xF])
105			continue
106		}
107		if r == rune('"') || r == '\\' { // always backslashed
108			buf = append(buf, '\\')
109			buf = append(buf, byte(r))
110			continue
111		}
112		if strconv.IsPrint(r) {
113			n := utf8.EncodeRune(runeTmp[:], r)
114			buf = append(buf, runeTmp[:n]...)
115			continue
116		}
117		switch r {
118		case '\a':
119			buf = append(buf, `\a`...)
120		case '\b':
121			buf = append(buf, `\b`...)
122		case '\f':
123			buf = append(buf, `\f`...)
124		case '\n':
125			buf = append(buf, `\n`...)
126		case '\r':
127			buf = append(buf, `\r`...)
128		case '\t':
129			buf = append(buf, `\t`...)
130		case '\v':
131			buf = append(buf, `\v`...)
132		default:
133			switch {
134			case r < ' ':
135				buf = append(buf, `\x`...)
136				buf = append(buf, lowerhex[s[0]>>4])
137				buf = append(buf, lowerhex[s[0]&0xF])
138			case r > utf8.MaxRune:
139				r = 0xFFFD
140				fallthrough
141			case r < 0x10000:
142				buf = append(buf, `\u`...)
143				for s := 12; s >= 0; s -= 4 {
144					buf = append(buf, lowerhex[r>>uint(s)&0xF])
145				}
146			default:
147				buf = append(buf, `\U`...)
148				for s := 28; s >= 0; s -= 4 {
149					buf = append(buf, lowerhex[r>>uint(s)&0xF])
150				}
151			}
152		}
153	}
154	return buf
155}
156
157// Parse logs and write to database
158
159// Anonymize user and IP?
160
161func logGemini(r *gmi.Request) {
162	var ipAddr string
163	if r.RemoteAddr != nil {
164		ipAddr = r.RemoteAddr.String()
165	} else {
166		ipAddr = ""
167	}
168	host, _, err := net.SplitHostPort(ipAddr)
169	if err != nil {
170		host = ipAddr
171	}
172	line := fmt.Sprintf("gemini %s - [%s] %s %s\n", host,
173		time.Now().Format(apacheTS),
174		r.URL.Host,
175		r.URL.Path)
176	buf := []byte(line)
177	log.Writer().Write(buf)
178}
179
180// notall fields set for both protocols
181type LogLine struct {
182	Timestamp time.Time
183	Protocol  string // gemini or http
184	ReqIP     string // maybe rename here
185	ReqUser   string
186	Status    int
187	DestHost  string
188	Method    string
189	Referer   string
190	Path      string
191}
192
193func (ll *LogLine) insertInto(db *sql.DB) {
194	_, err := db.Exec(`insert into log (timestamp, protocol, request_ip, request_user, status, destination_host, path, method, referer)
195values (?, ?, ?, ?, ?, ?, ?, ?, ?)`, ll.Timestamp.Format(time.RFC3339), ll.Protocol, ll.ReqIP, ll.ReqUser, ll.Status, ll.DestHost, ll.Path, ll.Method, ll.Referer)
196	if err != nil {
197		fmt.Println(err)
198	}
199}
200
201const httpLogRegex = `^(.*?) - (.*?) \[(.*?)\] (.*?) \"(.*) (.*) .*\" - (.*) - (\d*)`
202const geminiLogRegex = `^gemini (.*?) - \[(.*?)\] (.*?) (.*)`
203
204var rxHttp *regexp.Regexp = regexp.MustCompile(httpLogRegex)
205var rxGemini *regexp.Regexp = regexp.MustCompile(geminiLogRegex)
206
207func lineToLogLine(line string) (*LogLine, error) {
208	result := LogLine{}
209	var ts string
210	if strings.HasPrefix(line, "gemini") {
211		matches := rxGemini.FindStringSubmatch(line)
212		if len(matches) < 5 {
213			return nil, nil // TODO better error
214		} else {
215			result.ReqIP = matches[1]
216			ts = matches[2]
217			result.Timestamp, _ = time.Parse(apacheTS, ts)
218			result.DestHost = matches[3]
219			result.Path = matches[4]
220			result.Protocol = "gemini"
221			// etc
222		}
223	} else {
224		matches := rxHttp.FindStringSubmatch(line)
225		if len(matches) < 8 {
226			return nil, nil
227		} else {
228			result.ReqIP = matches[1]
229			result.ReqUser = matches[2]
230			ts = matches[3]
231			result.Timestamp, _ = time.Parse(apacheTS, ts)
232			result.DestHost = matches[4]
233			result.Method = matches[5]
234			result.Path = matches[6]
235			result.Referer = matches[7]
236			result.Status, _ = strconv.Atoi(matches[8])
237			result.Protocol = "http"
238		}
239	}
240	return &result, nil
241}
242
243func dumpLogsWorker() {
244	log.Println("Starting analytics log writing worker")
245	for {
246		dumpLogs()
247		time.Sleep(time.Minute * 5)
248	}
249}
250func dumpLogs() {
251	log.Println("Writing missing logs to database")
252	db, err := getAnalyticsDB()
253	if err != nil {
254		// not perfect -- squashes errors
255		return
256	}
257	var maxTime string
258	row := db.QueryRow(`SELECT timestamp from log order by timestamp desc limit 1`)
259	err = row.Scan(&maxTime)
260	if err != nil {
261		// not perfect -- squashes errors
262		return
263	}
264
265	file, err := os.Open(c.LogFile)
266	if err != nil {
267		// not perfect -- squashes errors
268		return
269	}
270	defer file.Close()
271
272	scanner := bufio.NewScanner(file)
273	counter := 0
274	for scanner.Scan() {
275		text := scanner.Text()
276		ll, _ := lineToLogLine(text)
277		if ll == nil {
278			continue
279		}
280		if maxTime != "" {
281			max, err := time.Parse(time.RFC3339, maxTime) // ineff
282			if !ll.Timestamp.After(max) || err != nil {
283				// NOTE -- possible bug if two requests in the same second while we are reading -- skips 1 log
284				continue
285			}
286		}
287		ll.insertInto(db)
288		counter += 1
289	}
290	log.Printf("Wrote %d logs\n", counter)
291}
292
293func rotateLogs() {
294	// TODO write
295	// move log to log.1
296	// delete log.1
297}