all repos — flounder @ 58a76b756308ca8920f3091d35caf8220605f3da

A small site builder for the Gemini protocol

log.go (view raw)

  1package main
  2
  3import (
  4	"bufio"
  5	"database/sql"
  6	"fmt"
  7	gmi "git.sr.ht/~adnano/go-gemini"
  8	"github.com/gorilla/handlers"
  9	"io"
 10	"log"
 11	"net"
 12	"net/http"
 13	"net/url"
 14	"os"
 15	"regexp"
 16	"strconv"
 17	"strings"
 18	"time"
 19	"unicode/utf8"
 20)
 21
 22// Copy pasted from gorilla handler library, modified slightly
 23
 24const lowerhex = "0123456789abcdef"
 25
 26const apacheTS = "02/Jan/2006:15:04:05 -0700"
 27
 28func logFormatter(writer io.Writer, params handlers.LogFormatterParams) {
 29	buf := buildCommonLogLine(params.Request, params.URL, params.TimeStamp, params.StatusCode, params.Size)
 30	buf = append(buf, '\n')
 31	writer.Write(buf)
 32}
 33
 34// buildCommonLogLine builds a log entry for req in Apache Common Log Format.
 35// ts is the timestamp with which the entry should be logged.
 36// status and size are used to provide the response HTTP status and size.
 37func buildCommonLogLine(req *http.Request, url url.URL, ts time.Time, status int, size int) []byte {
 38	user := newGetAuthUser(req)
 39	username := "-"
 40	if user.Username != "" {
 41		username = user.Username
 42	}
 43
 44	// Get forwarded IP address
 45	ipAddr := req.Header.Get("X-Real-IP")
 46	if ipAddr == "" {
 47		ipAddr = req.RemoteAddr
 48	}
 49	referer := req.Header.Get("Referer")
 50
 51	host, _, err := net.SplitHostPort(ipAddr)
 52	if err != nil {
 53		host = ipAddr
 54	}
 55
 56	uri := req.RequestURI
 57
 58	// Requests using the CONNECT method over HTTP/2.0 must use
 59	// the authority field (aka r.Host) to identify the target.
 60	// Refer: https://httpwg.github.io/specs/rfc7540.html#CONNECT
 61	if req.ProtoMajor == 2 && req.Method == "CONNECT" {
 62		uri = req.Host
 63	}
 64	if uri == "" {
 65		uri = url.RequestURI()
 66	}
 67
 68	desthost := req.Host
 69
 70	buf := make([]byte, 0, 3*(len(host)+len(desthost)+len(username)+len(req.Method)+len(uri)+len(req.Proto)+len(referer)+50)/2)
 71	buf = append(buf, host...)
 72	buf = append(buf, " - "...)
 73	buf = append(buf, username...)
 74	buf = append(buf, " ["...)
 75	buf = append(buf, ts.Format(apacheTS)...)
 76	buf = append(buf, `] `...)
 77	buf = append(buf, desthost...)
 78	buf = append(buf, ` "`...)
 79	buf = append(buf, req.Method...)
 80	buf = append(buf, " "...)
 81	buf = appendQuoted(buf, uri)
 82	buf = append(buf, " "...)
 83	buf = append(buf, req.Proto...)
 84	buf = append(buf, `" - `...)
 85	buf = append(buf, referer...)
 86	buf = append(buf, " - "...)
 87	buf = append(buf, strconv.Itoa(status)...)
 88	buf = append(buf, " "...)
 89	buf = append(buf, strconv.Itoa(size)...)
 90	return buf
 91}
 92
 93func appendQuoted(buf []byte, s string) []byte {
 94	var runeTmp [utf8.UTFMax]byte
 95	for width := 0; len(s) > 0; s = s[width:] {
 96		r := rune(s[0])
 97		width = 1
 98		if r >= utf8.RuneSelf {
 99			r, width = utf8.DecodeRuneInString(s)
100		}
101		if width == 1 && r == utf8.RuneError {
102			buf = append(buf, `\x`...)
103			buf = append(buf, lowerhex[s[0]>>4])
104			buf = append(buf, lowerhex[s[0]&0xF])
105			continue
106		}
107		if r == rune('"') || r == '\\' { // always backslashed
108			buf = append(buf, '\\')
109			buf = append(buf, byte(r))
110			continue
111		}
112		if strconv.IsPrint(r) {
113			n := utf8.EncodeRune(runeTmp[:], r)
114			buf = append(buf, runeTmp[:n]...)
115			continue
116		}
117		switch r {
118		case '\a':
119			buf = append(buf, `\a`...)
120		case '\b':
121			buf = append(buf, `\b`...)
122		case '\f':
123			buf = append(buf, `\f`...)
124		case '\n':
125			buf = append(buf, `\n`...)
126		case '\r':
127			buf = append(buf, `\r`...)
128		case '\t':
129			buf = append(buf, `\t`...)
130		case '\v':
131			buf = append(buf, `\v`...)
132		default:
133			switch {
134			case r < ' ':
135				buf = append(buf, `\x`...)
136				buf = append(buf, lowerhex[s[0]>>4])
137				buf = append(buf, lowerhex[s[0]&0xF])
138			case r > utf8.MaxRune:
139				r = 0xFFFD
140				fallthrough
141			case r < 0x10000:
142				buf = append(buf, `\u`...)
143				for s := 12; s >= 0; s -= 4 {
144					buf = append(buf, lowerhex[r>>uint(s)&0xF])
145				}
146			default:
147				buf = append(buf, `\U`...)
148				for s := 28; s >= 0; s -= 4 {
149					buf = append(buf, lowerhex[r>>uint(s)&0xF])
150				}
151			}
152		}
153	}
154	return buf
155}
156
157// Parse logs and write to database
158
159// Anonymize user and IP?
160
161func logGemini(r *gmi.Request) {
162	ipAddr := r.RemoteAddr.String()
163	host, _, err := net.SplitHostPort(ipAddr)
164	if err != nil {
165		host = ipAddr
166	}
167	line := fmt.Sprintf("gemini %s - [%s] %s %s\n", host,
168		time.Now().Format(apacheTS),
169		r.URL.Host,
170		r.URL.Path)
171	buf := []byte(line)
172	log.Writer().Write(buf)
173}
174
// LogLine is one parsed log entry. Not all fields are set for both protocols:
// lineToLogLine fills ReqUser, Status, Method and Referer only for http lines.
type LogLine struct {
	// Timestamp is the time recorded in the log entry.
	Timestamp time.Time
	Protocol  string // gemini or http
	// ReqIP is the client address captured from the start of the entry.
	ReqIP string // maybe rename here
	// ReqUser is the user field of an http entry.
	ReqUser string
	// Status is the response status of an http entry.
	Status int
	// DestHost is the host the request was addressed to.
	DestHost string
	// Method is the request method of an http entry.
	Method string
	// Referer is the referer field of an http entry.
	Referer string
	// Path is the requested path.
	Path string
}
187
188func (ll *LogLine) insertInto(db *sql.DB) {
189	_, err := db.Exec(`insert into log (timestamp, protocol, request_ip, request_user, status, destination_host, path, method, referer)
190values (?, ?, ?, ?, ?, ?, ?, ?, ?)`, ll.Timestamp.Format(time.RFC3339), ll.Protocol, ll.ReqIP, ll.ReqUser, ll.Status, ll.DestHost, ll.Path, ll.Method, ll.Referer)
191	if err != nil {
192		fmt.Println(err)
193	}
194}
195
// Patterns for the two log line layouts.
//
// httpLogRegex captures, in order: client IP, user, timestamp, destination
// host, method, path, referer and status.
//
// geminiLogRegex captures, in order: client IP, timestamp, destination host
// and path.
const (
	httpLogRegex   = `^(.*?) - (.*?) \[(.*?)\] (.*?) \"(.*) (.*) .*\" - (.*) - (\d*)`
	geminiLogRegex = `^gemini (.*?) - \[(.*?)\] (.*?) (.*)`
)

// Compiled once at package scope so parsing a line never recompiles them.
var (
	rxHttp   = regexp.MustCompile(httpLogRegex)
	rxGemini = regexp.MustCompile(geminiLogRegex)
)
201
202func lineToLogLine(line string) (*LogLine, error) {
203	result := LogLine{}
204	var ts string
205	if strings.HasPrefix(line, "gemini") {
206		matches := rxGemini.FindStringSubmatch(line)
207		if len(matches) < 5 {
208			return nil, nil // TODO better error
209		} else {
210			result.ReqIP = matches[1]
211			ts = matches[2]
212			result.Timestamp, _ = time.Parse(apacheTS, ts)
213			result.DestHost = matches[3]
214			result.Path = matches[4]
215			result.Protocol = "gemini"
216			// etc
217		}
218	} else {
219		matches := rxHttp.FindStringSubmatch(line)
220		if len(matches) < 8 {
221			return nil, nil
222		} else {
223			result.ReqIP = matches[1]
224			result.ReqUser = matches[2]
225			ts = matches[3]
226			result.Timestamp, _ = time.Parse(apacheTS, ts)
227			result.DestHost = matches[4]
228			result.Method = matches[5]
229			result.Path = matches[6]
230			result.Referer = matches[7]
231			result.Status, _ = strconv.Atoi(matches[8])
232			result.Protocol = "http"
233		}
234	}
235	return &result, nil
236}
237
238func dumpLogs() {
239	log.Println("Writing missing logs to database")
240	db := getAnalyticsDB()
241	var maxTime string
242	row := db.QueryRow(`SELECT timestamp from log order by timestamp desc limit 1`)
243	err := row.Scan(&maxTime)
244	if err != nil {
245		// not perfect -- squashes errors
246	}
247
248	file, err := os.Open(c.LogFile)
249	if err != nil {
250		log.Fatal(err)
251	}
252	defer file.Close()
253
254	scanner := bufio.NewScanner(file)
255	counter := 0
256	for scanner.Scan() {
257		text := scanner.Text()
258		ll, _ := lineToLogLine(text)
259		if ll == nil {
260			continue
261		}
262		if maxTime != "" {
263			max, err := time.Parse(time.RFC3339, maxTime) // ineff
264			if !ll.Timestamp.After(max) || err != nil {
265				// NOTE -- possible bug if two requests in the same second while we are reading -- skips 1 log
266				continue
267			}
268		}
269		ll.insertInto(db)
270		counter += 1
271	}
272	log.Printf("Wrote %d logs\n", counter)
273}
274
// rotateLogs is an unimplemented placeholder for log rotation; it currently
// does nothing.
func rotateLogs() {
	// TODO write
	// move log to log.1
	// delete log.1
}