all repos — flounder @ 2d5c7e1e084a1a795402e0ccca02e615d52209b1

A small site builder for the Gemini protocol

log.go (view raw)

  1package main
  2
  3import (
  4	"bufio"
  5	"database/sql"
  6	"fmt"
  7	gmi "git.sr.ht/~adnano/go-gemini"
  8	"github.com/gorilla/handlers"
  9	"io"
 10	"log"
 11	"net"
 12	"net/http"
 13	"net/url"
 14	"os"
 15	"regexp"
 16	"strconv"
 17	"strings"
 18	"time"
 19	"unicode/utf8"
 20)
 21
 22// Copy pasted from gorilla handler library, modified slightly
 23
 24const lowerhex = "0123456789abcdef"
 25
 26const apacheTS = "02/Jan/2006:15:04:05 -0700"
 27
 28func logFormatter(writer io.Writer, params handlers.LogFormatterParams) {
 29	buf := buildCommonLogLine(params.Request, params.URL, params.TimeStamp, params.StatusCode, params.Size)
 30	buf = append(buf, '\n')
 31	writer.Write(buf)
 32}
 33
 34// buildCommonLogLine builds a log entry for req in Apache Common Log Format.
 35// ts is the timestamp with which the entry should be logged.
 36// status and size are used to provide the response HTTP status and size.
 37func buildCommonLogLine(req *http.Request, url url.URL, ts time.Time, status int, size int) []byte {
 38	user := newGetAuthUser(req)
 39	username := "-"
 40	if user.Username != "" {
 41		username = user.Username
 42	}
 43
 44	// Get forwarded IP address
 45	ipAddr := req.Header.Get("X-Real-IP")
 46	if ipAddr == "" {
 47		ipAddr = req.RemoteAddr
 48	}
 49
 50	host, _, err := net.SplitHostPort(ipAddr)
 51	if err != nil {
 52		host = ipAddr
 53	}
 54
 55	uri := req.RequestURI
 56
 57	// Requests using the CONNECT method over HTTP/2.0 must use
 58	// the authority field (aka r.Host) to identify the target.
 59	// Refer: https://httpwg.github.io/specs/rfc7540.html#CONNECT
 60	if req.ProtoMajor == 2 && req.Method == "CONNECT" {
 61		uri = req.Host
 62	}
 63	if uri == "" {
 64		uri = url.RequestURI()
 65	}
 66
 67	desthost := req.Host
 68
 69	buf := make([]byte, 0, 3*(len(host)+len(desthost)+len(username)+len(req.Method)+len(uri)+len(req.Proto)+50)/2)
 70	buf = append(buf, host...)
 71	buf = append(buf, " - "...)
 72	buf = append(buf, username...)
 73	buf = append(buf, " ["...)
 74	buf = append(buf, ts.Format(apacheTS)...)
 75	buf = append(buf, `] `...)
 76	buf = append(buf, desthost...)
 77	buf = append(buf, ` "`...)
 78	buf = append(buf, req.Method...)
 79	buf = append(buf, " "...)
 80	buf = appendQuoted(buf, uri)
 81	buf = append(buf, " "...)
 82	buf = append(buf, req.Proto...)
 83	buf = append(buf, `" `...)
 84	buf = append(buf, strconv.Itoa(status)...)
 85	buf = append(buf, " "...)
 86	buf = append(buf, strconv.Itoa(size)...)
 87	return buf
 88}
 89
 90func appendQuoted(buf []byte, s string) []byte {
 91	var runeTmp [utf8.UTFMax]byte
 92	for width := 0; len(s) > 0; s = s[width:] {
 93		r := rune(s[0])
 94		width = 1
 95		if r >= utf8.RuneSelf {
 96			r, width = utf8.DecodeRuneInString(s)
 97		}
 98		if width == 1 && r == utf8.RuneError {
 99			buf = append(buf, `\x`...)
100			buf = append(buf, lowerhex[s[0]>>4])
101			buf = append(buf, lowerhex[s[0]&0xF])
102			continue
103		}
104		if r == rune('"') || r == '\\' { // always backslashed
105			buf = append(buf, '\\')
106			buf = append(buf, byte(r))
107			continue
108		}
109		if strconv.IsPrint(r) {
110			n := utf8.EncodeRune(runeTmp[:], r)
111			buf = append(buf, runeTmp[:n]...)
112			continue
113		}
114		switch r {
115		case '\a':
116			buf = append(buf, `\a`...)
117		case '\b':
118			buf = append(buf, `\b`...)
119		case '\f':
120			buf = append(buf, `\f`...)
121		case '\n':
122			buf = append(buf, `\n`...)
123		case '\r':
124			buf = append(buf, `\r`...)
125		case '\t':
126			buf = append(buf, `\t`...)
127		case '\v':
128			buf = append(buf, `\v`...)
129		default:
130			switch {
131			case r < ' ':
132				buf = append(buf, `\x`...)
133				buf = append(buf, lowerhex[s[0]>>4])
134				buf = append(buf, lowerhex[s[0]&0xF])
135			case r > utf8.MaxRune:
136				r = 0xFFFD
137				fallthrough
138			case r < 0x10000:
139				buf = append(buf, `\u`...)
140				for s := 12; s >= 0; s -= 4 {
141					buf = append(buf, lowerhex[r>>uint(s)&0xF])
142				}
143			default:
144				buf = append(buf, `\U`...)
145				for s := 28; s >= 0; s -= 4 {
146					buf = append(buf, lowerhex[r>>uint(s)&0xF])
147				}
148			}
149		}
150	}
151	return buf
152}
153
154// Parse logs and write to database
155
156// Anonymize user and IP?
157
158func logGemini(r *gmi.Request) {
159	ipAddr := r.RemoteAddr.String()
160	host, _, err := net.SplitHostPort(ipAddr)
161	if err != nil {
162		host = ipAddr
163	}
164	line := fmt.Sprintf("gemini %s - [%s] %s %s\n", host,
165		time.Now().Format(apacheTS),
166		r.URL.Host,
167		r.URL.Path)
168	buf := []byte(line)
169	log.Writer().Write(buf)
170}
171
// LogLine is one parsed access-log entry. Not all fields are set for both
// protocols.
type LogLine struct {
	Timestamp time.Time // time recorded in the log entry
	Protocol  string    // gemini or http
	ReqIP     string    // address of the requesting client (field may be renamed)
	ReqUser   string    // user name recorded for the request
	Status    int       // response status
	DestHost  string    // host the request was addressed to
	Method    string    // request method
	Path      string    // requested path
}
183
184func (ll *LogLine) insertInto(db *sql.DB) {
185	_, err := db.Exec(`insert into log (timestamp, protocol, request_ip, request_user, status, destination_host, path, method)
186values (?, ?, ?, ?, ?, ?, ?, ?)`, ll.Timestamp.Format(time.RFC3339), ll.Protocol, ll.ReqIP, ll.ReqUser, ll.Status, ll.DestHost, ll.Path, ll.Method)
187	if err != nil {
188		fmt.Println(err)
189	}
190}
191
const (
	// httpLogRegex captures, in order: client, user, timestamp, destination
	// host, method, URI and status of an HTTP log line.
	httpLogRegex = `^(.*?) - (.*?) \[(.*?)\] (.*?) \"(.*) (.*) .*\" (\d*)`

	// geminiLogRegex captures, in order: client, timestamp, destination host
	// and path of a gemini log line.
	geminiLogRegex = `^gemini (.*?) - \[(.*?)\] (.*?) (.*)`
)

// Compiled forms of the two patterns above, built once at package
// initialisation.
var (
	rxHttp   = regexp.MustCompile(httpLogRegex)
	rxGemini = regexp.MustCompile(geminiLogRegex)
)
197
198func lineToLogLine(line string) (*LogLine, error) {
199	result := LogLine{}
200	var ts string
201	if strings.HasPrefix(line, "gemini") {
202		matches := rxGemini.FindStringSubmatch(line)
203		if len(matches) < 5 {
204			return nil, nil // TODO better error
205		} else {
206			result.ReqIP = matches[1]
207			ts = matches[2]
208			result.Timestamp, _ = time.Parse(apacheTS, ts)
209			result.DestHost = matches[3]
210			result.Path = matches[4]
211			result.Protocol = "gemini"
212			// etc
213		}
214	} else {
215		matches := rxHttp.FindStringSubmatch(line)
216		if len(matches) < 8 {
217			return nil, nil
218		} else {
219			result.ReqIP = matches[1]
220			result.ReqUser = matches[2]
221			ts = matches[3]
222			result.Timestamp, _ = time.Parse(apacheTS, ts)
223			result.DestHost = matches[4]
224			result.Method = matches[5]
225			result.Path = matches[6]
226			result.Status, _ = strconv.Atoi(matches[7])
227			result.Protocol = "http"
228		}
229	}
230	return &result, nil
231}
232
233func dumpLogs() {
234	log.Println("Writing missing logs to database")
235	db := getAnalyticsDB()
236	var maxTime string
237	row := db.QueryRow(`SELECT timestamp from log order by timestamp desc limit 1`)
238	err := row.Scan(&maxTime)
239	if err != nil {
240		// not perfect -- squashes errors
241	}
242
243	file, err := os.Open(c.LogFile)
244	if err != nil {
245		log.Fatal(err)
246	}
247	defer file.Close()
248
249	scanner := bufio.NewScanner(file)
250	counter := 0
251	for scanner.Scan() {
252		text := scanner.Text()
253		ll, _ := lineToLogLine(text)
254		if ll == nil {
255			continue
256		}
257		if maxTime != "" {
258			max, err := time.Parse(time.RFC3339, maxTime) // ineff
259			if !ll.Timestamp.After(max) || err != nil {
260				// NOTE -- possible bug if two requests in the same second while we are reading -- skips 1 log
261				continue
262			}
263		}
264		ll.insertInto(db)
265		counter += 1
266	}
267	log.Printf("Wrote %d logs\n", counter)
268}
269
// rotateLogs is a stub: log rotation has not been written yet, so calling it
// does nothing.
func rotateLogs() {
	// TODO: implement. Steps noted so far:
	//   - move log to log.1
	//   - delete log.1
}