log.go (view raw)
1package main
2
3import (
4 "bufio"
5 "database/sql"
6 "fmt"
7 gmi "git.sr.ht/~adnano/go-gemini"
8 "github.com/gorilla/handlers"
9 "io"
10 "log"
11 "net"
12 "net/http"
13 "net/url"
14 "os"
15 "regexp"
16 "strconv"
17 "strings"
18 "time"
19 "unicode/utf8"
20)
21
22// Copy pasted from gorilla handler library, modified slightly
23
// lowerhex holds the lowercase hexadecimal digits; appendQuoted indexes it by
// nibble value when writing \x, \u and \U escapes.
const lowerhex = "0123456789abcdef"

// apacheTS is the time layout of the bracketed timestamp in every log line.
// It is used both when writing entries and when parsing them back.
const apacheTS = "02/Jan/2006:15:04:05 -0700"
27
28func logFormatter(writer io.Writer, params handlers.LogFormatterParams) {
29 buf := buildCommonLogLine(params.Request, params.URL, params.TimeStamp, params.StatusCode, params.Size)
30 buf = append(buf, '\n')
31 writer.Write(buf)
32}
33
34// buildCommonLogLine builds a log entry for req in Apache Common Log Format.
35// ts is the timestamp with which the entry should be logged.
36// status and size are used to provide the response HTTP status and size.
37func buildCommonLogLine(req *http.Request, url url.URL, ts time.Time, status int, size int) []byte {
38 user := newGetAuthUser(req)
39 username := "-"
40 if user.Username != "" {
41 username = user.Username
42 }
43
44 // Get forwarded IP address
45 ipAddr := req.Header.Get("X-Real-IP")
46 if ipAddr == "" {
47 ipAddr = req.RemoteAddr
48 }
49 referer := req.Header.Get("Referer")
50
51 host, _, err := net.SplitHostPort(ipAddr)
52 if err != nil {
53 host = ipAddr
54 }
55
56 uri := req.RequestURI
57
58 // Requests using the CONNECT method over HTTP/2.0 must use
59 // the authority field (aka r.Host) to identify the target.
60 // Refer: https://httpwg.github.io/specs/rfc7540.html#CONNECT
61 if req.ProtoMajor == 2 && req.Method == "CONNECT" {
62 uri = req.Host
63 }
64 if uri == "" {
65 uri = url.RequestURI()
66 }
67
68 desthost := req.Host
69
70 buf := make([]byte, 0, 3*(len(host)+len(desthost)+len(username)+len(req.Method)+len(uri)+len(req.Proto)+len(referer)+50)/2)
71 buf = append(buf, host...)
72 buf = append(buf, " - "...)
73 buf = append(buf, username...)
74 buf = append(buf, " ["...)
75 buf = append(buf, ts.Format(apacheTS)...)
76 buf = append(buf, `] `...)
77 buf = append(buf, desthost...)
78 buf = append(buf, ` "`...)
79 buf = append(buf, req.Method...)
80 buf = append(buf, " "...)
81 buf = appendQuoted(buf, uri)
82 buf = append(buf, " "...)
83 buf = append(buf, req.Proto...)
84 buf = append(buf, `" - `...)
85 buf = append(buf, referer...)
86 buf = append(buf, " - "...)
87 buf = append(buf, strconv.Itoa(status)...)
88 buf = append(buf, " "...)
89 buf = append(buf, strconv.Itoa(size)...)
90 return buf
91}
92
93func appendQuoted(buf []byte, s string) []byte {
94 var runeTmp [utf8.UTFMax]byte
95 for width := 0; len(s) > 0; s = s[width:] {
96 r := rune(s[0])
97 width = 1
98 if r >= utf8.RuneSelf {
99 r, width = utf8.DecodeRuneInString(s)
100 }
101 if width == 1 && r == utf8.RuneError {
102 buf = append(buf, `\x`...)
103 buf = append(buf, lowerhex[s[0]>>4])
104 buf = append(buf, lowerhex[s[0]&0xF])
105 continue
106 }
107 if r == rune('"') || r == '\\' { // always backslashed
108 buf = append(buf, '\\')
109 buf = append(buf, byte(r))
110 continue
111 }
112 if strconv.IsPrint(r) {
113 n := utf8.EncodeRune(runeTmp[:], r)
114 buf = append(buf, runeTmp[:n]...)
115 continue
116 }
117 switch r {
118 case '\a':
119 buf = append(buf, `\a`...)
120 case '\b':
121 buf = append(buf, `\b`...)
122 case '\f':
123 buf = append(buf, `\f`...)
124 case '\n':
125 buf = append(buf, `\n`...)
126 case '\r':
127 buf = append(buf, `\r`...)
128 case '\t':
129 buf = append(buf, `\t`...)
130 case '\v':
131 buf = append(buf, `\v`...)
132 default:
133 switch {
134 case r < ' ':
135 buf = append(buf, `\x`...)
136 buf = append(buf, lowerhex[s[0]>>4])
137 buf = append(buf, lowerhex[s[0]&0xF])
138 case r > utf8.MaxRune:
139 r = 0xFFFD
140 fallthrough
141 case r < 0x10000:
142 buf = append(buf, `\u`...)
143 for s := 12; s >= 0; s -= 4 {
144 buf = append(buf, lowerhex[r>>uint(s)&0xF])
145 }
146 default:
147 buf = append(buf, `\U`...)
148 for s := 28; s >= 0; s -= 4 {
149 buf = append(buf, lowerhex[r>>uint(s)&0xF])
150 }
151 }
152 }
153 }
154 return buf
155}
156
157// Parse logs and write to database
158
159// Anonymize user and IP?
160
161func logGemini(r *gmi.Request) {
162 ipAddr := r.RemoteAddr.String()
163 host, _, err := net.SplitHostPort(ipAddr)
164 if err != nil {
165 host = ipAddr
166 }
167 line := fmt.Sprintf("gemini %s - [%s] %s %s\n", host,
168 time.Now().Format(apacheTS),
169 r.URL.Host,
170 r.URL.Path)
171 buf := []byte(line)
172 log.Writer().Write(buf)
173}
174
// LogLine is one parsed access-log entry, as stored in the log table.
//
// Not all fields are set for both protocols: lineToLogLine fills every field
// for HTTP entries, but only Timestamp, Protocol, ReqIP, DestHost and Path
// for Gemini entries.
type LogLine struct {
	Timestamp time.Time // time of the request, parsed from the bracketed timestamp
	Protocol  string    // gemini or http
	ReqIP     string    // client address as logged; maybe rename here
	ReqUser   string    // user name field of the line ("-" is logged when there is none)
	Status    int       // response status code
	DestHost  string    // host the request was addressed to
	Method    string    // request method
	Referer   string    // Referer header as logged
	Path      string    // request path / URI as logged
}
187
188func (ll *LogLine) insertInto(db *sql.DB) {
189 _, err := db.Exec(`insert into log (timestamp, protocol, request_ip, request_user, status, destination_host, path, method, referer)
190values (?, ?, ?, ?, ?, ?, ?, ?, ?)`, ll.Timestamp.Format(time.RFC3339), ll.Protocol, ll.ReqIP, ll.ReqUser, ll.Status, ll.DestHost, ll.Path, ll.Method, ll.Referer)
191 if err != nil {
192 fmt.Println(err)
193 }
194}
195
// Patterns for the two kinds of line found in the log file.
const (
	// httpLogRegex captures, in order: client host, user, timestamp,
	// destination host, method, request URI, referer and status.
	httpLogRegex = `^(.*?) - (.*?) \[(.*?)\] (.*?) \"(.*) (.*) .*\" - (.*) - (\d*)`
	// geminiLogRegex captures, in order: client host, timestamp,
	// destination host and path.
	geminiLogRegex = `^gemini (.*?) - \[(.*?)\] (.*?) (.*)`
)

// Compiled once at package initialisation and reused for every line.
var (
	rxHttp   = regexp.MustCompile(httpLogRegex)
	rxGemini = regexp.MustCompile(geminiLogRegex)
)
201
202func lineToLogLine(line string) (*LogLine, error) {
203 result := LogLine{}
204 var ts string
205 if strings.HasPrefix(line, "gemini") {
206 matches := rxGemini.FindStringSubmatch(line)
207 if len(matches) < 5 {
208 return nil, nil // TODO better error
209 } else {
210 result.ReqIP = matches[1]
211 ts = matches[2]
212 result.Timestamp, _ = time.Parse(apacheTS, ts)
213 result.DestHost = matches[3]
214 result.Path = matches[4]
215 result.Protocol = "gemini"
216 // etc
217 }
218 } else {
219 matches := rxHttp.FindStringSubmatch(line)
220 if len(matches) < 8 {
221 return nil, nil
222 } else {
223 result.ReqIP = matches[1]
224 result.ReqUser = matches[2]
225 ts = matches[3]
226 result.Timestamp, _ = time.Parse(apacheTS, ts)
227 result.DestHost = matches[4]
228 result.Method = matches[5]
229 result.Path = matches[6]
230 result.Referer = matches[7]
231 result.Status, _ = strconv.Atoi(matches[8])
232 result.Protocol = "http"
233 }
234 }
235 return &result, nil
236}
237
238func dumpLogs() {
239 log.Println("Writing missing logs to database")
240 db := getAnalyticsDB()
241 var maxTime string
242 row := db.QueryRow(`SELECT timestamp from log order by timestamp desc limit 1`)
243 err := row.Scan(&maxTime)
244 if err != nil {
245 // not perfect -- squashes errors
246 }
247
248 file, err := os.Open(c.LogFile)
249 if err != nil {
250 log.Fatal(err)
251 }
252 defer file.Close()
253
254 scanner := bufio.NewScanner(file)
255 counter := 0
256 for scanner.Scan() {
257 text := scanner.Text()
258 ll, _ := lineToLogLine(text)
259 if ll == nil {
260 continue
261 }
262 if maxTime != "" {
263 max, err := time.Parse(time.RFC3339, maxTime) // ineff
264 if !ll.Timestamp.After(max) || err != nil {
265 // NOTE -- possible bug if two requests in the same second while we are reading -- skips 1 log
266 continue
267 }
268 }
269 ll.insertInto(db)
270 counter += 1
271 }
272 log.Printf("Wrote %d logs\n", counter)
273}
274
// rotateLogs is a placeholder for log rotation. It is not implemented yet and
// currently does nothing; the notes below sketch the intended steps.
func rotateLogs() {
	// TODO write
	// move log to log.1
	// delete log.1
}