log.go (view raw)
1package main
2
3import (
4 "bufio"
5 "database/sql"
6 "fmt"
7 gmi "git.sr.ht/~adnano/go-gemini"
8 "github.com/gorilla/handlers"
9 "io"
10 "log"
11 "net"
12 "net/http"
13 "net/url"
14 "os"
15 "regexp"
16 "strconv"
17 "strings"
18 "time"
19 "unicode/utf8"
20)
21
22// Copy pasted from gorilla handler library, modified slightly
23
24const lowerhex = "0123456789abcdef"
25
26const apacheTS = "02/Jan/2006:15:04:05 -0700"
27
28func logFormatter(writer io.Writer, params handlers.LogFormatterParams) {
29 buf := buildCommonLogLine(params.Request, params.URL, params.TimeStamp, params.StatusCode, params.Size)
30 buf = append(buf, '\n')
31 writer.Write(buf)
32}
33
34// buildCommonLogLine builds a log entry for req in Apache Common Log Format.
35// ts is the timestamp with which the entry should be logged.
36// status and size are used to provide the response HTTP status and size.
37func buildCommonLogLine(req *http.Request, url url.URL, ts time.Time, status int, size int) []byte {
38 user := newGetAuthUser(req)
39 username := "-"
40 if user.Username != "" {
41 username = user.Username
42 }
43
44 // Get forwarded IP address
45 ipAddr := req.Header.Get("X-Forwarded-For") // TODO make so it works with many servers
46 if ipAddr == "" {
47 ipAddr = req.RemoteAddr
48 }
49 referer := req.Header.Get("Referer")
50
51 host, _, err := net.SplitHostPort(ipAddr)
52 if err != nil {
53 host = ipAddr
54 }
55
56 uri := req.RequestURI
57
58 // Requests using the CONNECT method over HTTP/2.0 must use
59 // the authority field (aka r.Host) to identify the target.
60 // Refer: https://httpwg.github.io/specs/rfc7540.html#CONNECT
61 if req.ProtoMajor == 2 && req.Method == "CONNECT" {
62 uri = req.Host
63 }
64 if uri == "" {
65 uri = url.RequestURI()
66 }
67
68 desthost := req.Host
69
70 buf := make([]byte, 0, 3*(len(host)+len(desthost)+len(username)+len(req.Method)+len(uri)+len(req.Proto)+len(referer)+50)/2)
71 buf = append(buf, host...)
72 buf = append(buf, " - "...)
73 buf = append(buf, username...)
74 buf = append(buf, " ["...)
75 buf = append(buf, ts.Format(apacheTS)...)
76 buf = append(buf, `] `...)
77 buf = append(buf, desthost...)
78 buf = append(buf, ` "`...)
79 buf = append(buf, req.Method...)
80 buf = append(buf, " "...)
81 buf = appendQuoted(buf, uri)
82 buf = append(buf, " "...)
83 buf = append(buf, req.Proto...)
84 buf = append(buf, `" - `...)
85 buf = append(buf, referer...)
86 buf = append(buf, " - "...)
87 buf = append(buf, strconv.Itoa(status)...)
88 buf = append(buf, " "...)
89 buf = append(buf, strconv.Itoa(size)...)
90 return buf
91}
92
93func appendQuoted(buf []byte, s string) []byte {
94 var runeTmp [utf8.UTFMax]byte
95 for width := 0; len(s) > 0; s = s[width:] {
96 r := rune(s[0])
97 width = 1
98 if r >= utf8.RuneSelf {
99 r, width = utf8.DecodeRuneInString(s)
100 }
101 if width == 1 && r == utf8.RuneError {
102 buf = append(buf, `\x`...)
103 buf = append(buf, lowerhex[s[0]>>4])
104 buf = append(buf, lowerhex[s[0]&0xF])
105 continue
106 }
107 if r == rune('"') || r == '\\' { // always backslashed
108 buf = append(buf, '\\')
109 buf = append(buf, byte(r))
110 continue
111 }
112 if strconv.IsPrint(r) {
113 n := utf8.EncodeRune(runeTmp[:], r)
114 buf = append(buf, runeTmp[:n]...)
115 continue
116 }
117 switch r {
118 case '\a':
119 buf = append(buf, `\a`...)
120 case '\b':
121 buf = append(buf, `\b`...)
122 case '\f':
123 buf = append(buf, `\f`...)
124 case '\n':
125 buf = append(buf, `\n`...)
126 case '\r':
127 buf = append(buf, `\r`...)
128 case '\t':
129 buf = append(buf, `\t`...)
130 case '\v':
131 buf = append(buf, `\v`...)
132 default:
133 switch {
134 case r < ' ':
135 buf = append(buf, `\x`...)
136 buf = append(buf, lowerhex[s[0]>>4])
137 buf = append(buf, lowerhex[s[0]&0xF])
138 case r > utf8.MaxRune:
139 r = 0xFFFD
140 fallthrough
141 case r < 0x10000:
142 buf = append(buf, `\u`...)
143 for s := 12; s >= 0; s -= 4 {
144 buf = append(buf, lowerhex[r>>uint(s)&0xF])
145 }
146 default:
147 buf = append(buf, `\U`...)
148 for s := 28; s >= 0; s -= 4 {
149 buf = append(buf, lowerhex[r>>uint(s)&0xF])
150 }
151 }
152 }
153 }
154 return buf
155}
156
157// Parse logs and write to database
158
159// Anonymize user and IP?
160
161func logGemini(r *gmi.Request) {
162 var ipAddr string
163 if r.RemoteAddr != nil {
164 ipAddr = r.RemoteAddr.String()
165 } else {
166 ipAddr = ""
167 }
168 host, _, err := net.SplitHostPort(ipAddr)
169 if err != nil {
170 host = ipAddr
171 }
172 line := fmt.Sprintf("gemini %s - [%s] %s %s\n", host,
173 time.Now().Format(apacheTS),
174 r.URL.Host,
175 r.URL.Path)
176 buf := []byte(line)
177 log.Writer().Write(buf)
178}
179
180// notall fields set for both protocols
181type LogLine struct {
182 Timestamp time.Time
183 Protocol string // gemini or http
184 ReqIP string // maybe rename here
185 ReqUser string
186 Status int
187 DestHost string
188 Method string
189 Referer string
190 Path string
191}
192
193func (ll *LogLine) insertInto(db *sql.DB) {
194 _, err := db.Exec(`insert into log (timestamp, protocol, request_ip, request_user, status, destination_host, path, method, referer)
195values (?, ?, ?, ?, ?, ?, ?, ?, ?)`, ll.Timestamp.Format(time.RFC3339), ll.Protocol, ll.ReqIP, ll.ReqUser, ll.Status, ll.DestHost, ll.Path, ll.Method, ll.Referer)
196 if err != nil {
197 fmt.Println(err)
198 }
199}
200
201const httpLogRegex = `^(.*?) - (.*?) \[(.*?)\] (.*?) \"(.*) (.*) .*\" - (.*) - (\d*)`
202const geminiLogRegex = `^gemini (.*?) - \[(.*?)\] (.*?) (.*)`
203
204var rxHttp *regexp.Regexp = regexp.MustCompile(httpLogRegex)
205var rxGemini *regexp.Regexp = regexp.MustCompile(geminiLogRegex)
206
207func lineToLogLine(line string) (*LogLine, error) {
208 result := LogLine{}
209 var ts string
210 if strings.HasPrefix(line, "gemini") {
211 matches := rxGemini.FindStringSubmatch(line)
212 if len(matches) < 5 {
213 return nil, nil // TODO better error
214 } else {
215 result.ReqIP = matches[1]
216 ts = matches[2]
217 result.Timestamp, _ = time.Parse(apacheTS, ts)
218 result.DestHost = matches[3]
219 result.Path = matches[4]
220 result.Protocol = "gemini"
221 // etc
222 }
223 } else {
224 matches := rxHttp.FindStringSubmatch(line)
225 if len(matches) < 8 {
226 return nil, nil
227 } else {
228 result.ReqIP = matches[1]
229 result.ReqUser = matches[2]
230 ts = matches[3]
231 result.Timestamp, _ = time.Parse(apacheTS, ts)
232 result.DestHost = matches[4]
233 result.Method = matches[5]
234 result.Path = matches[6]
235 result.Referer = matches[7]
236 result.Status, _ = strconv.Atoi(matches[8])
237 result.Protocol = "http"
238 }
239 }
240 return &result, nil
241}
242
243func dumpLogsWorker() {
244 log.Println("Starting analytics log writing worker")
245 for {
246 dumpLogs()
247 time.Sleep(time.Minute * 5)
248 }
249}
250func dumpLogs() {
251 log.Println("Writing missing logs to database")
252 db, err := getAnalyticsDB()
253 if err != nil {
254 // not perfect -- squashes errors
255 return
256 }
257 var maxTime string
258 row := db.QueryRow(`SELECT timestamp from log order by timestamp desc limit 1`)
259 err = row.Scan(&maxTime)
260 if err != nil {
261 // not perfect -- squashes errors
262 return
263 }
264
265 file, err := os.Open(c.LogFile)
266 if err != nil {
267 // not perfect -- squashes errors
268 return
269 }
270 defer file.Close()
271
272 scanner := bufio.NewScanner(file)
273 counter := 0
274 for scanner.Scan() {
275 text := scanner.Text()
276 ll, _ := lineToLogLine(text)
277 if ll == nil {
278 continue
279 }
280 if maxTime != "" {
281 max, err := time.Parse(time.RFC3339, maxTime) // ineff
282 if !ll.Timestamp.After(max) || err != nil {
283 // NOTE -- possible bug if two requests in the same second while we are reading -- skips 1 log
284 continue
285 }
286 }
287 ll.insertInto(db)
288 counter += 1
289 }
290 log.Printf("Wrote %d logs\n", counter)
291}
292
293func rotateLogs() {
294 // TODO write
295 // move log to log.1
296 // delete log.1
297}