log.go (view raw)
1package main
2
3import (
4 "bufio"
5 "database/sql"
6 "fmt"
7 gmi "git.sr.ht/~adnano/go-gemini"
8 "github.com/gorilla/handlers"
9 "io"
10 "log"
11 "net"
12 "net/http"
13 "net/url"
14 "os"
15 "regexp"
16 "strconv"
17 "strings"
18 "time"
19 "unicode/utf8"
20)
21
22// Copy pasted from gorilla handler library, modified slightly
23
const (
	// lowerhex maps a nibble value (0-15) to its lowercase hexadecimal digit.
	lowerhex = "0123456789abcdef"

	// apacheTS is the time layout used for the timestamps in log entries.
	apacheTS = "02/Jan/2006:15:04:05 -0700"
)
27
28func logFormatter(writer io.Writer, params handlers.LogFormatterParams) {
29 buf := buildCommonLogLine(params.Request, params.URL, params.TimeStamp, params.StatusCode, params.Size)
30 buf = append(buf, '\n')
31 writer.Write(buf)
32}
33
34// buildCommonLogLine builds a log entry for req in Apache Common Log Format.
35// ts is the timestamp with which the entry should be logged.
36// status and size are used to provide the response HTTP status and size.
37func buildCommonLogLine(req *http.Request, url url.URL, ts time.Time, status int, size int) []byte {
38 user := newGetAuthUser(req)
39 username := "-"
40 if user.Username != "" {
41 username = user.Username
42 }
43
44 // Get forwarded IP address
45 ipAddr := req.Header.Get("X-Real-IP")
46 if ipAddr == "" {
47 ipAddr = req.RemoteAddr
48 }
49
50 host, _, err := net.SplitHostPort(ipAddr)
51 if err != nil {
52 host = ipAddr
53 }
54
55 uri := req.RequestURI
56
57 // Requests using the CONNECT method over HTTP/2.0 must use
58 // the authority field (aka r.Host) to identify the target.
59 // Refer: https://httpwg.github.io/specs/rfc7540.html#CONNECT
60 if req.ProtoMajor == 2 && req.Method == "CONNECT" {
61 uri = req.Host
62 }
63 if uri == "" {
64 uri = url.RequestURI()
65 }
66
67 desthost := req.Host
68
69 buf := make([]byte, 0, 3*(len(host)+len(desthost)+len(username)+len(req.Method)+len(uri)+len(req.Proto)+50)/2)
70 buf = append(buf, host...)
71 buf = append(buf, " - "...)
72 buf = append(buf, username...)
73 buf = append(buf, " ["...)
74 buf = append(buf, ts.Format(apacheTS)...)
75 buf = append(buf, `] `...)
76 buf = append(buf, desthost...)
77 buf = append(buf, ` "`...)
78 buf = append(buf, req.Method...)
79 buf = append(buf, " "...)
80 buf = appendQuoted(buf, uri)
81 buf = append(buf, " "...)
82 buf = append(buf, req.Proto...)
83 buf = append(buf, `" `...)
84 buf = append(buf, strconv.Itoa(status)...)
85 buf = append(buf, " "...)
86 buf = append(buf, strconv.Itoa(size)...)
87 return buf
88}
89
90func appendQuoted(buf []byte, s string) []byte {
91 var runeTmp [utf8.UTFMax]byte
92 for width := 0; len(s) > 0; s = s[width:] {
93 r := rune(s[0])
94 width = 1
95 if r >= utf8.RuneSelf {
96 r, width = utf8.DecodeRuneInString(s)
97 }
98 if width == 1 && r == utf8.RuneError {
99 buf = append(buf, `\x`...)
100 buf = append(buf, lowerhex[s[0]>>4])
101 buf = append(buf, lowerhex[s[0]&0xF])
102 continue
103 }
104 if r == rune('"') || r == '\\' { // always backslashed
105 buf = append(buf, '\\')
106 buf = append(buf, byte(r))
107 continue
108 }
109 if strconv.IsPrint(r) {
110 n := utf8.EncodeRune(runeTmp[:], r)
111 buf = append(buf, runeTmp[:n]...)
112 continue
113 }
114 switch r {
115 case '\a':
116 buf = append(buf, `\a`...)
117 case '\b':
118 buf = append(buf, `\b`...)
119 case '\f':
120 buf = append(buf, `\f`...)
121 case '\n':
122 buf = append(buf, `\n`...)
123 case '\r':
124 buf = append(buf, `\r`...)
125 case '\t':
126 buf = append(buf, `\t`...)
127 case '\v':
128 buf = append(buf, `\v`...)
129 default:
130 switch {
131 case r < ' ':
132 buf = append(buf, `\x`...)
133 buf = append(buf, lowerhex[s[0]>>4])
134 buf = append(buf, lowerhex[s[0]&0xF])
135 case r > utf8.MaxRune:
136 r = 0xFFFD
137 fallthrough
138 case r < 0x10000:
139 buf = append(buf, `\u`...)
140 for s := 12; s >= 0; s -= 4 {
141 buf = append(buf, lowerhex[r>>uint(s)&0xF])
142 }
143 default:
144 buf = append(buf, `\U`...)
145 for s := 28; s >= 0; s -= 4 {
146 buf = append(buf, lowerhex[r>>uint(s)&0xF])
147 }
148 }
149 }
150 }
151 return buf
152}
153
154// Parse logs and write to database
155
156// Anonymize user and IP?
157
158func logGemini(r *gmi.Request) {
159 ipAddr := r.RemoteAddr.String()
160 host, _, err := net.SplitHostPort(ipAddr)
161 if err != nil {
162 host = ipAddr
163 }
164 line := fmt.Sprintf("gemini %s - [%s] %s %s\n", host,
165 time.Now().Format(apacheTS),
166 r.URL.Host,
167 r.URL.Path)
168 buf := []byte(line)
169 log.Writer().Write(buf)
170}
171
// LogLine holds the fields extracted from one log entry. Not all fields are
// set for both protocols: lineToLogLine fills ReqUser, Status and Method for
// http entries only.
type LogLine struct {
	Timestamp time.Time // parsed from the entry using the apacheTS layout
	Protocol  string    // gemini or http
	ReqIP     string    // maybe rename here
	ReqUser   string    // http only
	Status    int       // http only
	DestHost  string
	Method    string // http only
	Path      string
}
183
184func (ll *LogLine) insertInto(db *sql.DB) {
185 _, err := db.Exec(`insert into log (timestamp, protocol, request_ip, request_user, status, destination_host, path, method)
186values (?, ?, ?, ?, ?, ?, ?, ?)`, ll.Timestamp.Format(time.RFC3339), ll.Protocol, ll.ReqIP, ll.ReqUser, ll.Status, ll.DestHost, ll.Path, ll.Method)
187 if err != nil {
188 fmt.Println(err)
189 }
190}
191
const (
	// httpLogRegex captures, in order: client address, user, timestamp,
	// destination host, method, request URI and status.
	httpLogRegex = `^(.*?) - (.*?) \[(.*?)\] (.*?) \"(.*) (.*) .*\" (\d*)`

	// geminiLogRegex captures, in order: client address, timestamp,
	// destination host and path.
	geminiLogRegex = `^gemini (.*?) - \[(.*?)\] (.*?) (.*)`
)

// Both patterns are compiled once, when the package is initialised.
var (
	rxHttp   = regexp.MustCompile(httpLogRegex)
	rxGemini = regexp.MustCompile(geminiLogRegex)
)
197
198func lineToLogLine(line string) (*LogLine, error) {
199 result := LogLine{}
200 var ts string
201 if strings.HasPrefix(line, "gemini") {
202 matches := rxGemini.FindStringSubmatch(line)
203 if len(matches) < 5 {
204 return nil, nil // TODO better error
205 } else {
206 result.ReqIP = matches[1]
207 ts = matches[2]
208 result.Timestamp, _ = time.Parse(apacheTS, ts)
209 result.DestHost = matches[3]
210 result.Path = matches[4]
211 result.Protocol = "gemini"
212 // etc
213 }
214 } else {
215 matches := rxHttp.FindStringSubmatch(line)
216 if len(matches) < 8 {
217 return nil, nil
218 } else {
219 result.ReqIP = matches[1]
220 result.ReqUser = matches[2]
221 ts = matches[3]
222 result.Timestamp, _ = time.Parse(apacheTS, ts)
223 result.DestHost = matches[4]
224 result.Method = matches[5]
225 result.Path = matches[6]
226 result.Status, _ = strconv.Atoi(matches[7])
227 result.Protocol = "http"
228 }
229 }
230 return &result, nil
231}
232
233func dumpLogs() {
234 fmt.Println("Writing missing logs to database")
235 db := getAnalyticsDB()
236 var maxTime string
237 row := db.QueryRow(`SELECT timestamp from log order by timestamp desc limit 1`)
238 err := row.Scan(&maxTime)
239 if err != nil {
240 // not perfect -- squashes errors
241 }
242
243 file, err := os.Open(c.LogFile)
244 if err != nil {
245 log.Fatal(err)
246 }
247 defer file.Close()
248
249 scanner := bufio.NewScanner(file)
250 counter := 0
251 for scanner.Scan() {
252 text := scanner.Text()
253 ll, _ := lineToLogLine(text)
254 if ll == nil {
255 continue
256 }
257 if maxTime != "" {
258 max, err := time.Parse(time.RFC3339, maxTime) // ineff
259 if !ll.Timestamp.After(max) || err != nil {
260 // NOTE -- possible bug if two requests in the same second while we are reading -- skips 1 log
261 continue
262 }
263 }
264 ll.insertInto(db)
265 counter += 1
266 }
267 fmt.Printf("Wrote %d logs\n", counter)
268}
269
// rotateLogs is a placeholder for log rotation. It is not implemented yet and
// currently does nothing.
func rotateLogs() {
	// TODO write
	// move log to log.1
	// delete log.1
}