audio.go (view raw)
// Copyright 2011 Dmitry Chestnykh. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
4
5package captcha
6
7import (
8 "bytes"
9 "encoding/binary"
10 "io"
11
12 "math"
13 "math/rand"
14)
15
16const sampleRate = 8000 // Hz
17
18var (
19 endingBeepSound []byte
20)
21
22func init() {
23 endingBeepSound = changeSpeed(beepSound, 1.4)
24}
25
// Audio is an audio captcha: the generated sound data plus the set of digit
// pronunciations it was built from.
type Audio struct {
	// body holds the generated sound data that WriteTo emits after the
	// WAVE header.
	body *bytes.Buffer
	// digitSounds holds one sound per digit for the selected language,
	// indexed by digit value.
	digitSounds [][]byte
}
30
31// NewImage returns a new audio captcha with the given digits, where each digit
32// must be in range 0-9. Digits are pronounced in the given language. If there
33// are no sounds for the given language, English is used.
34func NewAudio(digits []byte, lang string) *Audio {
35 a := new(Audio)
36 if sounds, ok := digitSounds[lang]; ok {
37 a.digitSounds = sounds
38 } else {
39 a.digitSounds = digitSounds["en"]
40 }
41 numsnd := make([][]byte, len(digits))
42 nsdur := 0
43 for i, n := range digits {
44 snd := a.randomizedDigitSound(n)
45 nsdur += len(snd)
46 numsnd[i] = snd
47 }
48 // Random intervals between digits (including beginning).
49 intervals := make([]int, len(digits)+1)
50 intdur := 0
51 for i := range intervals {
52 dur := rnd(sampleRate, sampleRate*3) // 1 to 3 seconds
53 intdur += dur
54 intervals[i] = dur
55 }
56 // Generate background sound.
57 bg := a.makeBackgroundSound(a.longestDigitSndLen()*len(digits) + intdur)
58 // Create buffer and write audio to it.
59 sil := makeSilence(sampleRate / 5)
60 bufcap := 3*len(beepSound) + 2*len(sil) + len(bg) + len(endingBeepSound)
61 a.body = bytes.NewBuffer(make([]byte, 0, bufcap))
62 // Write prelude, three beeps.
63 a.body.Write(beepSound)
64 a.body.Write(sil)
65 a.body.Write(beepSound)
66 a.body.Write(sil)
67 a.body.Write(beepSound)
68 // Write digits.
69 pos := intervals[0]
70 for i, v := range numsnd {
71 mixSound(bg[pos:], v)
72 pos += len(v) + intervals[i+1]
73 }
74 a.body.Write(bg)
75 // Write ending (one beep).
76 a.body.Write(endingBeepSound)
77 return a
78}
79
80// WriteTo writes captcha audio in WAVE format into the given io.Writer, and
81// returns the number of bytes written and an error if any.
82func (a *Audio) WriteTo(w io.Writer) (n int64, err error) {
83 // Calculate padded length of PCM chunk data.
84 bodyLen := uint32(a.body.Len())
85 paddedBodyLen := bodyLen
86 if bodyLen%2 != 0 {
87 paddedBodyLen++
88 }
89 totalLen := uint32(len(waveHeader)) - 4 + paddedBodyLen
90 // Header.
91 header := make([]byte, len(waveHeader)+4) // includes 4 bytes for chunk size
92 copy(header, waveHeader)
93 // Put the length of whole RIFF chunk.
94 binary.LittleEndian.PutUint32(header[4:], totalLen)
95 // Put the length of WAVE chunk.
96 binary.LittleEndian.PutUint32(header[len(waveHeader):], bodyLen)
97 // Write header.
98 nn, err := w.Write(header)
99 n = int64(nn)
100 if err != nil {
101 return
102 }
103 // Write data.
104 n, err = a.body.WriteTo(w)
105 n += int64(nn)
106 if err != nil {
107 return
108 }
109 // Pad byte if chunk length is odd.
110 // (As header has even length, we can check if n is odd, not chunk).
111 if bodyLen != paddedBodyLen {
112 w.Write([]byte{0})
113 n++
114 }
115 return
116}
117
118// EncodedLen returns the length of WAV-encoded audio captcha.
119func (a *Audio) EncodedLen() int {
120 return len(waveHeader) + 4 + a.body.Len()
121}
122
123func (a *Audio) makeBackgroundSound(length int) []byte {
124 b := makeWhiteNoise(length, 4)
125 for i := 0; i < length/(sampleRate/10); i++ {
126 snd := reversedSound(a.digitSounds[rand.Intn(10)])
127 snd = changeSpeed(snd, rndf(0.8, 1.4))
128 place := rand.Intn(len(b) - len(snd))
129 setSoundLevel(snd, rndf(0.2, 0.5))
130 mixSound(b[place:], snd)
131 }
132 return b
133}
134
135func (a *Audio) randomizedDigitSound(n byte) []byte {
136 s := randomSpeed(a.digitSounds[n])
137 setSoundLevel(s, rndf(0.75, 1.2))
138 return s
139}
140
141func (a *Audio) longestDigitSndLen() int {
142 n := 0
143 for _, v := range a.digitSounds {
144 if n < len(v) {
145 n = len(v)
146 }
147 }
148 return n
149}
150
// mixSound mixes src into dst in place, sample by sample. Dst must have
// length equal to or greater than src length; bytes of dst beyond len(src)
// are left untouched.
//
// Samples are unsigned bytes centered on 128. Two samples below the center
// are combined as a*b/128; any other pair as 2*(a+b) - a*b/128 - 256.
func mixSound(dst, src []byte) {
	for i, v := range src {
		av := int(v)
		bv := int(dst[i])
		var mixed int
		if av < 128 && bv < 128 {
			mixed = av * bv / 128
		} else {
			mixed = 2*(av+bv) - av*bv/128 - 256
			// Because of the truncating division the loudest pairs
			// (e.g. 255 and 255) evaluate to 256, which would wrap
			// around to 0 when stored in a byte.
			if mixed > 255 {
				mixed = 255
			}
		}
		dst[i] = byte(mixed)
	}
}
164
// setSoundLevel scales, in place, the distance of every sample in a from the
// center value 128 by level. Samples equal to 128 are left as they are.
//
// A sample never crosses the center (a negative level yields 128) and the
// result is clamped to the byte range 0..255, so levels above 1 saturate
// loud samples instead of producing out-of-range values.
func setSoundLevel(a []byte, level float64) {
	for i, v := range a {
		av := float64(v)
		switch {
		case av > 128:
			av = (av-128)*level + 128
			if av < 128 {
				av = 128
			} else if av > 255 {
				// Converting an out-of-range float to byte gives an
				// implementation-dependent result.
				av = 255
			}
		case av < 128:
			av = 128 - (128-av)*level
			if av > 128 {
				av = 128
			} else if av < 0 {
				av = 0
			}
		default:
			continue
		}
		a[i] = byte(av)
	}
}
183
// changeSpeed returns new PCM bytes made from a by repeating or dropping
// samples so that the result has floor(len(a)*speed) samples. A speed above 1
// therefore lengthens the sound and a speed below 1 shortens it; a speed of 0
// gives an empty result. Speed must not be negative. The input is not
// modified.
func changeSpeed(a []byte, speed float64) []byte {
	b := make([]byte, int(math.Floor(float64(len(a))*speed)))
	var p float64
	for _, v := range a {
		next := p + speed
		// The running sum p accumulates rounding errors and may end up
		// slightly above len(a)*speed; never write past the end of b.
		end := int(next)
		if end > len(b) {
			end = len(b)
		}
		for i := int(p); i < end; i++ {
			b[i] = v
		}
		p = next
	}
	// For the same reason the sum may fall slightly short, leaving a tail
	// of zero bytes (a loud click, as samples are centered on 128). Fill it
	// with the last input sample instead.
	if n := len(a); n > 0 {
		for i := int(p); i < len(b); i++ {
			b[i] = a[n-1]
		}
	}
	return b
}
197
198func randomSpeed(a []byte) []byte {
199 pitch := rndf(0.9, 1.2)
200 return changeSpeed(a, pitch)
201}
202
// makeSilence returns length bytes of silence, i.e. every sample set to the
// center value 128.
func makeSilence(length int) []byte {
	const center = 128
	out := make([]byte, length)
	for i := 0; i < length; i++ {
		out[i] = center
	}
	return out
}
210
211func makeWhiteNoise(length int, level uint8) []byte {
212 noise := randomBytes(length)
213 adj := 128 - level/2
214 for i, v := range noise {
215 v %= level
216 v += adj
217 noise[i] = v
218 }
219 return noise
220}
221
// reversedSound returns a new slice holding the bytes of a in reverse order.
// The input is not modified.
func reversedSound(a []byte) []byte {
	out := make([]byte, len(a))
	for src, dst := 0, len(a)-1; dst >= 0; src, dst = src+1, dst-1 {
		out[dst] = a[src]
	}
	return out
}