src/subs/subs.go (view raw)
1package subs
2
3import (
4 "bytes"
5 "encoding/xml"
6 "fmt"
7 "io"
8 "log"
9 "strings"
10)
11
12type TimedText struct {
13 XMLName xml.Name `xml:"timedtext"`
14 Body Body `xml:"body"`
15}
16
17type Body struct {
18 Paragraphs []Paragraph `xml:"p"`
19}
20
21type Paragraph struct {
22 Start int `xml:"t,attr"` // Start time in milliseconds
23 Length int `xml:"d,attr"` // Duration in milliseconds
24 Text string `xml:",chardata"` // Direct text (for cases without <s> tags)
25 Sentences []Sentence `xml:"s"` // List of <s> tags (for cases with individual words/phrases)
26}
27
// Sentence maps one <s> element nested inside a paragraph.
type Sentence struct {
	// Text is the character data inside the <s> tag.
	Text string `xml:",chardata"`
	// Time is the optional "t" attribute (a start time); it stays zero when
	// the attribute is absent.
	Time int `xml:"t,attr"`
}
32
33func writeVTT(output *bytes.Buffer, i, startTime, endTime int, sentence string) (int, error) {
34 return output.Write(
35 []byte(
36 fmt.Sprintf(
37 "%d\n%s --> %s\n%s\n\n",
38 i,
39 millisecondsToTimestamp(startTime),
40 millisecondsToTimestamp(endTime),
41 sentence,
42 ),
43 ))
44}
45
// millisecondsToTimestamp formats a millisecond offset as a WebVTT timestamp,
// HH:MM:SS.mmm. The hour field is padded to two digits and simply grows wider
// for offsets of 100 hours or more.
//
// WebVTT has no notation for negative times, so a negative offset is clamped
// to zero instead of being rendered with stray minus signs in the seconds and
// millisecond fields (e.g. "00:00:-1.-500").
func millisecondsToTimestamp(ms int) string {
	const (
		msPerSecond = 1000
		msPerMinute = 60 * msPerSecond
		msPerHour   = 60 * msPerMinute
	)

	if ms < 0 {
		ms = 0
	}

	hours := ms / msPerHour
	minutes := ms % msPerHour / msPerMinute
	seconds := ms % msPerMinute / msPerSecond
	millis := ms % msPerSecond

	return fmt.Sprintf("%02d:%02d:%02d.%03d", hours, minutes, seconds, millis)
}
52
53func Convert(reader io.Reader) (buffer bytes.Buffer, err error) {
54 content, err := io.ReadAll(reader)
55 if err != nil {
56 return
57 }
58
59 var timedText TimedText
60 err = xml.Unmarshal(content, &timedText)
61 if err != nil {
62 log.Println("Error unmarshalling XML:", err)
63 return
64 }
65
66 n, err := buffer.Write([]byte("WEBVTT\n\n"))
67 if err != nil {
68 return
69 }
70
71 l := len(timedText.Body.Paragraphs) - 1
72 var m int
73 for i, p := range timedText.Body.Paragraphs {
74 startTimeMS := p.Start
75 endTimeMS := p.Start + p.Length
76
77 if i < l {
78 nextStartTimeMS := timedText.Body.Paragraphs[i+1].Start
79 if nextStartTimeMS < endTimeMS {
80 endTimeMS = nextStartTimeMS
81 }
82 }
83
84 var sentence string
85 if len(p.Sentences) > 0 {
86 for _, s := range p.Sentences {
87 sentence += s.Text
88 }
89 } else {
90 sentence = p.Text
91 }
92
93 sentence = strings.TrimSpace(sentence)
94 if sentence == "" {
95 continue
96 }
97
98 m, err = writeVTT(&buffer, i+1, startTimeMS, endTimeMS, sentence)
99 n += m
100 if err != nil {
101 return
102 }
103 }
104
105 return
106}