src/subs/subs.go (view raw)
1package subs
2
3import (
4 "encoding/xml"
5 "fmt"
6 "io"
7 "log"
8 "strings"
9)
10
11type TimedText struct {
12 XMLName xml.Name `xml:"timedtext"`
13 Body Body `xml:"body"`
14}
15
16type Body struct {
17 Paragraphs []Paragraph `xml:"p"`
18}
19
20type Paragraph struct {
21 Start int `xml:"t,attr"` // Start time in milliseconds
22 Length int `xml:"d,attr"` // Duration in milliseconds
23 Text string `xml:",chardata"` // Direct text (for cases without <s> tags)
24 Sentences []Sentence `xml:"s"` // List of <s> tags (for cases with individual words/phrases)
25}
26
// Sentence is one <s> element inside a paragraph.
type Sentence struct {
	// Text is the character data inside the <s> tag.
	Text string `xml:",chardata"`
	// Time is the optional "t" attribute; it keeps its zero value when the
	// attribute is not present.
	Time int `xml:"t,attr"`
}
31
32func writeVTT(output io.Writer, i, startTime, endTime int, sentence string) {
33 output.Write(
34 []byte(
35 fmt.Sprintf(
36 "%d\n%s --> %s\n%s\n\n",
37 i,
38 millisecondsToTimestamp(startTime),
39 millisecondsToTimestamp(endTime),
40 sentence,
41 ),
42 ))
43}
44
// millisecondsToTimestamp formats ms, an offset in milliseconds, as a WebVTT
// timestamp of the form HH:MM:SS.mmm. The hours field grows past two digits
// for offsets of 100 hours or more.
//
// A negative offset is clamped to zero: formatting it directly would put minus
// signs inside the fields (for example "00:00:00.-01"), which is not a
// timestamp.
func millisecondsToTimestamp(ms int) string {
	if ms < 0 {
		ms = 0
	}
	seconds := ms / 1000
	return fmt.Sprintf(
		"%02d:%02d:%02d.%03d",
		seconds/3600,
		(seconds%3600)/60,
		seconds%60,
		ms%1000,
	)
}
51
52func Convert(reader io.Reader, output io.Writer) error {
53 content, err := io.ReadAll(reader)
54 if err != nil {
55 return err
56 }
57
58 var timedText TimedText
59 err = xml.Unmarshal(content, &timedText)
60 if err != nil {
61 log.Println("Error unmarshalling XML:", err)
62 return err
63 }
64
65 output.Write([]byte("WEBVTT\n\n"))
66
67 var lastEndTime int
68 for i, p := range timedText.Body.Paragraphs {
69 startTimeMS := p.Start
70 endTimeMS := p.Start + p.Length
71
72 if startTimeMS < lastEndTime {
73 startTimeMS = lastEndTime
74 }
75
76 var sentence string
77 if len(p.Sentences) > 0 {
78 for _, s := range p.Sentences {
79 sentence += s.Text
80 }
81 } else {
82 sentence = p.Text
83 }
84
85 sentence = strings.TrimSpace(sentence)
86 if sentence == "" {
87 continue
88 }
89
90 lastEndTime = endTimeMS
91 writeVTT(output, i+1, startTimeMS, endTimeMS, sentence)
92 }
93
94 return nil
95}