all repos — gopipe @ main

Embed YouTube videos on Telegram, Discord and more!

src/subs/subs.go (view raw)

  1package subs
  2
  3import (
  4	"bytes"
  5	"encoding/xml"
  6	"fmt"
  7	"io"
  8	"log"
  9	"strings"
 10)
 11
// TimedText is the root <timedtext> element of a timed-text XML document.
type TimedText struct {
	XMLName xml.Name `xml:"timedtext"`
	Body    Body     `xml:"body"` // The single <body> child holding the captions
}
 16
// Body is the <body> element; it collects every <p> child in document order.
type Body struct {
	Paragraphs []Paragraph `xml:"p"`
}
 20
// Paragraph is one <p> element: a timed caption whose text is either the
// element's own character data or the concatenation of its <s> children.
type Paragraph struct {
	Start     int        `xml:"t,attr"`    // Start time in milliseconds
	Length    int        `xml:"d,attr"`    // Duration in milliseconds
	Text      string     `xml:",chardata"` // Direct text (for cases without <s> tags)
	Sentences []Sentence `xml:"s"`         // List of <s> tags (for cases with individual words/phrases)
}
 27
// Sentence is one <s> element nested inside a <p>: a word or phrase of the
// caption. Convert only reads Text; Time is decoded but not used there.
type Sentence struct {
	Text string `xml:",chardata"` // Text inside the <s> tag
	Time int    `xml:"t,attr"`    // Optional start time (not always present)
}
 32
 33func writeVTT(output *bytes.Buffer, i, startTime, endTime int, sentence string) (int, error) {
 34	return output.Write(
 35		[]byte(
 36			fmt.Sprintf(
 37				"%d\n%s --> %s\n%s\n\n",
 38				i,
 39				millisecondsToTimestamp(startTime),
 40				millisecondsToTimestamp(endTime),
 41				sentence,
 42			),
 43		))
 44}
 45
 46// Convert milliseconds to WebVTT timestamp format: HH:MM:SS.mmm
 47func millisecondsToTimestamp(ms int) string {
 48	seconds := ms / 1000
 49	milliseconds := ms % 1000
 50	return fmt.Sprintf("%02d:%02d:%02d.%03d", seconds/3600, (seconds%3600)/60, seconds%60, milliseconds)
 51}
 52
 53func Convert(reader io.Reader) (buffer bytes.Buffer, err error) {
 54	content, err := io.ReadAll(reader)
 55	if err != nil {
 56		return
 57	}
 58
 59	var timedText TimedText
 60	err = xml.Unmarshal(content, &timedText)
 61	if err != nil {
 62		log.Println("Error unmarshalling XML:", err)
 63		return
 64	}
 65
 66	n, err := buffer.Write([]byte("WEBVTT\n\n"))
 67	if err != nil {
 68		return
 69	}
 70
 71	l := len(timedText.Body.Paragraphs) - 1
 72	var m int
 73	for i, p := range timedText.Body.Paragraphs {
 74		startTimeMS := p.Start
 75		endTimeMS := p.Start + p.Length
 76
 77		if i < l {
 78			nextStartTimeMS := timedText.Body.Paragraphs[i+1].Start
 79			if nextStartTimeMS < endTimeMS {
 80				endTimeMS = nextStartTimeMS
 81			}
 82		}
 83
 84		var sentence string
 85		if len(p.Sentences) > 0 {
 86			for _, s := range p.Sentences {
 87				sentence += s.Text
 88			}
 89		} else {
 90			sentence = p.Text
 91		}
 92
 93		sentence = strings.TrimSpace(sentence)
 94		if sentence == "" {
 95			continue
 96		}
 97
 98		m, err = writeVTT(&buffer, i+1, startTimeMS, endTimeMS, sentence)
 99		n += m
100		if err != nil {
101			return
102		}
103	}
104
105	return
106}