all repos — videocr @ 63873af476c8a732abae625f9464931cf23d2e59

Extract hardcoded subtitles from videos using machine learning

videocr/models.py (view raw)

 1from __future__ import annotations
 2from typing import List
 3from dataclasses import dataclass
 4
 5
 6CONF_THRESHOLD = 60
 7# word predictions with lower confidence will be filtered out
 8
 9
@dataclass
class PredictedWord:
    """A single predicted word paired with its confidence score."""

    # Explicit slots: instances carry no per-instance __dict__.
    __slots__ = ('confidence', 'text')

    confidence: int  # confidence score of this prediction
    text: str  # the predicted text
15
16
class PredictedFrame:
    """Word predictions for one video frame, parsed from tabular OCR output.

    Attributes are filled in by ``__init__``:

    * ``words`` holds every kept ``PredictedWord`` in input order, plus a
      zero-confidence ``'\\n'`` marker each time a higher block number is
      first seen (this includes the very first block).
    * ``confidence`` is the sum of the confidences of all entries in
      ``words`` (the markers contribute 0).
    * ``text`` is the word texts joined by single spaces and stripped of
      leading/trailing whitespace, so a line break inside the text appears
      as ``' \\n '``.
    """
    index: int  # 0-based index of the frame
    words: List[PredictedWord]
    confidence: int  # total confidence of all words
    text: str

    def __init__(self, index: int, pred_data: str) -> None:
        """Parse ``pred_data`` and populate the frame's attributes.

        Args:
            index: 0-based index of the frame.
            pred_data: whitespace-separated table, one row per line
                (presumably Tesseract TSV output -- confirm against the
                caller). The first line is skipped; in every row with at
                least 12 fields the third field is read as the block
                number and the last two as confidence and text.

        Raises:
            ValueError: if a block number or confidence field of a kept
                row is not numeric.
        """
        self.index = index
        self.words = []

        block = 0  # highest block number seen so far

        # the first line is skipped (presumably the column header row)
        for line in pred_data.splitlines()[1:]:
            word_data = line.split()
            if len(word_data) < 12:
                # no word is predicted
                continue
            _, _, block_num, *_, conf, text = word_data
            block_num = int(block_num)
            # The confidence may be written as a decimal (e.g. '96.06');
            # int() rejects such strings, so parse as float first and
            # truncate to keep the attribute an int.
            conf = int(float(conf))

            # handle line breaks: a new, higher block number starts a new line
            if block < block_num:
                block = block_num
                self.words.append(PredictedWord(0, '\n'))

            if conf >= CONF_THRESHOLD:
                self.words.append(PredictedWord(conf, text))

        self.confidence = sum(word.confidence for word in self.words)
        self.text = ' '.join(word.text for word in self.words).strip()
47