videocr/models.py (view raw)
1from __future__ import annotations
2from typing import List
3from dataclasses import dataclass
4
5
# Minimum per-word confidence: word predictions scoring below this value
# are filtered out.
CONF_THRESHOLD = 60
8
9
@dataclass
class PredictedWord:
    """One predicted word together with the confidence of the prediction."""

    # Slots keep instances small: no per-instance __dict__ is allocated.
    __slots__ = ('confidence', 'text')

    confidence: int  # confidence score of this prediction
    text: str  # the predicted word itself
15
16
class PredictedFrame:
    """Word predictions parsed from the OCR output of a single frame.

    ``pred_data`` is a table of whitespace-separated rows. The first row is
    skipped as a header; every other row needs at least 12 fields, where
    field 3 is the block number, field 11 the confidence and everything
    from field 12 onwards the word text. (This layout matches Tesseract's
    TSV output -- presumably that is the producer; confirm against the
    caller.)

    Attributes are filled in by ``__init__``:
        index: the frame index passed in by the caller.
        words: kept words in input order; a ``PredictedWord(0, '\\n')``
            marker is inserted whenever the block number increases.
        confidence: sum of the confidences of all entries in ``words``.
        text: the word texts joined by single spaces, stripped of
            leading and trailing whitespace.

    Raises:
        ValueError: if a row with 12 or more fields has a non-numeric
            block number or confidence.
    """

    index: int  # 0-based index of the frame
    words: List[PredictedWord]
    confidence: int  # total confidence of all words
    text: str

    def __init__(self, index: int, pred_data: str):
        self.index = index
        self.words = []

        current_block = 0  # highest block number seen; tracks line breaks

        # The first row is the column header, so it is skipped.
        for row in pred_data.splitlines()[1:]:
            fields = row.split()
            if len(fields) < 12:
                # no word is predicted
                continue

            block_num = int(fields[2])
            # The confidence may be written as a decimal ("96.53"), which
            # int() alone rejects; go through float() and truncate.
            conf = int(float(fields[10]))
            # Read the fixed columns by position so that a word containing
            # whitespace cannot shift the confidence column; such a word
            # is re-joined with single spaces.
            text = ' '.join(fields[11:])

            # handle line breaks
            if current_block < block_num:
                current_block = block_num
                self.words.append(PredictedWord(0, '\n'))

            if conf >= CONF_THRESHOLD:
                self.words.append(PredictedWord(conf, text))

        self.confidence = sum(word.confidence for word in self.words)
        self.text = ' '.join(word.text for word in self.words).strip()
47