# videocr/models.py
1from __future__ import annotations
2from typing import List
3from dataclasses import dataclass
4from fuzzywuzzy import fuzz
5
6
# Minimum per-word confidence: word predictions with lower confidence
# will be filtered out.
CONF_THRESHOLD = 60
9
10
@dataclass
class PredictedWord:
    """One predicted word together with its confidence score.

    Instances are slotted, so they carry no per-instance ``__dict__`` and
    only the two declared fields can be assigned.
    """

    __slots__ = ('confidence', 'text')

    # confidence score attached to this word
    confidence: int
    # the predicted text of the word
    text: str
16
17
class PredictedFrame:
    """OCR prediction for a single video frame.

    The frame is parsed from ``pred_data``: a multi-line string whose first
    line is skipped and whose remaining lines are split on whitespace
    (presumably Tesseract TSV output -- confirm against the caller).  Rows
    with fewer than 12 fields carry no word and are ignored.  For the other
    rows the third field is the block number and the last two fields are
    the confidence and the word text.
    """

    index: int  # 0-based index of the frame
    words: List[PredictedWord]
    confidence: int  # total confidence of all words
    text: str

    # Built once for the class: deletes chars that are obviously ocr errors
    # and maps '|' to 'I'.
    _OCR_FIXES = str.maketrans('|', 'I', '<>{};`@#$%^*_=\\')

    def __init__(self, index: int, pred_data: str):
        """Parse ``pred_data`` and populate words, confidence and text.

        Args:
            index: 0-based index of the frame.
            pred_data: raw prediction table for the frame, one row per line.

        Raises:
            ValueError: if a word row has a non-numeric block number or
                confidence field.
        """
        self.index = index
        self.words = []

        block = 0  # keep track of line breaks

        for row in pred_data.splitlines()[1:]:
            word_data = row.split()
            if len(word_data) < 12:
                # no word is predicted
                continue
            _, _, block_num, *_, conf, text = word_data
            block_num, conf = int(block_num), self._parse_conf(conf)

            # handle line breaks: a higher block number starts a new line,
            # marked by a zero-confidence '\n' word (never two in a row and
            # never as the very first word)
            if block < block_num:
                block = block_num
                if self.words and self.words[-1].text != '\n':
                    self.words.append(PredictedWord(0, '\n'))

            if conf >= CONF_THRESHOLD:
                self.words.append(PredictedWord(conf, text))

        # line-break markers have confidence 0, so they do not affect the sum
        self.confidence = sum(word.confidence for word in self.words)

        joined = ' '.join(word.text for word in self.words)
        # remove chars that are obviously ocr errors
        self.text = joined.translate(self._OCR_FIXES).strip()

    @staticmethod
    def _parse_conf(raw: str) -> int:
        """Convert a confidence field to ``int``, truncating any fraction.

        Integer strings (``'87'``, ``'-1'``) parse as before.  Float strings
        (``'96.53'``), which some OCR engine versions emit and which plain
        ``int()`` rejects with ``ValueError``, are truncated so that the
        ``>= CONF_THRESHOLD`` comparison matches the real-valued one.
        """
        return int(float(raw))

    def is_similar_to(self, other: PredictedFrame, threshold=70) -> bool:
        """Return True if the fuzzy partial-ratio score of both texts is
        at least ``threshold``."""
        return fuzz.partial_ratio(self.text, other.text) >= threshold
57
58
class PredictedSubtitle:
    """A group of frames treated as one subtitle entry.

    Only frames with a positive total confidence are kept.  The subtitle
    text is the text of the first kept frame that has the highest
    confidence, or an empty string when no frame is kept.
    """

    frames: List[PredictedFrame]
    text: str

    def __init__(self, frames: List[PredictedFrame]):
        kept = []
        for frame in frames:
            if frame.confidence > 0:
                kept.append(frame)
        self.frames = kept

        if not kept:
            self.text = ''
            return

        # max() yields the first maximal element, so ties resolve to the
        # earliest frame
        best = max(kept, key=lambda frame: frame.confidence)
        self.text = best.text

    @property
    def index_start(self) -> int:
        """Index of the first kept frame, or 0 when there are none."""
        return self.frames[0].index if self.frames else 0

    @property
    def index_end(self) -> int:
        """Index of the last kept frame, or 0 when there are none."""
        return self.frames[-1].index if self.frames else 0

    def is_similar_to(self, other: PredictedSubtitle, threshold=70) -> bool:
        """Return True if the fuzzy partial-ratio score of both texts is
        at least ``threshold``."""
        score = fuzz.partial_ratio(self.text, other.text)
        return score >= threshold

    def __repr__(self):
        return f'{self.index_start} - {self.index_end}. {self.text}'