git rekt — videocr.git: 0e932936a173597553f82ae872ecae12fd604ddd

add PredictedSubtitle model

Yi Ge me@yige.ch

Thu, 25 Apr 2019 01:39:35 +0200

commit

0e932936a173597553f82ae872ecae12fd604ddd

parent

63873af476c8a732abae625f9464931cf23d2e59

1 file changed, 31 insertions(+), 0 deletions(-)

jump to

videocr/models.py

M videocr/models.py → videocr/models.py

@@ -1,6 +1,7 @@
 from __future__ import annotations
 from typing import List
 from dataclasses import dataclass
+from fuzzywuzzy import fuzz
 
 
 CONF_THRESHOLD = 60
@@ -45,3 +46,33 @@
         self.confidence = sum(word.confidence for word in self.words)
         self.text = ''.join(word.text + ' ' for word in self.words).strip()
 
+    def is_similar_to(self, other: PredictedFrame, threshold=60) -> bool:
+        if len(self.text) == 0 or len(other.text) == 0:
+            return False
+        return fuzz.ratio(self.text, other.text) >= threshold
+
+
+class PredictedSubtitle:
+    frames: List[PredictedFrame]
+
+    def __init__(self, frames: List[PredictedFrame]):
+        self.frames = [f for f in frames if f.confidence > 0]
+
+    @property
+    def text(self) -> str:
+        if self.frames:
+            conf_max = max(f.confidence for f in self.frames)
+            return next(f.text for f in self.frames if f.confidence == conf_max)
+        return ''
+
+    @property
+    def index_start(self) -> int:
+        if self.frames:
+            return self.frames[0].index
+        return 0
+
+    @property
+    def index_end(self) -> int:
+        if self.frames:
+            return self.frames[-1].index
+        return 0

all repos — videocr @ 0e932936a173597553f82ae872ecae12fd604ddd

Extract hardcoded subtitles from videos using machine learning