all repos — videocr @ 0e932936a173597553f82ae872ecae12fd604ddd

Extract hardcoded subtitles from videos using machine learning

add PredictedSubtitle model
Yi Ge me@yige.ch
Thu, 25 Apr 2019 01:39:35 +0200
commit

0e932936a173597553f82ae872ecae12fd604ddd

parent

63873af476c8a732abae625f9464931cf23d2e59

1 file changed, 31 insertions(+), 0 deletions(-)

jump to
M videocr/models.py

videocr/models.py

@@ -1,6 +1,7 @@

 from __future__ import annotations
 from typing import List
 from dataclasses import dataclass
+from fuzzywuzzy import fuzz
 CONF_THRESHOLD = 60

@@ -45,3 +46,33 @@

         self.confidence = sum(word.confidence for word in self.words)
         self.text = ''.join(word.text + ' ' for word in self.words).strip()
+    def is_similar_to(self, other: PredictedFrame, threshold=60) -> bool:
+        if len(self.text) == 0 or len(other.text) == 0:
+            return False
+        return fuzz.ratio(self.text, other.text) >= threshold
+
+
+class PredictedSubtitle:
+    frames: List[PredictedFrame]
+
+    def __init__(self, frames: List[PredictedFrame]):
+        self.frames = [f for f in frames if f.confidence > 0]
+
+    @property
+    def text(self) -> str:
+        if self.frames:
+            conf_max = max(f.confidence for f in self.frames)
+            return next(f.text for f in self.frames if f.confidence == conf_max)
+        return ''
+
+    @property
+    def index_start(self) -> int:
+        if self.frames:
+            return self.frames[0].index
+        return 0
+
+    @property
+    def index_end(self) -> int:
+        if self.frames:
+            return self.frames[-1].index
+        return 0