Make conf_threshold adjustable through API
Yi Ge me@yige.ch
Mon, 29 Apr 2019 03:05:02 +0200
3 files changed, 16 insertions(+), 9 deletions(-)
M
videocr/api.py
→
videocr/api.py
@@ -5,8 +5,8 @@ from . import constants
from .video import Video -def get_subtitles(video_path: str, lang='eng', - time_start='0:00', time_end='', use_fullframe=False) -> str: +def get_subtitles(video_path: str, lang='eng', time_start='0:00', time_end='', + conf_threshold=65, use_fullframe=False) -> str: # download tesseract data file to ~/tessdata if necessary fpath = constants.TESSDATA_DIR / '{}.traineddata'.format(lang) if not fpath.is_file():@@ -19,13 +19,15 @@ with urlopen(url) as res, open(fpath, 'w+b') as f:
shutil.copyfileobj(res, f) v = Video(video_path) - v.run_ocr(lang, time_start, time_end, use_fullframe) + v.run_ocr(lang, time_start, time_end, conf_threshold, use_fullframe) return v.get_subtitles() def save_subtitles_to_file( video_path: str, file_path='subtitle.srt', lang='eng', - time_start='0:00', time_end='', use_fullframe=False) -> None: + time_start='0:00', time_end='', conf_threshold=65, + use_fullframe=False) -> None: with open(file_path, 'w+') as f: f.write(get_subtitles( - video_path, lang, time_start, time_end, use_fullframe)) + video_path, lang, time_start, time_end, conf_threshold, + use_fullframe))
M
videocr/models.py
→
videocr/models.py
@@ -17,7 +17,7 @@ words: List[PredictedWord]
confidence: int # total confidence of all words text: str - def __init__(self, index: int, pred_data: str, conf_threshold=70): + def __init__(self, index: int, pred_data: str, conf_threshold: int): self.index = index self.words = []
M
videocr/video.py
→
videocr/video.py
@@ -14,18 +14,22 @@ lang: str
use_fullframe: bool num_frames: int fps: float + height: int pred_frames: List[PredictedFrame] pred_subs: List[PredictedSubtitle] def __init__(self, path: str): self.path = path v = cv2.VideoCapture(path) + if not v.isOpened(): + raise IOError('can not open video format {}'.format(path)) self.num_frames = int(v.get(cv2.CAP_PROP_FRAME_COUNT)) self.fps = v.get(cv2.CAP_PROP_FPS) + self.height = int(v.get(cv2.CAP_PROP_FRAME_HEIGHT)) v.release() def run_ocr(self, lang: str, time_start: str, time_end: str, - use_fullframe: bool) -> None: + conf_threshold: int, use_fullframe: bool) -> None: self.lang = lang self.use_fullframe = use_fullframe@@ -44,8 +48,9 @@
# perform ocr to frames in parallel with futures.ProcessPoolExecutor() as pool: ocr_map = pool.map(self._single_frame_ocr, frames, chunksize=10) - self.pred_frames = [PredictedFrame(i + ocr_start, data) - for i, data in enumerate(ocr_map)] + self.pred_frames = [ + PredictedFrame(i + ocr_start, data, conf_threshold) + for i, data in enumerate(ocr_map)] v.release()