videocr @ da8cd05f08a92180317a123ae6b9e45ce6428de8

Extract hardcoded subtitles from videos using machine learning

use lazy map when performing parallel ocr
Yi Ge <me@yige.ch>
Fri, 17 May 2019 16:26:06 +0200
commit da8cd05f08a92180317a123ae6b9e45ce6428de8
parent c63e5086236df93dc8e668dd0e5935532a519a6c

1 file changed, 4 insertions(+), 4 deletions(-)

M videocr/video.py

@@ -1,5 +1,5 @@
 from __future__ import annotations
-from concurrent import futures
+from multiprocessing import Pool
 import datetime
 import pytesseract
 import cv2

@@ -46,11 +46,11 @@
         v.set(cv2.CAP_PROP_POS_FRAMES, ocr_start)
         frames = (v.read()[1] for _ in range(num_ocr_frames))

         # perform ocr to frames in parallel
-        with futures.ProcessPoolExecutor() as pool:
-            ocr_map = pool.map(self._single_frame_ocr, frames, chunksize=10)
+        with Pool() as pool:
+            it_ocr = pool.imap(self._single_frame_ocr, frames, chunksize=10)
             self.pred_frames = [
                 PredictedFrame(i + ocr_start, data, conf_threshold)
                 for i, data in enumerate(it_ocr)]

         v.release()