videocr/video.py (view raw)
1from __future__ import annotations
2from concurrent import futures
3import pytesseract
4import cv2
5import timeit
6
7from .models import PredictedFrame
8
9
class Video:
    """A video file whose frames can be passed through Tesseract OCR.

    Attributes are filled in by ``__init__``:
      path       -- the path handed to ``cv2.VideoCapture``.
      lang       -- the ``lang`` argument forwarded to pytesseract.
      num_frames -- frame count reported by OpenCV for ``path``.
    """

    path: str
    lang: str
    num_frames: int

    def __init__(self, path: str, lang: str) -> None:
        """Store the settings and query the frame count of ``path``.

        The capture is opened only long enough to read
        ``cv2.CAP_PROP_FRAME_COUNT`` and is released again before returning.
        """
        self.path = path
        self.lang = lang
        v = cv2.VideoCapture(path)
        try:
            self.num_frames = int(v.get(cv2.CAP_PROP_FRAME_COUNT))
        finally:
            # Release the handle even if querying the property fails.
            v.release()

    def _frame_ocr(self, img):
        """Run Tesseract on one frame.

        Returns whatever ``pytesseract.image_to_data`` produces for ``img``
        with ``lang=self.lang``; the result is passed through unchanged.
        """
        return pytesseract.image_to_data(img, lang=self.lang)

    @staticmethod
    def _read_frames(capture, limit):
        """Yield up to ``limit`` frames from ``capture``.

        ``capture`` only needs a ``read()`` method returning a
        ``(success, frame)`` pair, as ``cv2.VideoCapture.read`` does.
        Iteration stops at the first unsuccessful read, so a placeholder
        frame from a short or unreadable video is never handed to OCR.
        """
        for _ in range(limit):
            ok, frame = capture.read()
            if not ok:
                break
            yield frame

    def run_ocr(self, max_frames: int = 40) -> None:
        """OCR the leading frames of the video and print each frame's text.

        Prints ``self.num_frames`` first, then, for every frame processed,
        the ``text`` attribute of ``PredictedFrame(index, ocr_data)``.

        Args:
            max_frames: Upper bound on the number of frames read from the
                start of the video. Fewer frames are processed when the
                video ends (or a read fails) earlier.
        """
        v = cv2.VideoCapture(self.path)
        try:
            print(self.num_frames)
            frames = self._read_frames(v, max_frames)

            with futures.ProcessPoolExecutor() as pool:
                # NOTE: Executor.map is documented to collect its input
                # iterables up front, so all selected frames are read in
                # this process before the first OCR result is consumed.
                frames_ocr = pool.map(self._frame_ocr, frames, chunksize=1)
                for i, data in enumerate(frames_ocr):
                    pred = PredictedFrame(i, data)
                    print(pred.text)
        finally:
            # Always give the capture back, even when OCR raises.
            v.release()
38
39
# Ad-hoc timing run. It is guarded so that importing this module does not
# start OCR, and so that ProcessPoolExecutor worker processes, which may
# re-import the main module on spawn-based platforms, do not re-run it.
if __name__ == '__main__':
    time_start = timeit.default_timer()
    v = Video('1.mp4', 'HanS')
    v.run_ocr()
    time_stop = timeit.default_timer()
    # Elapsed wall-clock seconds for construction plus the OCR pass.
    print(time_stop - time_start)
44print(time_stop - time_start)