all repos — videocr @ 77362dce1a38c7cdc3674b2b42883203a277531b

Extract hardcoded subtitles from videos using machine learning

make conf_threshold adjustable through api
Yi Ge me@yige.ch
Mon, 29 Apr 2019 03:05:02 +0200
commit

77362dce1a38c7cdc3674b2b42883203a277531b

parent

a5e6845a1bc3d3497bda2e87b87bbeaf2541b3f2

3 files changed, 16 insertions(+), 9 deletions(-)

jump to
M videocr/api.py

@@ -5,8 +5,8 @@ from . import constants

 from .video import Video
 
-def get_subtitles(video_path: str, lang='eng',
-                  time_start='0:00', time_end='', use_fullframe=False) -> str:
+def get_subtitles(video_path: str, lang='eng', time_start='0:00', time_end='',
+                  conf_threshold=65, use_fullframe=False) -> str:
     # download tesseract data file to ~/tessdata if necessary
     fpath = constants.TESSDATA_DIR / '{}.traineddata'.format(lang)
     if not fpath.is_file():

@@ -19,13 +19,15 @@ with urlopen(url) as res, open(fpath, 'w+b') as f:

             shutil.copyfileobj(res, f)
 
     v = Video(video_path)
-    v.run_ocr(lang, time_start, time_end, use_fullframe)
+    v.run_ocr(lang, time_start, time_end, conf_threshold, use_fullframe)
     return v.get_subtitles()
 
 
 def save_subtitles_to_file(
         video_path: str, file_path='subtitle.srt', lang='eng',
-        time_start='0:00', time_end='', use_fullframe=False) -> None:
+        time_start='0:00', time_end='', conf_threshold=65,
+        use_fullframe=False) -> None:
     with open(file_path, 'w+') as f:
         f.write(get_subtitles(
-            video_path, lang, time_start, time_end, use_fullframe))
+            video_path, lang, time_start, time_end, conf_threshold,
+            use_fullframe))
M videocr/models.py

@@ -17,7 +17,7 @@ words: List[PredictedWord]

     confidence: int  # total confidence of all words
     text: str
 
-    def __init__(self, index: int, pred_data: str, conf_threshold=70):
+    def __init__(self, index: int, pred_data: str, conf_threshold: int):
         self.index = index
         self.words = []
M videocr/video.py

@@ -14,18 +14,22 @@ lang: str

     use_fullframe: bool
     num_frames: int
     fps: float
+    height: int
     pred_frames: List[PredictedFrame]
     pred_subs: List[PredictedSubtitle]
 
     def __init__(self, path: str):
         self.path = path
         v = cv2.VideoCapture(path)
+        if not v.isOpened():
+            raise IOError('can not open video format {}'.format(path))
         self.num_frames = int(v.get(cv2.CAP_PROP_FRAME_COUNT))
         self.fps = v.get(cv2.CAP_PROP_FPS)
+        self.height = int(v.get(cv2.CAP_PROP_FRAME_HEIGHT))
         v.release()
 
     def run_ocr(self, lang: str, time_start: str, time_end: str,
-                use_fullframe: bool) -> None:
+                conf_threshold: int, use_fullframe: bool) -> None:
         self.lang = lang
         self.use_fullframe = use_fullframe

@@ -44,8 +48,9 @@

         # perform ocr to frames in parallel
         with futures.ProcessPoolExecutor() as pool:
             ocr_map = pool.map(self._single_frame_ocr, frames, chunksize=10)
-            self.pred_frames = [PredictedFrame(i + ocr_start, data)
-                                for i, data in enumerate(ocr_map)]
+            self.pred_frames = [
+                PredictedFrame(i + ocr_start, data, conf_threshold)
+                for i, data in enumerate(ocr_map)]
         v.release()