git rekt — videocr.git: 720c9d479ffc8e6f314d823102d99ef2b581cf66

move download_lang_data to utils.py

Yi Ge <me@yige.ch>

Sun, 15 Dec 2019 20:56:09 +0800

commit

720c9d479ffc8e6f314d823102d99ef2b581cf66

parent

8a56cbf746e5d550853ca53409b0fdda29ac706d

2 files changed, 23 insertions(+), 15 deletions(-)

jump to

videocr/api.py

videocr/utils.py

M videocr/api.py → videocr/api.py

@@ -1,24 +1,11 @@
-from urllib.request import urlopen
-import shutil
-
-from . import constants
+from . import utils
 from .video import Video
 
 
 def get_subtitles(
         video_path: str, lang='eng', time_start='0:00', time_end='',
         conf_threshold=65, sim_threshold=90, use_fullframe=False) -> str:
-    # download tesseract data files to ~/tessdata if necessary
-    constants.TESSDATA_DIR.mkdir(parents=True, exist_ok=True)
-    for fname in lang.split('+'):
-        fpath = constants.TESSDATA_DIR / '{}.traineddata'.format(fname)
-        if not fpath.is_file():
-            if fname[0].isupper():
-                url = constants.TESSDATA_SCRIPT_URL.format(fname)
-            else:
-                url = constants.TESSDATA_URL.format(fname)
-            with urlopen(url) as res, open(fpath, 'w+b') as f:
-                shutil.copyfileobj(res, f)
+    utils.download_lang_data(lang)
 
     v = Video(video_path)
     v.run_ocr(lang, time_start, time_end, conf_threshold, use_fullframe)

A videocr/utils.py

@@ -0,0 +1,21 @@
+from urllib.request import urlopen
+import shutil
+
+from . import constants
+
+
+# download language data files to ~/tessdata if necessary
+def download_lang_data(lang: str):
+    constants.TESSDATA_DIR.mkdir(parents=True, exist_ok=True)
+
+    for lang_name in lang.split('+'):
+        filepath = constants.TESSDATA_DIR / '{}.traineddata'.format(lang_name)
+        if not filepath.is_file():
+            # download needed file
+            if lang_name[0].isupper():
+                url = constants.TESSDATA_SCRIPT_URL.format(lang_name)
+            else:
+                url = constants.TESSDATA_URL.format(lang_name)
+
+            with urlopen(url) as res, open(filepath, 'w+b') as f:
+                shutil.copyfileobj(res, f)

all repos — videocr @ 720c9d479ffc8e6f314d823102d99ef2b581cf66

Extract hardcoded subtitles from videos using machine learning