move download_lang_data to utils.py
Yi Ge me@yige.ch
Sun, 15 Dec 2019 20:56:09 +0800
2 files changed,
23 insertions(+),
15 deletions(-)
M
videocr/api.py
→
videocr/api.py
@@ -1,24 +1,11 @@
-from urllib.request import urlopen -import shutil - -from . import constants +from . import utils from .video import Video def get_subtitles( video_path: str, lang='eng', time_start='0:00', time_end='', conf_threshold=65, sim_threshold=90, use_fullframe=False) -> str: - # download tesseract data files to ~/tessdata if necessary - constants.TESSDATA_DIR.mkdir(parents=True, exist_ok=True) - for fname in lang.split('+'): - fpath = constants.TESSDATA_DIR / '{}.traineddata'.format(fname) - if not fpath.is_file(): - if fname[0].isupper(): - url = constants.TESSDATA_SCRIPT_URL.format(fname) - else: - url = constants.TESSDATA_URL.format(fname) - with urlopen(url) as res, open(fpath, 'w+b') as f: - shutil.copyfileobj(res, f) + utils.download_lang_data(lang) v = Video(video_path) v.run_ocr(lang, time_start, time_end, conf_threshold, use_fullframe)
A
videocr/utils.py
@@ -0,0 +1,21 @@
from urllib.request import urlopen
import shutil

from . import constants


def download_lang_data(lang: str) -> None:
    """Download any missing tesseract language data files.

    ``lang`` is a ``'+'``-separated list of language names (for example
    ``'eng+chi_sim'``).  For every name whose ``<name>.traineddata`` file is
    not yet present in ``constants.TESSDATA_DIR``, the file is fetched and
    stored there.  The directory is created if it does not exist.

    Names starting with an uppercase letter are fetched through
    ``constants.TESSDATA_SCRIPT_URL``; all other names use
    ``constants.TESSDATA_URL``.

    Each file is first written to a sibling ``*.part`` file and only moved to
    its final name after the transfer finished, so an interrupted download
    never leaves a truncated ``.traineddata`` file that later calls would
    mistake for a complete one.

    Raises:
        urllib.error.URLError: if a download fails (propagated from
            ``urlopen``).
        OSError: if the data directory or a data file cannot be written.
    """
    constants.TESSDATA_DIR.mkdir(parents=True, exist_ok=True)

    for lang_name in lang.split('+'):
        if not lang_name:
            # Empty segment (e.g. '' or a trailing '+'): nothing to fetch,
            # and indexing lang_name[0] below would raise IndexError.
            continue

        filepath = constants.TESSDATA_DIR / '{}.traineddata'.format(lang_name)
        if filepath.is_file():
            continue

        # Pick the URL template based on the capitalisation of the name.
        if lang_name[0].isupper():
            url = constants.TESSDATA_SCRIPT_URL.format(lang_name)
        else:
            url = constants.TESSDATA_URL.format(lang_name)

        # Download into a temporary sibling file; rename only on success.
        partial_path = filepath.with_name(filepath.name + '.part')
        try:
            with urlopen(url) as res, open(partial_path, 'wb') as f:
                shutil.copyfileobj(res, f)
            partial_path.replace(filepath)
        finally:
            # After a successful replace() the partial file is gone; if it
            # still exists the download failed and the leftover is removed.
            if partial_path.exists():
                partial_path.unlink()