videocr/api.py (view raw)
1from urllib.request import urlopen
2import shutil
3
4from . import constants
5from .video import Video
6
7
def _ensure_tessdata(lang):
    """Download ``<lang>.traineddata`` into ``constants.TESSDATA_DIR`` if absent.

    The file is first written to a ``.part`` sibling and only moved to its
    final name once the transfer has completed. An interrupted download
    therefore never leaves a truncated ``.traineddata`` file behind that a
    later call would mistake for a valid one.

    Any exception raised while downloading or writing is propagated after
    the partial file has been removed.
    """
    # NOTE: TESSDATA_DIR is used as a pathlib-style path throughout
    # (``/`` operator, ``is_file``, ``mkdir``).
    fpath = constants.TESSDATA_DIR / '{}.traineddata'.format(lang)
    if fpath.is_file():
        return

    # English has its own dedicated URL; every other language is
    # substituted into the URL template.
    if lang == 'eng':
        url = constants.ENG_URL
    else:
        url = constants.TESSDATA_URL.format(lang)

    constants.TESSDATA_DIR.mkdir(parents=True, exist_ok=True)

    part_path = fpath.with_name(fpath.name + '.part')
    try:
        with urlopen(url) as res, open(part_path, 'wb') as f:
            shutil.copyfileobj(res, f)
        # Publish the file under its real name only after a full download.
        part_path.replace(fpath)
    except BaseException:
        # Also covers KeyboardInterrupt: never keep a half-written file.
        try:
            part_path.unlink()
        except FileNotFoundError:
            pass
        raise


def get_subtitles(video_path: str, lang='eng',
                  time_start='0:00', time_end='', use_fullframe=False) -> str:
    """Run OCR on a video and return the recognised subtitles as a string.

    Args:
        video_path: Path of the video, passed to ``Video``.
        lang: Language code. It selects the ``<lang>.traineddata`` file,
            which is downloaded on first use, and is forwarded to
            ``Video.run_ocr``.
        time_start: Forwarded unchanged to ``Video.run_ocr``.
        time_end: Forwarded unchanged to ``Video.run_ocr``.
        use_fullframe: Forwarded unchanged to ``Video.run_ocr``.

    Returns:
        Whatever ``Video.get_subtitles()`` produces (annotated as ``str``).

    Raises:
        Any error raised by the download (for example a network failure
        from ``urlopen``) or by ``Video`` itself is propagated.
    """
    _ensure_tessdata(lang)

    v = Video(video_path)
    v.run_ocr(lang, time_start, time_end, use_fullframe)
    return v.get_subtitles()
24
25
def save_subtitles_to_file(
        video_path: str, file_path='subtitle.srt', lang='eng',
        time_start='0:00', time_end='', use_fullframe=False) -> None:
    """Extract subtitles from a video and write them to ``file_path``.

    Args:
        video_path: Path of the video, forwarded to ``get_subtitles``.
        file_path: Destination text file; created or overwritten.
        lang: Forwarded to ``get_subtitles``.
        time_start: Forwarded to ``get_subtitles``.
        time_end: Forwarded to ``get_subtitles``.
        use_fullframe: Forwarded to ``get_subtitles``.

    Raises:
        Any error from ``get_subtitles`` or from opening/writing the file
        is propagated.
    """
    # Run the OCR before opening the file: opening for writing truncates
    # it, so a failure during extraction must not wipe an existing
    # subtitle file or leave an empty one behind.
    subtitles = get_subtitles(
        video_path, lang, time_start, time_end, use_fullframe)

    # Explicit UTF-8 so non-ASCII subtitle text is written the same way on
    # every platform instead of depending on the locale's default encoding.
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(subtitles)