albus/converter.py (view raw)
1import os
2from typing import Tuple
3import pypandoc
4
# Supported conversions: lowercase input extension (no leading dot) -> output
# format. The output format also serves as the output file's extension.
CONVERSION_MAP = {
    'docx': 'pdf',
    'odt': 'pdf',
    'epub': 'html',
}
11
def join_extension(filename: str, extension: str) -> str:
    """Return *filename* with *extension* appended after a dot.

    Args:
        filename: Base name or path without an extension.
        extension: Extension to append, without a leading dot.

    Returns:
        ``"<filename>.<extension>"``, or *filename* unchanged when
        *extension* is empty.
    """
    # An empty extension would otherwise produce a dangling trailing dot
    # ("name."), which is not the name the caller started with.
    if not extension:
        return filename
    return f"{filename}.{extension}"
14
def split_extension(filename: str) -> Tuple[str, str]:
    """Split *filename* into its extension-less path and its extension.

    Returns:
        A ``(stem, extension)`` tuple. The extension is lowercased and has
        no leading dot; it is the empty string when *filename* has none.
    """
    stem, dotted_suffix = os.path.splitext(filename)
    # splitext keeps the leading dot on the suffix; drop it after lowercasing.
    return stem, dotted_suffix.lower()[1:]
19
def convert_file(input_file: str, *, pdf_engine: str = 'tectonic') -> Tuple[str, str]:
    """Convert *input_file* with Pandoc to the format chosen by CONVERSION_MAP.

    The output is written alongside the input: same path without the input
    extension, with the output format as its new extension.

    Args:
        input_file: Path of the document to convert. Its extension
            (case-insensitive) selects the output format.
        pdf_engine: Program Pandoc should use to render PDF output. It is
            not passed to Pandoc for any other output format.

    Returns:
        A ``(filename, output_format)`` tuple: the input path without its
        extension, and the output format that was produced.

    Raises:
        ValueError: If the input extension has no entry in CONVERSION_MAP.
    """
    filename, extension = split_extension(input_file)

    # Reject unsupported inputs up front so the conversion path stays flat.
    if extension not in CONVERSION_MAP:
        raise ValueError(f"Conversion from '{extension}' not supported.")
    output_format = CONVERSION_MAP[extension]

    # --pdf-engine only applies when producing a PDF; do not send it for
    # other targets such as HTML.
    extra_args = [f'--pdf-engine={pdf_engine}'] if output_format == 'pdf' else []

    pypandoc.convert_file(
        input_file,
        output_format,
        outputfile=join_extension(filename, output_format),
        extra_args=extra_args,
    )
    return filename, output_format