all repos — albus @ 551d7cbd228803e1f3d4459468907a4469463658

albus/converter.py (view raw)

 1import os
 2from typing import Tuple
 3import pypandoc
 4
 5# Map of file extensions to output formats
 6CONVERSION_MAP = {
 7    'docx': 'pdf',
 8    'odt': 'pdf',
 9    'epub': 'html'
10}
11
def join_extension(filename: str, extension: str) -> str:
    """Return *filename* and *extension* joined by a single dot.

    No normalisation is done: any dot already present in either argument
    is kept as-is.
    """
    return "{}.{}".format(filename, extension)
14
def split_extension(filename: str) -> Tuple[str, str]:
    """Split *filename* into its stem and a normalised extension.

    Returns a ``(stem, extension)`` tuple where ``stem`` is the path without
    its extension and ``extension`` is lowercased with the leading dot
    removed. ``extension`` is an empty string when ``os.path.splitext``
    finds none.
    """
    stem, dotted_suffix = os.path.splitext(filename)
    # Lowercase first, then drop the leading dot (a no-op on an empty suffix).
    return stem, dotted_suffix.lower()[1:]
19
def convert_file(input_file: str) -> Tuple[str, str]:
    """Convert *input_file* to the format mapped to its extension.

    The output format is looked up in ``CONVERSION_MAP`` using the input's
    lowercased extension. The result is written to the input path with its
    extension replaced by the output format.

    Args:
        input_file: Path of the document to convert.

    Returns:
        A ``(filename, output_format)`` tuple: the input path without its
        extension, and the format that was produced.

    Raises:
        ValueError: If the input extension has no entry in ``CONVERSION_MAP``.
    """
    filename, extension = split_extension(input_file)

    # Reject unsupported inputs up front so Pandoc is never invoked for them.
    if extension not in CONVERSION_MAP:
        raise ValueError(f"Conversion from '{extension}' not supported.")
    output_format = CONVERSION_MAP[extension]

    # The PDF engine option is only meaningful when producing a PDF, so it is
    # not passed along for other output formats (e.g. epub -> html).
    extra_args = ['--pdf-engine=tectonic'] if output_format == 'pdf' else []

    pypandoc.convert_file(
        input_file,
        output_format,
        outputfile=join_extension(filename, output_format),
        extra_args=extra_args,
    )
    return filename, output_format