initial commit
Marco Andronaco andronacomarco@gmail.com
Sun, 26 May 2024 03:42:57 +0200
15 files changed,
438 insertions(+),
0 deletions(-)
A
.github/.workflows/publish.yaml
@@ -0,0 +1,48 @@
# Builds the repository's Docker image and publishes it to the GitHub Container registry.
#
# NOTE(review): GitHub Actions only discovers workflow files stored under
# `.github/workflows/`. This file is listed under `.github/.workflows/` (extra
# dot) -- confirm the location, otherwise the workflow will never run.
name: Create and publish a Docker image

# Configures this workflow to run every time a change is pushed to the branch called `main`.
on:
  push:
    branches: ['main']

# Defines two custom environment variables for the workflow. These are used for the Container registry domain, and a name for the Docker image that this workflow builds.
env:
  REGISTRY: ghcr.io
  IMAGE_NAME: ${{ github.repository }}

# There is a single job in this workflow. It's configured to run on the latest available version of Ubuntu.
jobs:
  build-and-push-image:
    runs-on: ubuntu-latest
    # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job.
    permissions:
      contents: read
      packages: write
    steps:
      # v4 replaces v3, which runs on a deprecated Node.js runtime.
      - name: Checkout repository
        uses: actions/checkout@v4
      # Uses the `docker/login-action` action to log in to the Container registry using the account and password that will publish the packages. Once published, the packages are scoped to the account defined here.
      - name: Log in to the Container registry
        uses: docker/login-action@65b78e6e13532edd9afa3aa52ac7964289d1a9c1
        with:
          registry: ${{ env.REGISTRY }}
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      # This step uses [docker/metadata-action](https://github.com/docker/metadata-action#about) to extract tags and labels that will be applied to the specified image. The `id` "meta" allows the output of this step to be referenced in a subsequent step. The `images` value provides the base name for the tags and labels.
      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7
        with:
          images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
      # This step uses the `docker/build-push-action` action to build the image, based on your repository's `Dockerfile`. If the build succeeds, it pushes the image to GitHub Packages.
      # It uses the `context` parameter to define the build's context as the set of files located in the specified path. For more information, see "[Usage](https://github.com/docker/build-push-action#usage)" in the README of the `docker/build-push-action` repository.
      # It uses the `tags` and `labels` parameters to tag and label the image with the output from the "meta" step.
      - name: Build and push Docker image
        uses: docker/build-push-action@f2a1d5e99d037542a71f64918e516c093c6f3fc4
        with:
          context: .
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
A
.vscode/launch.json
@@ -0,0 +1,14 @@
+{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: Module", + "type": "debugpy", + "request": "launch", + "module": "albus.bot" + } + ] +}
A
Dockerfile
@@ -0,0 +1,30 @@
# Use the Alpine Linux base image
FROM alpine:latest

# Install system dependencies: pandoc performs the conversions, tectonic is the
# PDF engine, build-base is available for Python packages that need compiling.
# `--no-cache` fetches the package index on the fly, so a separate `apk update`
# is unnecessary and would only leave an index cache behind in the layer.
RUN apk add --no-cache \
    pandoc \
    python3 \
    py3-pip \
    build-base \
    tectonic

# Set the working directory
WORKDIR /app

# Copy the requirements.txt file first so the dependency layer is cached
# independently of application code changes
COPY requirements.txt .

# --break-system-packages: install into the system interpreter on purpose,
# the container itself is the isolation boundary
RUN pip install --no-cache-dir --break-system-packages -r requirements.txt

# Copy the rest of the application code
COPY albus albus

# Add a non-root user and use it
RUN adduser -D appuser && \
    chown -R appuser /app
USER appuser

# Run the bot module; `python3` is the interpreter name installed by the
# `python3` package above
CMD ["python3", "-m", "albus.bot"]
A
LICENSE
@@ -0,0 +1,21 @@
+MIT License + +Copyright (c) 2024 Marco Andronaco + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.
A
Makefile
@@ -0,0 +1,24 @@
# Variables
IMAGE_NAME=albus
REQUIREMENTS=requirements.txt
ENV_FILE=.env

# Targets that name commands rather than files. `run` is included so that a
# stray file called `run` can never make the target look up to date.
.PHONY: all clean build run

all: build

# Export requirements using Poetry (re-run whenever the project metadata or
# the lock file changes)
$(REQUIREMENTS): pyproject.toml poetry.lock
	poetry export -f requirements.txt --output $(REQUIREMENTS) --without-hashes

# Build the Docker image
build: $(REQUIREMENTS)
	docker build -t $(IMAGE_NAME) .

# Clean up the requirements.txt file
clean:
	rm -f $(REQUIREMENTS)

# Utility target to run the Docker container; requires $(ENV_FILE) to exist
# (there is no rule to create it, so make stops with an error when it is missing)
run: $(ENV_FILE)
	docker-compose up -d
A
README.md
@@ -0,0 +1,16 @@
+# Albus + +A Telegram Bot that converts documents to more readable formats. + +## Usage + +``` +cp .env.example .env +``` + +Put your Telegram Bot Token inside `.env`, then: + +``` +make +make run +```
A
albus/bot.py
@@ -0,0 +1,54 @@
"""Telegram bot that converts documents sent to it into more readable formats."""
import asyncio
import logging
import os
import tempfile

from dotenv import load_dotenv
from telegram import Update
from telegram.ext import ApplicationBuilder, MessageHandler, CommandHandler, ContextTypes, filters

from .converter import convert_file, split_extension, join_extension

# Load environment variables from .env file
load_dotenv()

# Retrieve the Telegram bot token from the environment variables
TOKEN = os.getenv('TELEGRAM_BOT_TOKEN')

logger = logging.getLogger(__name__)


async def start(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Reply to the /start command with a short usage hint."""
    await update.effective_message.reply_text('Hello! Send me a file and I will convert it for you.')


async def convert_document(update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
    """Download the received document, convert it and send the result back.

    The upload and the converted output live in a private temporary directory
    that is removed when the handler finishes, whether or not the conversion
    succeeded. The user-supplied file name is never used as a filesystem path;
    it only determines the name shown for the returned document.

    Args:
        update: Incoming update; its effective message must carry a document.
        context: Handler context giving access to the bot instance.
    """
    message = update.effective_message
    document = message.document

    # Telegram does not guarantee a file name; fall back to a neutral one.
    original_name = os.path.basename(document.file_name or "document")
    file_name, file_extension = split_extension(original_name)

    with tempfile.TemporaryDirectory() as workdir:
        # Download the file under its Telegram-unique id to avoid collisions
        file_ptr = await context.bot.get_file(document.file_id)
        file_path = os.path.join(
            workdir, join_extension(file_ptr.file_unique_id, file_extension)
        )
        await file_ptr.download_to_drive(custom_path=file_path)

        # Convert the file in a worker thread so the (blocking) conversion
        # does not stall the event loop for other chats
        try:
            converted_file_name, extension = await asyncio.to_thread(convert_file, file_path)
        except ValueError as error:
            # Raised by the converter for unsupported input formats
            await message.reply_text(str(error))
            return
        except RuntimeError:
            # presumably how pypandoc reports a failed pandoc run -- TODO confirm
            logger.exception("Conversion of %s failed", original_name)
            await message.reply_text('Sorry, the conversion failed.')
            return

        # Send the converted file back under the original base name
        converted_file_path = join_extension(converted_file_name, extension)
        with open(converted_file_path, 'rb') as converted:
            await message.reply_document(
                converted, filename=join_extension(file_name, extension)
            )


def main() -> None:
    """Build the bot application, register the handlers and start polling.

    Raises:
        SystemExit: If ``TELEGRAM_BOT_TOKEN`` is not configured.
    """
    if not TOKEN:
        raise SystemExit("TELEGRAM_BOT_TOKEN is not set; define it in the environment or in .env.")

    app = ApplicationBuilder().token(TOKEN).build()

    app.add_handler(CommandHandler("start", start))
    # Only real documents: other attachment kinds (photos, audio, ...) carry no
    # `message.document` and cannot be converted.
    app.add_handler(MessageHandler(filters.Document.ALL, convert_document))

    print("Bot is online.")
    app.run_polling()


if __name__ == '__main__':
    main()
A
albus/converter.py
@@ -0,0 +1,37 @@
"""Document conversion helpers built on top of Pandoc (via pypandoc)."""
import os
from typing import Tuple

import pypandoc

# Map of file extensions to output formats
CONVERSION_MAP = {
    'docx': 'pdf',
    'odt': 'pdf',
    'epub': 'html'
}


def join_extension(filename: str, extension: str) -> str:
    """Return ``filename`` and ``extension`` joined by a single dot."""
    return f"{filename}.{extension}"


def split_extension(filename: str) -> Tuple[str, str]:
    """Split ``filename`` into ``(stem, extension)``.

    The extension is returned lowercased and without its leading dot; it is
    an empty string when the name has no extension.
    """
    stem, dotted_extension = os.path.splitext(filename)
    return stem, dotted_extension[1:].lower()


def convert_file(input_file: str) -> Tuple[str, str]:
    """Convert ``input_file`` to the format configured in ``CONVERSION_MAP``.

    The output is written next to the input, using the same stem and the
    output format as its extension.

    Args:
        input_file: Path of the file to convert.

    Returns:
        ``(stem, output_format)``; joining them with ``join_extension`` gives
        the path of the converted file.

    Raises:
        ValueError: If the input extension has no entry in ``CONVERSION_MAP``.
    """
    # Determine the input file extension
    filename, extension = split_extension(input_file)

    # Check if the input file extension is supported
    try:
        output_format = CONVERSION_MAP[extension]
    except KeyError:
        raise ValueError(f"Conversion from '{extension}' not supported.") from None

    # The PDF engine option is only meaningful when producing a PDF
    extra_args = ['--pdf-engine=tectonic'] if output_format == 'pdf' else []

    # Perform the conversion using Pandoc
    pypandoc.convert_file(
        input_file,
        output_format,
        outputfile=join_extension(filename, output_format),
        extra_args=extra_args,
    )
    return filename, output_format
A
docker-compose.yaml
@@ -0,0 +1,8 @@
+services: + albus: + container_name: albus + image: albus + build: + context: . + env_file: .env + restart: unless-stopped
A
poetry.lock
@@ -0,0 +1,165 @@
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. + +[[package]] +name = "anyio" +version = "4.3.0" +description = "High level compatibility layer for multiple asynchronous event loop implementations" +optional = false +python-versions = ">=3.8" +files = [ + {file = "anyio-4.3.0-py3-none-any.whl", hash = "sha256:048e05d0f6caeed70d731f3db756d35dcc1f35747c8c403364a8332c630441b8"}, + {file = "anyio-4.3.0.tar.gz", hash = "sha256:f75253795a87df48568485fd18cdd2a3fa5c4f7c5be8e5e36637733fce06fed6"}, +] + +[package.dependencies] +idna = ">=2.8" +sniffio = ">=1.1" + +[package.extras] +doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] +trio = ["trio (>=0.23)"] + +[[package]] +name = "certifi" +version = "2024.2.2" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2024.2.2-py3-none-any.whl", hash = "sha256:dc383c07b76109f368f6106eee2b593b04a011ea4d55f652c6ca24a754d1cdd1"}, + {file = "certifi-2024.2.2.tar.gz", hash = "sha256:0569859f95fc761b18b45ef421b1290a0f65f147e92a1e5eb3e635f9a5e4e66f"}, +] + +[[package]] +name = "h11" +version = "0.14.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +optional = false +python-versions = ">=3.7" +files = [ + {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, + {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, +] + +[[package]] +name = "httpcore" +version = "1.0.5" +description = "A minimal low-level HTTP client." 
+optional = false +python-versions = ">=3.8" +files = [ + {file = "httpcore-1.0.5-py3-none-any.whl", hash = "sha256:421f18bac248b25d310f3cacd198d55b8e6125c107797b609ff9b7a6ba7991b5"}, + {file = "httpcore-1.0.5.tar.gz", hash = "sha256:34a38e2f9291467ee3b44e89dd52615370e152954ba21721378a87b2960f7a61"}, +] + +[package.dependencies] +certifi = "*" +h11 = ">=0.13,<0.15" + +[package.extras] +asyncio = ["anyio (>=4.0,<5.0)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] +trio = ["trio (>=0.22.0,<0.26.0)"] + +[[package]] +name = "httpx" +version = "0.27.0" +description = "The next generation HTTP client." +optional = false +python-versions = ">=3.8" +files = [ + {file = "httpx-0.27.0-py3-none-any.whl", hash = "sha256:71d5465162c13681bff01ad59b2cc68dd838ea1f10e51574bac27103f00c91a5"}, + {file = "httpx-0.27.0.tar.gz", hash = "sha256:a0cb88a46f32dc874e04ee956e4c2764aba2aa228f650b06788ba6bda2962ab5"}, +] + +[package.dependencies] +anyio = "*" +certifi = "*" +httpcore = "==1.*" +idna = "*" +sniffio = "*" + +[package.extras] +brotli = ["brotli", "brotlicffi"] +cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (==1.*)"] + +[[package]] +name = "idna" +version = "3.7" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.7-py3-none-any.whl", hash = "sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0"}, + {file = "idna-3.7.tar.gz", hash = "sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc"}, +] + +[[package]] +name = "pypandoc" +version = "1.13" +description = "Thin wrapper for pandoc." 
+optional = false +python-versions = ">=3.6" +files = [ + {file = "pypandoc-1.13-py3-none-any.whl", hash = "sha256:4c7d71bf2f1ed122aac287113b5c4d537a33bbc3c1df5aed11a7d4a7ac074681"}, + {file = "pypandoc-1.13.tar.gz", hash = "sha256:31652073c7960c2b03570bd1e94f602ca9bc3e70099df5ead4cea98ff5151c1e"}, +] + +[[package]] +name = "python-dotenv" +version = "1.0.1" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-dotenv-1.0.1.tar.gz", hash = "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca"}, + {file = "python_dotenv-1.0.1-py3-none-any.whl", hash = "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + +[[package]] +name = "python-telegram-bot" +version = "21.2" +description = "We have made you a wrapper you can't refuse" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-telegram-bot-21.2.tar.gz", hash = "sha256:2ebb462a98f502727d108c00bb50c513a68ddaf9545298c42f13996a9acf8354"}, + {file = "python_telegram_bot-21.2-py3-none-any.whl", hash = "sha256:af0f45d61521126de98f5bdc8a75a9df8b93d0c35d18b018181ca7648a38b017"}, +] + +[package.dependencies] +httpx = ">=0.27,<1.0" + +[package.extras] +all = ["APScheduler (>=3.10.4,<3.11.0)", "aiolimiter (>=1.1.0,<1.2.0)", "cachetools (>=5.3.3,<5.4.0)", "cryptography (>=39.0.1)", "httpx[http2]", "httpx[socks]", "pytz (>=2018.6)", "tornado (>=6.4,<7.0)"] +callback-data = ["cachetools (>=5.3.3,<5.4.0)"] +ext = ["APScheduler (>=3.10.4,<3.11.0)", "aiolimiter (>=1.1.0,<1.2.0)", "cachetools (>=5.3.3,<5.4.0)", "pytz (>=2018.6)", "tornado (>=6.4,<7.0)"] +http2 = ["httpx[http2]"] +job-queue = ["APScheduler (>=3.10.4,<3.11.0)", "pytz (>=2018.6)"] +passport = ["cryptography (>=39.0.1)"] +rate-limiter = ["aiolimiter (>=1.1.0,<1.2.0)"] +socks = ["httpx[socks]"] +webhooks = ["tornado (>=6.4,<7.0)"] + +[[package]] +name 
= "sniffio" +version = "1.3.1" +description = "Sniff out which async library your code is running under" +optional = false +python-versions = ">=3.7" +files = [ + {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, + {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, +] + +[metadata] +lock-version = "2.0" +python-versions = "^3.12" +content-hash = "ef0954feed816cd355771e2fe87995973fc35389cd3013545b81eeaa88212bad"
A
pyproject.toml
@@ -0,0 +1,17 @@
+[tool.poetry] +name = "albus" +version = "0.1.0" +description = "" +authors = ["Marco Andronaco <andronacomarco@gmail.com>"] +readme = "README.md" + +[tool.poetry.dependencies] +python = "^3.12" +pypandoc = "^1.13" +python-telegram-bot = "^21.2" +python-dotenv = "^1.0.1" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api"