all repos — sunstroke @ 585855a8728f87cc5383329bd227f6d6ba840aff

working version
Andronaco Marco marco.andronaco@olivetti.com
Wed, 12 Jul 2023 13:02:46 +0200
commit

585855a8728f87cc5383329bd227f6d6ba840aff

parent

733a0a23988fb074c93e6c398d8b9142ee180b29

5 files changed, 81 insertions(+), 57 deletions(-)

jump to
A .env.example

@@ -0,0 +1,6 @@

+PYLOAD_HOST=http://localhost:8000/
+PYLOAD_USER=user
+PYLOAD_PW=password
+N_LINKS_TO_REMOVE=2
+NEWSPAPER_PREFIX=Il Sole 24 Ore
+RSS_URL=https://overpost.biz/e-books/quotidiani/rss.xml
M .gitignore

@@ -1,4 +1,5 @@

 __pycache__
 venv
+.env
 rss.xml
 session.txt
A MyPyload.py

@@ -0,0 +1,36 @@

# My edited version of https://github.com/thammi/pyload-utils/blob/master/pyloadutils/pyload.py
"""Minimal client for the pyLoad HTTP JSON API, configured via environment variables."""
import json
from os import getenv
from urllib.parse import urlencode, urljoin
from urllib.request import urlopen

from dotenv import load_dotenv

# Load a local .env file (if present) into the environment before the
# settings below are read.
load_dotenv()

# Connection settings; an unset or empty variable falls back to the default.
PYLOAD_HOST = getenv("PYLOAD_HOST") or "http://localhost:8000/"
PYLOAD_USER = getenv("PYLOAD_USER") or "pyload"
PYLOAD_PW = getenv("PYLOAD_PW") or "pyload"


class Pyload:
    """Session-based client for the pyLoad API.

    Creating an instance logs in immediately and stores the login reply as
    ``self.session``. Any public attribute that is not otherwise defined is
    exposed as an API call taking keyword arguments, e.g.
    ``Pyload().addPackage(name="x", links=["http://..."])`` sends a request
    to ``<PYLOAD_HOST>/api/addPackage``.
    """

    def __init__(self):
        self.url_base = urljoin(PYLOAD_HOST, 'api/')
        self.session = self._call(
            'login',
            {'username': PYLOAD_USER, 'password': PYLOAD_PW},
            False,
        )

    def _call(self, name, args=None, encode=True):
        """POST ``args`` to API method ``name`` and return the decoded JSON reply.

        Args:
            name: API method name, joined onto the ``api/`` base URL.
            args: Mapping of form fields. It is never modified.
            encode: When true, every value is JSON-encoded before sending;
                when false, values are sent as given (used for the login).

        Returns:
            The reply body parsed as JSON.

        Raises:
            urllib.error.URLError: The request could not be completed.
            json.JSONDecodeError: The reply body is not valid JSON.
        """
        url = urljoin(self.url_base, name)
        args = args or {}

        if encode:
            data = {k: json.dumps(v) for k, v in args.items()}
        else:
            # Copy so that adding the session never leaks into the caller's dict.
            data = dict(args)

        # Look in the instance dict directly: hasattr()/getattr() would fall
        # through to __getattr__, which answers for every public name, and
        # the login request would then carry a bogus "session" field.
        session = vars(self).get('session')
        if session:
            data['session'] = session

        post = urlencode(data).encode('utf-8')
        # Passing a request body makes urlopen issue a POST; the context
        # manager closes the connection once the reply has been read.
        with urlopen(url, post) as response:
            return json.loads(response.read().decode('utf-8'))

    def __getattr__(self, name):
        """Return a function that calls API method ``name`` with keyword arguments.

        Names starting with an underscore are not treated as API methods, so
        protocol lookups (copy, pickle, ...) get a regular AttributeError.
        """
        if name.startswith('_'):
            raise AttributeError(name)

        def wrapper(**kargs):
            return self._call(name, kargs)
        return wrapper
M Sole.py → Overpost.py

@@ -1,11 +1,14 @@

-import feedparser from html.parser import HTMLParser from datetime import datetime from re import compile +import os +import feedparser +from dotenv import load_dotenv +load_dotenv() -N_LINKS_TO_REMOVE = 2 +RSS_URL = os.getenv("RSS_URL") or os.path.join(".", "rss.xml") +N_LINKS_TO_REMOVE = os.getenv("N_LINKS_TO_REMOVE") or 2 REGEX_DATE = compile("\(([\d\.]*)\)") -OVERPOST_URL = "https://overpost.biz/e-books/quotidiani/rss.xml" def add_or_update(dictionary, key, value): try:

@@ -48,29 +51,30 @@ parser = PostParser()

parser.feed(html) return parser.get_links() -def remove_first(d): +def dict_pop(d): return (k := next(iter(d)), d.pop(k)) -def remove_first_n(d, n): - for i in range(n): - remove_first(d) +def dict_pop_first_n(d, n): + return [dict_pop(d) for _ in range(n)] def parse_entry(entry): # entry = day date = REGEX_DATE.findall(entry.title)[0] links = parse_html(entry.turbo_content) - remove_first_n(links, N_LINKS_TO_REMOVE) + dict_pop_first_n(links, int(N_LINKS_TO_REMOVE)) return (datetime.strptime(date, "%d.%m.%Y"), links) def get_links(rss_url): feed = feedparser.parse(rss_url) return [ parse_entry(entry) for entry in feed.entries ] -def get_sole(): - links = get_links(OVERPOST_URL) - today = links[1] - return { k: v for k, v in today[1].items() if k.startswith("Il Sole 24 Ore")} +def get_newspaper(prefix="", index=0): + links = get_links(RSS_URL) + try: + daily = links[index][1] + except IndexError: + return {} + return { k: v for k, v in daily.items() if k.startswith(prefix)} -OVERPOST_URL = r"/home/marco/Documenti/overpost/rss.xml" if __name__ == "__main__": - print(get_sole()) + print(get_newspaper("Il Sole"))
M main.py

@@ -1,48 +1,25 @@

"""Queue the latest issues of the configured newspaper as pyLoad packages."""
# https://github.com/pyload/pyload/wiki/module.Api.Api
import sys
from os import getenv

from Overpost import get_newspaper
from MyPyload import Pyload

# Title prefix of the newspaper to download. The empty default matches every
# title, because get_newspaper filters with str.startswith.
NEWSPAPER_PREFIX = getenv("NEWSPAPER_PREFIX") or ""


def scroll_dict(dictionary):
    """Yield one ``(key, value)`` pair per key, rotating through the value lists.

    The position advances by one after every yielded pair and restarts at 0
    whenever it runs past the end of the current list, so consecutive keys
    pick different positions where possible. Keys whose list is empty are
    skipped.

    Args:
        dictionary: Mapping from a key to a list of candidate values.
    """
    i = 0
    for key, values in dictionary.items():
        if not values:
            # Nothing to pick from; indexing would raise IndexError.
            continue
        if i >= len(values):
            i = 0
        yield key, values[i]
        i += 1


def download_link(connection, name, link):
    """Add a package called ``name`` containing only ``link``; return the API reply."""
    return connection.addPackage(name=name, links=[link])


def main():
    """Fetch the newest links for NEWSPAPER_PREFIX and queue each one in pyLoad."""
    newspapers = get_newspaper(NEWSPAPER_PREFIX, 0)  # 0 -> today
    con = Pyload()
    pids = [
        download_link(con, NEWSPAPER_PREFIX, link)
        for _, link in scroll_dict(newspapers)
    ]
    print(pids)


if __name__ == "__main__":
    # sys.exit instead of the builtin exit(): the latter is injected by the
    # site module and is not available when Python runs without it.
    sys.exit(main())