all repos — sunstroke @ 19ce085fa872db7f4f15e230524889ac5322f8a5

add custom resolver
Andronaco Marco marco.andronaco@olivetti.com
Thu, 13 Jul 2023 11:10:38 +0200
commit

19ce085fa872db7f4f15e230524889ac5322f8a5

parent

c4d85c371226380ae82f722938ca12eb5a528512

2 files changed, 50 insertions(+), 1 deletions(-)

jump to
A MyResolver.py

@@ -0,0 +1,42 @@

"""HTTPS helpers that connect to a hard-coded IP for selected hostnames."""

import random
from typing import Optional
from urllib.parse import urlparse

import requests


class HostHeaderSSLAdapter(requests.adapters.HTTPAdapter):
    """Transport adapter that swaps selected hostnames for a fixed IP.

    For an ``https`` request whose hostname is listed in ``PINNED_HOSTS``,
    the URL host is replaced by one of ``CDN_IPS`` while the original
    hostname is still used for SNI, certificate hostname checking and the
    ``Host`` header.
    """

    # Candidate addresses; one is chosen at random on every lookup.
    CDN_IPS = (
        '104.16.89.20',  # CloudFlare
        '151.101.2.109',  # Fastly
    )
    # Hostnames that are mapped to one of CDN_IPS.
    PINNED_HOSTS = frozenset({'overpost.biz'})

    def resolve(self, hostname):
        """Return an IP for *hostname*, or ``None`` if it is not pinned."""
        if hostname in self.PINNED_HOSTS:
            return random.choice(self.CDN_IPS)
        return None

    @staticmethod
    def _with_host(parsed, new_host):
        """Return the URL of *parsed* with only its host replaced.

        Userinfo and an explicit port in the netloc are preserved; path,
        query and fragment are left untouched, so a copy of the hostname
        appearing elsewhere in the URL is never rewritten.
        """
        userinfo, at_sign, _ = parsed.netloc.rpartition('@')
        port = '' if parsed.port is None else ':' + str(parsed.port)
        netloc = userinfo + at_sign + new_host + port
        return parsed._replace(netloc=netloc).geturl()

    def send(self, request, **kwargs):
        """Send *request*, redirecting pinned HTTPS hosts to their IP.

        All keyword arguments are forwarded unchanged to
        ``HTTPAdapter.send``; its return value is returned as-is.
        """
        connection_pool_kwargs = self.poolmanager.connection_pool_kw
        parsed = urlparse(request.url)
        resolved_ip = self.resolve(parsed.hostname)

        if parsed.scheme == 'https' and resolved_ip:
            request.url = self._with_host(parsed, resolved_ip)
            # The socket goes to the IP, but TLS must still present and
            # verify the real hostname.
            connection_pool_kwargs['server_hostname'] = parsed.hostname  # SNI
            connection_pool_kwargs['assert_hostname'] = parsed.hostname

            # overwrite the host header
            request.headers['Host'] = parsed.hostname
        else:
            # these settings may have been left over from a previous request
            connection_pool_kwargs.pop('server_hostname', None)
            connection_pool_kwargs.pop('assert_hostname', None)

        return super().send(request, **kwargs)


def get(url: str, timeout: Optional[float] = None):
    """Fetch *url* with GET through a session using HostHeaderSSLAdapter.

    Args:
        url: The URL to request.
        timeout: Optional timeout in seconds handed to ``Session.get``;
            ``None`` (the default) means no timeout is applied.

    Returns:
        The response object returned by ``Session.get``.
    """
    # The context manager closes the session (and its adapters) on exit;
    # the response body is already loaded because streaming is not enabled.
    with requests.Session() as session:
        session.mount('https://', HostHeaderSSLAdapter())
        return session.get(url, timeout=timeout)
M Overpost.py

@@ -4,6 +4,7 @@ from re import compile

import os import feedparser from dotenv import load_dotenv +from MyResolver import get load_dotenv() RSS_URL = os.getenv("RSS_URL") or os.path.join(".", "rss.xml")

@@ -64,8 +65,14 @@

dict_pop_first_n(links, int(N_LINKS_TO_REMOVE)) return (datetime.strptime(date, "%d.%m.%Y"), links) +def handle_url(url): + if url.startswith("http"): + return get(url) + else: + return url + def get_links(rss_url): - feed = feedparser.parse(rss_url) + feed = feedparser.parse(handle_url(rss_url)) return [ parse_entry(entry) for entry in feed.entries ] def get_newspaper(prefix="", index=0):