Added domain whitelist
This commit is contained in:
parent
591a23976f
commit
49d5b8671e
|
@ -2,10 +2,16 @@ from httpx import AsyncClient
|
||||||
from re import Match, Pattern, compile as regexp, IGNORECASE
|
from re import Match, Pattern, compile as regexp, IGNORECASE
|
||||||
from random import choice
|
from random import choice
|
||||||
from logging import DEBUG, getLogger
|
from logging import DEBUG, getLogger
|
||||||
|
from os import getenv
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
from fnmatch import fnmatch
|
||||||
|
|
||||||
logger = getLogger("nfuck.link_verifier")
|
logger = getLogger("nfuck.link_verifier")
|
||||||
logger.setLevel(DEBUG)
|
logger.setLevel(DEBUG)
|
||||||
|
|
||||||
|
# TODO: move the DOMAIN_WHITELIST configuration out of this module somehow
|
||||||
|
DOMAIN_WHITELIST: set[str] = set(filter(lambda v: v, getenv("DOMAIN_WHITELIST", "").split(",")))
|
||||||
|
|
||||||
USER_AGENT = [
|
USER_AGENT = [
|
||||||
"Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0"
|
"Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0"
|
||||||
]
|
]
|
||||||
|
@ -43,6 +49,10 @@ async def verify_link(url: str) -> float:
|
||||||
logger.info("Verifying link %s", url)
|
logger.info("Verifying link %s", url)
|
||||||
if not url.startswith("http"):
|
if not url.startswith("http"):
|
||||||
url = "https://" + url
|
url = "https://" + url
|
||||||
|
domain = urlparse(url).netloc
|
||||||
|
if any(fnmatch(domain, pat) for pat in DOMAIN_WHITELIST):
|
||||||
|
logger.info("Score for %r: 0 (whitelisted domain)", url)
|
||||||
|
return 0
|
||||||
async with AsyncClient(
|
async with AsyncClient(
|
||||||
headers={"User-Agent": get_random_useragent()}
|
headers={"User-Agent": get_random_useragent()}
|
||||||
) as client:
|
) as client:
|
||||||
|
|
Loading…
Reference in New Issue