diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..e2e2fed
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,12 @@
+FROM python:3.10-alpine
+
+WORKDIR /app
+
+COPY requirements.txt /app/requirements.txt
+
+# --no-cache-dir keeps pip's download cache out of the image layer.
+RUN pip install --no-cache-dir -r /app/requirements.txt && rm /app/requirements.txt
+
+COPY . /app
+
+CMD ["python3", "-m", "nfuck"]
diff --git a/logging.json b/logging.json
new file mode 100644
index 0000000..8d2f159
--- /dev/null
+++ b/logging.json
@@ -0,0 +1,24 @@
+{
+ "version": 1,
+ "disable_existing_loggers": false,
+ "formatters": {
+ "simple": {
+ "format": "%(asctime)s %(levelname)s %(module)s L%(lineno)d: %(message)s"
+ }
+ },
+ "handlers": {
+ "stdout": {
+ "class": "logging.StreamHandler",
+ "formatter": "simple",
+ "stream": "ext://sys.stdout"
+ }
+ },
+ "loggers": {
+ "root": {
+ "level": "DEBUG",
+ "handlers": [
+ "stdout"
+ ]
+ }
+ }
+}
diff --git a/nfuck/__init__.py b/nfuck/__init__.py
index e9ef936..cbb183d 100644
--- a/nfuck/__init__.py
+++ b/nfuck/__init__.py
@@ -3,44 +3,101 @@ from aiogram.types import Message
 from aiogram.filters import Command
 from httpx import AsyncClient
-from nfuck.link_verifier import explain_verification, get_random_useragent, verify_link
+from html import escape
+from nfuck.link_verifier import (
+    explain_verification,
+    get_random_useragent,
+    verify_link,
+)
+from nfuck.utils import sanitize_link
+
 dp = Dispatcher()
+# @dp.message(Command("dd"))
+# async def on_dd(message: Message):
+#     if message.reply_to_message:
+#         await message.reply_to_message.delete()
+
+
 @dp.message(Command("check"))
 async def on_check(message: Message):
+    """Reply to /check with the verifier's findings for each link.
+
+    For every ``url``/``text_link`` entity the page is fetched and each
+    pattern match (span and explanation) plus the total score is listed.
+    Replies with ":shrug:" when no link produced any output.
+    """
     results = []
     for entity in message.entities or []:
         if entity.type in ("text_link", "url") and message.text:
             if entity.type == "url":
-                entity.url = message.text[entity.offset : entity.offset + entity.length]
+                # Entity offsets are counted in UTF-16 code units; a plain
+                # str slice picks the wrong span once emoji precede the link.
+                entity.url = entity.extract_from(message.text)
             if not entity.url:
                 continue
+            # Require a real scheme: bare hosts such as "httpbin.org" also
+            # start with "http" but still need the prefix.
+            if not entity.url.lower().startswith(("http://", "https://")):
+                entity.url = "https://" + entity.url
             async with AsyncClient(
                 headers={"User-Agent": get_random_useragent()}
             ) as client:
                 data = (await client.get(entity.url)).text
                 total_score = 0
-                results.append(f"{entity.url}")
+                # The reply is sent as HTML, so escape the link text.
+                results.append(escape(sanitize_link(entity.url), quote=False))
                 for score, explanation, match in explain_verification(data):
                     results.append(f"{match.span()}: {explanation}")
                     total_score += score
                 results.append(f"Total score: {total_score}")
                 results.append("")
-    await message.reply(str.join("\n", results), parse_mode="html")
+    if results:
+        await message.reply(
+            str.join("\n", results),
+            parse_mode="html",
+            disable_web_page_preview=True,
+        )
+    else:
+        await message.reply(":shrug:")
 @dp.message()
 async def on_message(message: Message):
+    """Delete messages containing links that ``verify_link`` scores above 0.9.
+
+    Every ``url``/``text_link`` entity is scored; when at least one exceeds
+    the threshold, a reply lists the links and the sender before deletion.
+    """
+    detected_links: list[tuple[str, float]] = []
     for entity in message.entities or []:
         if entity.type in ("text_link", "url") and message.text:
             if entity.type == "url":
-                entity.url = message.text[entity.offset : entity.offset + entity.length]
+                # Entity offsets are counted in UTF-16 code units; a plain
+                # str slice picks the wrong span once emoji precede the link.
+                entity.url = entity.extract_from(message.text)
             if not entity.url:
                 continue
             confidence = await verify_link(entity.url)
-            if confidence > 0.75:
-                await message.reply(f"Holy smokes, another one (~{confidence*100:.0f}% sure)")
-                await message.delete()
-
-
+            if confidence > 0.9:
+                detected_links.append((entity.url, confidence))
+    if detected_links:
+        sender = message.from_user
+        if sender:
+            report = [f"Found {len(detected_links)} links:"]
+            report.extend(
+                f"{i}. {escape(sanitize_link(url), quote=False)}"
+                f" with confidence {confidence:.2f}"
+                for i, (url, confidence) in enumerate(detected_links, 1)
+            )
+            # Display names are user-controlled; unescaped markup would make
+            # Telegram reject the HTML reply.
+            report.append(
+                f"Sender: {escape(sender.full_name, quote=False)}"
+                f" #{sender.id} (@{sender.username})"
+            )
+            await message.reply(str.join("\n", report), parse_mode="html")
+        # NOTE(review): deletion sits outside the sender check so posts
+        # without a from_user are removed as well; confirm this nesting.
+        await message.delete()
diff --git a/nfuck/__main__.py b/nfuck/__main__.py
index 8e8ff5e..8afd27c 100644
--- a/nfuck/__main__.py
+++ b/nfuck/__main__.py
@@ -2,6 +2,15 @@ from aiogram import Bot
 from aiosqlite import connect as asqlite
 from os import environ
 from nfuck import dp
+import logging.config
+from json import load as load_json
+
+# Logging is configured at import time from logging.json, resolved against
+# the current working directory. JSON files are UTF-8, so the encoding is
+# passed explicitly instead of relying on the locale default.
+with open("logging.json", "r", encoding="utf-8") as f_in:
+    logging.config.dictConfig(load_json(f_in))
+
 async def main():
     bot = Bot(environ["TG_BOT_TOKEN"])
@@ -9,7 +18,8 @@ async def main():
     await dp.start_polling(bot, db=db)
     await db.close()
+
 if __name__ == "__main__":
     from asyncio import run
-    run(main())
+    run(main())
diff --git a/nfuck/link_verifier.py b/nfuck/link_verifier.py
index 52eb915..aa5db90 100644
--- a/nfuck/link_verifier.py
+++ b/nfuck/link_verifier.py
@@ -1,6 +1,10 @@
from httpx import AsyncClient
from re import Match, Pattern, compile as regexp, IGNORECASE
from random import choice
+from logging import DEBUG, getLogger
+
+logger = getLogger("nfuck.link_verifier")
+logger.setLevel(DEBUG)
USER_AGENT = [
"Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0"
@@ -19,6 +23,7 @@ REGEX_PATTERNS: list[tuple[float, Pattern, str]] = [
MAX_SCORE = sum(t[0] for t in REGEX_PATTERNS)
+
def explain_verification(content: str) -> list[tuple[float, str, Match]]:
result: list[tuple[float, str, Match]] = []
for score, regex, explanation in REGEX_PATTERNS:
@@ -26,15 +31,30 @@ def explain_verification(content: str) -> list[tuple[float, str, Match]]:
             result.append((score, explanation, match))
     return result
+
 def get_random_useragent() -> str:
     return choice(USER_AGENT)
+
 async def verify_link(url: str) -> float:
+    """Fetch ``url`` and return its score as a fraction of ``MAX_SCORE``.
+
+    The response body is passed to ``explain_verification``; the scores of
+    all reported matches are summed and divided by ``MAX_SCORE``. A URL
+    without an http(s) scheme is fetched over ``https://``.
+    """
     total_score = 0
+    logger.info("Verifying link %s", url)
+    # Require a real scheme: bare hosts such as "httpbin.org" also start
+    # with "http" but still need the prefix.
+    if not url.lower().startswith(("http://", "https://")):
+        url = "https://" + url
     async with AsyncClient(
         headers={"User-Agent": get_random_useragent()}
     ) as client:
         data = await client.get(url)
-        for score, _, _ in explain_verification(data.text):
+        for score, explanation, match in explain_verification(data.text):
+            logger.debug("%s: %s at %d", url, explanation, match.start())
             total_score += score
+    logger.info("Score for %r: %f", url, total_score)
     return total_score / MAX_SCORE
diff --git a/nfuck/utils.py b/nfuck/utils.py
new file mode 100644
index 0000000..5a6d704
--- /dev/null
+++ b/nfuck/utils.py
@@ -0,0 +1,6 @@
+def sanitize_link(url: str) -> str:
+    """Return ``url`` with "://" rewritten to "[://]" and "." to "[dot]"."""
+    defanged = url
+    for needle, replacement in (("://", "[://]"), (".", "[dot]")):
+        defanged = defanged.replace(needle, replacement)
+    return defanged