Hacked in URL verification without visiting
This commit is contained in:
parent
a7d0e5aff2
commit
92a0689eb6
|
@ -15,7 +15,8 @@ from nfuck.utils import sanitize_link
|
|||
|
||||
dp = Dispatcher()
|
||||
|
||||
SILENT_REMOVAL_IDS: set[int] = set(list(map(int, getenv("SILENT_REMOVAL_IDS", "").split(","))))
|
||||
def _parse_id_list(raw: str) -> set[int]:
    """Parse a comma-separated string of integer IDs into a set.

    Empty and whitespace-only entries (e.g. from an unset variable, a
    trailing comma, or ``"1, ,2"``) are ignored instead of being passed to
    ``int()``, which would raise ``ValueError``.

    Raises:
        ValueError: if a non-blank entry is not a valid integer.
    """
    return {int(part) for part in raw.split(",") if part.strip()}


# Chat IDs for which the handler deletes the offending message without
# posting a notice; configured via the SILENT_REMOVAL_IDS environment variable.
SILENT_REMOVAL_IDS: set[int] = _parse_id_list(getenv("SILENT_REMOVAL_IDS", ""))
|
||||
|
||||
|
||||
@dp.message(Command("check"))
|
||||
async def on_check(message: Message):
|
||||
|
@ -66,8 +67,12 @@ async def on_message(message: Message):
|
|||
if confidence > 0.9:
|
||||
detected_links.append((entity.url, confidence))
|
||||
if detected_links:
|
||||
await message.delete()
|
||||
if message.from_user and message.chat.id not in SILENT_REMOVAL_IDS:
|
||||
msg = await message.reply(
|
||||
if not message.bot:
|
||||
raise RuntimeError("what")
|
||||
msg = await message.bot.send_message(
|
||||
message.chat.id,
|
||||
str.join(
|
||||
"\n",
|
||||
[
|
||||
|
@ -82,13 +87,11 @@ async def on_message(message: Message):
|
|||
],
|
||||
),
|
||||
f"Sender: {message.from_user.full_name} #{message.from_user.id} (@{message.from_user.username})",
|
||||
"(message will be deleted in 10 seconds)"
|
||||
"(message will be deleted in 10 seconds)",
|
||||
"False positive? Report <a href=\"https://forms.gle/cwj565M3y928M47g7\">here</a>!"
|
||||
],
|
||||
),
|
||||
parse_mode="html",
|
||||
)
|
||||
await message.delete()
|
||||
await sleep(10)
|
||||
await msg.delete()
|
||||
else:
|
||||
await message.delete()
|
||||
|
|
|
@ -16,6 +16,10 @@ USER_AGENT = [
|
|||
"Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0"
|
||||
]
|
||||
|
||||
# Patterns intended to be matched against the URL string itself.
# Each entry is (score weight, compiled regex, human-readable explanation),
# the same shape as REGEX_PATTERNS.
URL_PATTERNS: list[tuple[float, Pattern, str]] = [
    # The dot in "t.me" is escaped so only the literal host matches; an
    # unescaped "." would also accept any other character in that position.
    (10.0, regexp(r"https://t\.me/\w+[bB]ot/claim"), "Telegram Bot claim link"),
]
|
||||
|
||||
REGEX_PATTERNS: list[tuple[float, Pattern, str]] = [
|
||||
(1.0, regexp(r"\bp2e\b", IGNORECASE), "Play-to-earn keyword"),
|
||||
(5.0, regexp(r"play\-to\-earn", IGNORECASE), "Play-to-earn directly"),
|
||||
|
@ -32,7 +36,8 @@ REGEX_PATTERNS: list[tuple[float, Pattern, str]] = [
|
|||
(3.0, regexp(r"A collection of \w+ NFTs", IGNORECASE), "Collection of [some] NFTs"),
|
||||
]
|
||||
|
||||
MAX_SCORE = sum(t[0] for t in REGEX_PATTERNS)
|
||||
# Divisor applied to the accumulated pattern score in verify_link's return
# value. Hard-coded here; the earlier sum-based derivation is kept in the
# trailing comment for reference.
MAX_REGEX_SCORE = 30 # sum(t[0] for t in REGEX_PATTERNS)
|
||||
# NOTE(review): not referenced in the visible code; presumably the ceiling
# for URL_PATTERNS weights (the one visible pattern weighs 10.0) — confirm.
MAX_URL_SCORE = 10
|
||||
|
||||
|
||||
def explain_verification(content: str) -> list[tuple[float, str, Match]]:
|
||||
|
@ -56,6 +61,11 @@ async def verify_link(url: str) -> float:
|
|||
if any(fnmatch(domain, pat) for pat in DOMAIN_WHITELIST):
|
||||
logger.info("Score for %r: 0 (whitelisted domain)", url)
|
||||
return 0
|
||||
for score, regex, explanation in REGEX_PATTERNS:
|
||||
for match in regex.finditer(url):
|
||||
total_score += score
|
||||
if total_score >= MAX_REGEX_SCORE:
|
||||
return total_score / MAX_REGEX_SCORE
|
||||
async with AsyncClient(
|
||||
headers={"User-Agent": get_random_useragent()}
|
||||
) as client:
|
||||
|
@ -64,4 +74,4 @@ async def verify_link(url: str) -> float:
|
|||
logger.debug("%s: %s at %d", url, explanation, match.start())
|
||||
total_score += score
|
||||
logger.info("Score for %r: %f", url, total_score)
|
||||
return total_score / MAX_SCORE
|
||||
return total_score / MAX_REGEX_SCORE
|
||||
|
|
Loading…
Reference in New Issue