Check link previews too

This commit is contained in:
Casey 2024-11-15 12:38:08 +03:00
parent af4545b583
commit fd6429ab05
Signed by: hkc
GPG Key ID: F0F6CFE11CDB0960
1 changed file with 39 additions and 20 deletions

View File

@@ -25,6 +25,9 @@ SILENT_REMOVAL_IDS: set[int] = set(list(map(int, filter(lambda v: v, getenv("SIL
@dp.message(Command("check")) @dp.message(Command("check"))
async def on_check(message: Message): async def on_check(message: Message):
results = [] results = []
urls = []
if message.link_preview_options:
urls.append(message.link_preview_options.url)
for entity in message.entities or []: for entity in message.entities or []:
if entity.type in ("text_link", "url") and message.text: if entity.type in ("text_link", "url") and message.text:
if entity.type == "url": if entity.type == "url":
@@ -35,12 +38,14 @@ async def on_check(message: Message):
continue continue
if not entity.url.startswith("http"): if not entity.url.startswith("http"):
entity.url = "https://" + entity.url entity.url = "https://" + entity.url
urls.append(entity.url)
for url in urls:
async with AsyncClient( async with AsyncClient(
headers={"User-Agent": get_random_useragent()} headers={"User-Agent": get_random_useragent()}
) as client: ) as client:
data = (await client.get(entity.url)).text data = (await client.get(url)).text
total_score = 0 total_score = 0
results.append(f"<b>{sanitize_link(entity.url)}</b>") results.append(f"<b>{sanitize_link(url)}</b>")
counts = {} counts = {}
for score, explanation, _ in explain_verification(data): for score, explanation, _ in explain_verification(data):
counts[explanation] = counts.get(explanation, 0) + 1 counts[explanation] = counts.get(explanation, 0) + 1
@@ -73,7 +78,10 @@ async def on_force(message: Message):
if not message.reply_to_message: if not message.reply_to_message:
return return
detected_links: list[tuple[str, float]] = [] detected_links: list[tuple[str, float]] = []
for entity in message.reply_to_message.entities or []: urls = []
if message.link_preview_options:
urls.append(message.link_preview_options.url)
for entity in message.entities or []:
if entity.type in ("text_link", "url") and message.text: if entity.type in ("text_link", "url") and message.text:
if entity.type == "url": if entity.type == "url":
entity.url = message.text[ entity.url = message.text[
@@ -81,8 +89,12 @@ async def on_force(message: Message):
] ]
if not entity.url: if not entity.url:
continue continue
confidence = await verify_link(entity.url) if not entity.url.startswith("http"):
detected_links.append((entity.url, confidence)) entity.url = "https://" + entity.url
urls.append(entity.url)
for url in urls:
confidence = await verify_link(url)
detected_links.append((url, confidence))
n_links = len(detected_links) n_links = len(detected_links)
n_harmful = len(list(filter(lambda lnk: lnk[1] > 0.9, detected_links))) n_harmful = len(list(filter(lambda lnk: lnk[1] > 0.9, detected_links)))
if n_harmful > 0: if n_harmful > 0:
@@ -106,6 +118,9 @@ def form_for(message: Message, link: str) -> str:
@dp.message() @dp.message()
async def on_message(message: Message): async def on_message(message: Message):
detected_links: list[tuple[str, float]] = [] detected_links: list[tuple[str, float]] = []
urls = []
if message.link_preview_options:
urls.append(message.link_preview_options.url)
for entity in message.entities or []: for entity in message.entities or []:
if entity.type in ("text_link", "url") and message.text: if entity.type in ("text_link", "url") and message.text:
if entity.type == "url": if entity.type == "url":
@@ -114,9 +129,13 @@ async def on_message(message: Message):
] ]
if not entity.url: if not entity.url:
continue continue
confidence = await verify_link(entity.url) if not entity.url.startswith("http"):
entity.url = "https://" + entity.url
urls.append(entity.url)
for url in urls:
confidence = await verify_link(url)
if confidence > 0.9: if confidence > 0.9:
detected_links.append((entity.url, confidence)) detected_links.append((url, confidence))
if detected_links: if detected_links:
await message.delete() await message.delete()
if message.from_user and message.chat.id not in SILENT_REMOVAL_IDS: if message.from_user and message.chat.id not in SILENT_REMOVAL_IDS: