Fixed the regex mode of the text filter, which was just NOT WORKING
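Apparently, `re.match` only matches at the very beginning of the text (so, in practice, the first line ONLY). We have to use `.search` instead, which looks for a match anywhere in the text. Also nuked that plaintext conversion; we already have it on the status as a property (`content_plaintext`).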
This commit is contained in:
parent
8b03ccf44a
commit
6d3e00ba4a
|
@ -2,7 +2,6 @@ from configparser import SectionProxy
|
|||
from re import Pattern, compile as regexp
|
||||
from typing import Optional, Set
|
||||
|
||||
from bs4 import BeautifulSoup, PageElement, Tag
|
||||
from mastoposter.filters.base import BaseFilter
|
||||
from mastoposter.types import Status
|
||||
|
||||
|
@ -27,30 +26,10 @@ class TextFilter(BaseFilter, filter_name="content"):
|
|||
return cls(tags=set(section["tags"].split()))
|
||||
raise AssertionError("neither regexp or tags were set")
|
||||
|
||||
@classmethod
|
||||
def node_to_text(cls, el: PageElement) -> str:
|
||||
if isinstance(el, Tag):
|
||||
if el.name == "br":
|
||||
return "\n"
|
||||
elif el.name == "p":
|
||||
return (
|
||||
str.join("", map(cls.node_to_text, el.children)) + "\n\n"
|
||||
)
|
||||
return str.join("", map(cls.node_to_text, el.children))
|
||||
return str(el)
|
||||
|
||||
@classmethod
|
||||
def html_to_plain(cls, html: str) -> str:
|
||||
soup = BeautifulSoup(html, "lxml")
|
||||
return cls.node_to_text(soup).rstrip()
|
||||
|
||||
def __call__(self, status: Status) -> bool:
|
||||
source = status.reblog or status
|
||||
if self.regexp is not None:
|
||||
return (
|
||||
self.regexp.match(self.html_to_plain(source.content))
|
||||
is not None
|
||||
)
|
||||
return self.regexp.search(source.content_plaintext) is not None
|
||||
elif self.tags:
|
||||
return len(self.tags & {t.name.lower() for t in source.tags}) > 0
|
||||
else:
|
||||
|
|
Loading…
Reference in New Issue