Fixed the regex mode of the text filter, which was just NOT WORKING
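Apparently, `re.match` only matches at the very beginning of the string (so, in practice, it checks the first line ONLY). We have to use `.search` instead, which scans the whole text. Also nuked that plaintext conversion; the status already exposes it as a property (`content_plaintext`)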
This commit is contained in:
parent
8b03ccf44a
commit
6d3e00ba4a
|
@ -2,7 +2,6 @@ from configparser import SectionProxy
|
||||||
from re import Pattern, compile as regexp
|
from re import Pattern, compile as regexp
|
||||||
from typing import Optional, Set
|
from typing import Optional, Set
|
||||||
|
|
||||||
from bs4 import BeautifulSoup, PageElement, Tag
|
|
||||||
from mastoposter.filters.base import BaseFilter
|
from mastoposter.filters.base import BaseFilter
|
||||||
from mastoposter.types import Status
|
from mastoposter.types import Status
|
||||||
|
|
||||||
|
@ -27,30 +26,10 @@ class TextFilter(BaseFilter, filter_name="content"):
|
||||||
return cls(tags=set(section["tags"].split()))
|
return cls(tags=set(section["tags"].split()))
|
||||||
raise AssertionError("neither regexp or tags were set")
|
raise AssertionError("neither regexp or tags were set")
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def node_to_text(cls, el: PageElement) -> str:
|
|
||||||
if isinstance(el, Tag):
|
|
||||||
if el.name == "br":
|
|
||||||
return "\n"
|
|
||||||
elif el.name == "p":
|
|
||||||
return (
|
|
||||||
str.join("", map(cls.node_to_text, el.children)) + "\n\n"
|
|
||||||
)
|
|
||||||
return str.join("", map(cls.node_to_text, el.children))
|
|
||||||
return str(el)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def html_to_plain(cls, html: str) -> str:
|
|
||||||
soup = BeautifulSoup(html, "lxml")
|
|
||||||
return cls.node_to_text(soup).rstrip()
|
|
||||||
|
|
||||||
def __call__(self, status: Status) -> bool:
|
def __call__(self, status: Status) -> bool:
|
||||||
source = status.reblog or status
|
source = status.reblog or status
|
||||||
if self.regexp is not None:
|
if self.regexp is not None:
|
||||||
return (
|
return self.regexp.search(source.content_plaintext) is not None
|
||||||
self.regexp.match(self.html_to_plain(source.content))
|
|
||||||
is not None
|
|
||||||
)
|
|
||||||
elif self.tags:
|
elif self.tags:
|
||||||
return len(self.tags & {t.name.lower() for t in source.tags}) > 0
|
return len(self.tags & {t.name.lower() for t in source.tags}) > 0
|
||||||
else:
|
else:
|
||||||
|
|
Loading…
Reference in New Issue