Upgrade Beautiful Soup & adjust tests to conform

This commit is contained in:
Justin Mayer 2025-04-11 18:33:27 +02:00
commit 88a6f57940
2 changed files with 10 additions and 10 deletions

View file

@@ -27,7 +27,7 @@ logger = logging.getLogger(__name__)
def decode_wp_content(content, br=True): def decode_wp_content(content, br=True):
pre_tags = {} pre_tags = {}
if content.strip() == "": if content is None or content.strip() == "":
return "" return ""
content += "\n" content += "\n"
@@ -148,7 +148,7 @@ def wp2fields(xml, wp_custpost=False):
"""Opens a wordpress XML file, and yield Pelican fields""" """Opens a wordpress XML file, and yield Pelican fields"""
soup = file_to_soup(xml) soup = file_to_soup(xml)
items = soup.rss.channel.findAll("item") items = soup.rss.channel.find_all("item")
for item in items: for item in items:
if item.find("status").string in ["publish", "draft"]: if item.find("status").string in ["publish", "draft"]:
try: try:
@@ -172,11 +172,11 @@ def wp2fields(xml, wp_custpost=False):
author = item.find("creator").string author = item.find("creator").string
categories = [ categories = [
cat.string for cat in item.findAll("category", {"domain": "category"}) cat.string for cat in item.find_all("category", {"domain": "category"})
] ]
tags = [ tags = [
tag.string for tag in item.findAll("category", {"domain": "post_tag"}) tag.string for tag in item.find_all("category", {"domain": "post_tag"})
] ]
# To publish a post the status should be 'published' # To publish a post the status should be 'published'
status = ( status = (
@@ -218,7 +218,7 @@ def blogger2fields(xml):
"""Opens a blogger XML file, and yield Pelican fields""" """Opens a blogger XML file, and yield Pelican fields"""
soup = file_to_soup(xml) soup = file_to_soup(xml)
entries = soup.feed.findAll("entry") entries = soup.feed.find_all("entry")
for entry in entries: for entry in entries:
raw_kind = entry.find( raw_kind = entry.find(
"category", {"scheme": "http://schemas.google.com/g/2005#kind"} "category", {"scheme": "http://schemas.google.com/g/2005#kind"}
@@ -253,7 +253,7 @@ def blogger2fields(xml):
# blogger posts only have tags, no category # blogger posts only have tags, no category
tags = [ tags = [
tag.get("term") tag.get("term")
for tag in entry.findAll( for tag in entry.find_all(
"category", {"scheme": "http://www.blogger.com/atom/ns#"} "category", {"scheme": "http://www.blogger.com/atom/ns#"}
) )
] ]
@@ -571,8 +571,8 @@ def strip_medium_post_content(soup) -> str:
# See https://stackoverflow.com/a/8439761 # See https://stackoverflow.com/a/8439761
invalid_tags = ["section", "div", "footer"] invalid_tags = ["section", "div", "footer"]
for tag in invalid_tags: for tag in invalid_tags:
for match in soup.findAll(tag): for match in soup.find_all(tag):
match.replaceWithChildren() match.unwrap()
# Remove attributes # Remove attributes
# See https://stackoverflow.com/a/9045719 # See https://stackoverflow.com/a/9045719
@@ -845,7 +845,7 @@ def get_attachments(xml):
of the attachment_urls of the attachment_urls
""" """
soup = file_to_soup(xml) soup = file_to_soup(xml)
items = soup.rss.channel.findAll("item") items = soup.rss.channel.find_all("item")
names = {} names = {}
attachments = [] attachments = []

View file

@@ -78,7 +78,7 @@ test = "invoke tests"
[tool.pdm.dev-dependencies] [tool.pdm.dev-dependencies]
dev = [ dev = [
"BeautifulSoup4>=4.12.2", "BeautifulSoup4>=4.13.3",
"jinja2>=3.1.2", "jinja2>=3.1.2",
"lxml>=4.9.3", "lxml>=4.9.3",
"markdown>=3.5.1", "markdown>=3.5.1",