unescape chyrp html content

2025-10-15 20:28:56 +02:00 · 2013-01-05 21:03:45 +08:00 · 2013-01-05 21:03:45 +08:00 · 3987921875
commit 3987921875
parent 1b906da57a
1 changed files with 7 additions and 2 deletions
--- a/pelican/tools/pelican_import.py
+++ b/pelican/tools/pelican_import.py
@@ -171,19 +171,24 @@ def dc2fields(file):
 def chyrp2fields(atom):
    """Opens a Chyrp Atom file, and yield pelican fields"""
    import feedparser
+    import markdown
+
    d = feedparser.parse(atom)
    for entry in d.entries:

        if entry.chyrp_status == 'public' and entry.chyrp_feather == 'text':
+            # Chyrp supports both html and markdown, must convert by finding type
+            # content = markdown.markdown(entry.summary)
+            content = HTMLParser().unescape(entry.summary)

            date = (time.strftime("%Y-%m-%d %H:%M", entry.updated_parsed)
                if hasattr(entry, "updated_parsed") else None)
            author = entry.author if hasattr(entry, "author") else None
            tags = entry.tags if hasattr(entry, "tags") else None
            slug = entry.chyrp_url if hasattr(entry, "chyrp_url") else None
-            tags = [tag[0] for tag in re.findall(r"(.*)\:\s*\"(.*)\"", entry.tags)] if hasattr(entry, "tags") else None
+            tags = [tag[1] for tag in re.findall(r"(.*)\:\s*\"(.*)\"", entry.tags)] if hasattr(entry, "tags") else None

-            yield (entry.title, entry.summary, slug, date, author, [], tags, "html")
+            yield (entry.title, content, slug, date, author, [], tags, "html")


 def feed2fields(file):