#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, print_function
import argparse
try:
# Python 3: HTMLParser lives in the html.parser module
from html.parser import HTMLParser
except ImportError:
# Python 2 fallback: HTMLParser lives in the top-level HTMLParser module
from HTMLParser import HTMLParser # NOQA
import os
import re
import subprocess
import sys
import time
import logging
from codecs import open
from pelican.utils import slugify
from pelican.log import init
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def decode_wp_content(content, br=True):
pre_tags = {}
if content.strip() == "":
return ""
content += "\n"
if "
")
last_pre = pre_parts.pop()
content = ""
pre_index = 0
for pre_part in pre_parts:
start = pre_part.find(""
content = content + pre_part[0:start] + name
pre_index += 1
content = content + last_pre
content = re.sub(r'
\s*
', "\n\n", content)
allblocks = ('(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|'
'td|th|div|dl|dd|dt|ul|ol|li|pre|select|option|form|'
'map|area|blockquote|address|math|style|p|h[1-6]|hr|'
'fieldset|noscript|samp|legend|section|article|aside|'
'hgroup|header|footer|nav|figure|figcaption|details|'
'menu|summary)')
content = re.sub(r'(<' + allblocks + r'[^>]*>)', "\n\\1", content)
content = re.sub(r'(' + allblocks + r'>)', "\\1\n\n", content)
# content = content.replace("\r\n", "\n")
if "