#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import unicode_literals, print_function
import argparse
try:
# py3k import
from html.parser import HTMLParser
except ImportError:
# py2 import
from HTMLParser import HTMLParser # NOQA
import os
import re
import subprocess
import sys
import time
import logging
from codecs import open
from six.moves.urllib.error import URLError
from six.moves.urllib.parse import urlparse
from six.moves.urllib.request import urlretrieve
from pelican.utils import slugify
from pelican.log import init
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
def decode_wp_content(content, br=True):
pre_tags = {}
if content.strip() == "":
return ""
content += "\n"
if "
")
last_pre = pre_parts.pop()
content = ""
pre_index = 0
for pre_part in pre_parts:
start = pre_part.find(""
content = content + pre_part[0:start] + name
pre_index += 1
content = content + last_pre
content = re.sub(r'
\s*
', "\n\n", content)
allblocks = ('(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|'
'td|th|div|dl|dd|dt|ul|ol|li|pre|select|option|form|'
'map|area|blockquote|address|math|style|p|h[1-6]|hr|'
'fieldset|noscript|samp|legend|section|article|aside|'
'hgroup|header|footer|nav|figure|figcaption|details|'
'menu|summary)')
content = re.sub(r'(<' + allblocks + r'[^>]*>)', "\n\\1", content)
content = re.sub(r'(' + allblocks + r'>)', "\\1\n\n", content)
# content = content.replace("\r\n", "\n")
if "