forked from github/pelican
Merge pull request #325 from akavlie/issue-314
Better output formatting (see issue #314)
This commit is contained in:
commit
004ff4e7b6
2 changed files with 37 additions and 7 deletions
|
|
@ -19,6 +19,22 @@ The conversion from HTML to reStructuredText relies on `pandoc
|
|||
written with Markdown syntax, they will not be converted (as Pelican also
|
||||
supports Markdown).
|
||||
|
||||
Dependencies
|
||||
""""""""""""
|
||||
|
||||
``pelican-import`` has two dependencies not required by the rest of Pelican:
|
||||
|
||||
- BeautifulSoup
|
||||
- pandoc
|
||||
|
||||
BeautifulSoup can be installed like any other Python package::
|
||||
|
||||
$ pip install BeautifulSoup
|
||||
|
||||
For pandoc, install a package for your operating system from the
|
||||
`pandoc site <http://johnmacfarlane.net/pandoc/installing.html>`_.
|
||||
|
||||
|
||||
Usage
|
||||
"""""
|
||||
|
||||
|
|
@ -26,8 +42,8 @@ Usage
|
|||
| [--dir-cat]
|
||||
| input
|
||||
|
||||
Optional arguments:
|
||||
"""""""""""""""""""
|
||||
Optional arguments
|
||||
""""""""""""""""""
|
||||
|
||||
-h, --help show this help message and exit
|
||||
--wpfile Wordpress XML export
|
||||
|
|
|
|||
|
|
@ -13,7 +13,12 @@ from pelican.utils import slugify
|
|||
|
||||
def wp2fields(xml):
|
||||
"""Opens a wordpress XML file, and yield pelican fields"""
|
||||
from BeautifulSoup import BeautifulStoneSoup
|
||||
try:
|
||||
from BeautifulSoup import BeautifulStoneSoup
|
||||
except ImportError:
|
||||
error = ('Missing dependency '
|
||||
'"BeautifulSoup" required to import Wordpress XML files.')
|
||||
sys.exit(error)
|
||||
|
||||
xmlfile = open(xml, encoding='utf-8').read()
|
||||
soup = BeautifulStoneSoup(xmlfile)
|
||||
|
|
@ -40,7 +45,13 @@ def wp2fields(xml):
|
|||
|
||||
def dc2fields(file):
|
||||
"""Opens a Dotclear export file, and yield pelican fields"""
|
||||
from BeautifulSoup import BeautifulStoneSoup
|
||||
try:
|
||||
from BeautifulSoup import BeautifulStoneSoup
|
||||
except ImportError:
|
||||
error = ('Missing dependency '
|
||||
'"BeautifulSoup" required to import Dotclear files.')
|
||||
sys.exit(error)
|
||||
|
||||
|
||||
in_cat = False
|
||||
in_post = False
|
||||
|
|
@ -213,9 +224,12 @@ def fields2pelican(fields, out_markup, output_path, dircat=False):
|
|||
html_filename = os.path.join(output_path, filename+'.html')
|
||||
|
||||
with open(html_filename, 'w', encoding='utf-8') as fp:
|
||||
# Replace simple newlines with <br />+newline so that the HTML file
|
||||
# represents the original post more accurately
|
||||
content = content.replace("\n", "<br />\n")
|
||||
# Replace newlines with paragraphs wrapped with <p> so
|
||||
# HTML is valid before conversion
|
||||
paragraphs = content.split('\n\n')
|
||||
paragraphs = [u'<p>{}</p>'.format(p) for p in paragraphs]
|
||||
new_content = ''.join(paragraphs)
|
||||
|
||||
fp.write(content)
|
||||
|
||||
cmd = 'pandoc --normalize --reference-links --from=html --to={0} -o "{1}" "{2}"'.format(
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue