diff --git a/docs/importer.rst b/docs/importer.rst index 377820af..0147f900 100644 --- a/docs/importer.rst +++ b/docs/importer.rst @@ -19,6 +19,22 @@ The conversion from HTML to reStructuredText relies on `pandoc written with Markdown syntax, they will not be converted (as Pelican also supports Markdown). +Dependencies +"""""""""""" + +``pelican-import`` has two dependencies not required by the rest of pelican: + +- BeautifulSoup +- pandoc + +BeautifulSoup can be installed like any other Python package:: + + $ pip install BeautifulSoup + +For pandoc, install a package for your operating system from the +`pandoc site `_. + + Usage """"" @@ -26,8 +42,8 @@ Usage | [--dir-cat] | input -Optional arguments: -""""""""""""""""""" +Optional arguments +"""""""""""""""""" -h, --help show this help message and exit --wpfile Wordpress XML export diff --git a/pelican/tools/pelican_import.py b/pelican/tools/pelican_import.py index 57c4fc22..050b1010 100755 --- a/pelican/tools/pelican_import.py +++ b/pelican/tools/pelican_import.py @@ -13,7 +13,12 @@ from pelican.utils import slugify def wp2fields(xml): """Opens a wordpress XML file, and yield pelican fields""" - from BeautifulSoup import BeautifulStoneSoup + try: + from BeautifulSoup import BeautifulStoneSoup + except ImportError: + error = ('Missing dependency ' + '"BeautifulSoup" required to import Wordpress XML files.') + sys.exit(error) xmlfile = open(xml, encoding='utf-8').read() soup = BeautifulStoneSoup(xmlfile) @@ -40,7 +45,13 @@ def wp2fields(xml): def dc2fields(file): """Opens a Dotclear export file, and yield pelican fields""" - from BeautifulSoup import BeautifulStoneSoup + try: + from BeautifulSoup import BeautifulStoneSoup + except ImportError: + error = ('Missing dependency ' + '"BeautifulSoup" required to import Dotclear files.') + sys.exit(error) + in_cat = False in_post = False @@ -213,9 +224,12 @@ def fields2pelican(fields, out_markup, output_path, dircat=False): html_filename = os.path.join(output_path, 
filename+'.html') with open(html_filename, 'w', encoding='utf-8') as fp: - # Replace simple newlines with
+newline so that the HTML file - # represents the original post more accurately - content = content.replace("\n", "
\n") + # Replace newlines with paragraphs wrapped with

so + # HTML is valid before conversion + paragraphs = content.split('\n\n') + paragraphs = [u'

{}

'.format(p) for p in paragraphs] + new_content = ''.join(paragraphs) + fp.write(content) cmd = 'pandoc --normalize --reference-links --from=html --to={0} -o "{1}" "{2}"'.format(