Revert "Get HtmlReader to work again"

This reverts commit 39db9ddcfd.

Conflicts:

	tests/test_readers.py
This commit is contained in:
Alexis Métaireau 2012-10-29 00:36:42 +01:00
commit 84c708b74b
7 changed files with 15 additions and 72 deletions

View file

@ -172,17 +172,6 @@ Markdown posts should follow this pattern::
This is the content of my super blog post. This is the content of my super blog post.
Your third option is to write raw html (by ending your file in ``.html``)::
<!-- title: My super title -->
<!-- date: 2010-12-03 10:20 -->
<!-- tags: thats, awesome -->
<!-- category: yeah -->
<p>
This is the content of my super blog post.
</p>
Note that, aside from the title, none of this metadata is mandatory: if the date Note that, aside from the title, none of this metadata is mandatory: if the date
is not specified, Pelican will rely on the file's "mtime" timestamp, and the is not specified, Pelican will rely on the file's "mtime" timestamp, and the
category can be determined by the directory in which the file resides. For category can be determined by the directory in which the file resides. For

View file

@ -60,9 +60,9 @@ Setting name (default value) What doe
here or a single string representing one locale. here or a single string representing one locale.
When providing a list, all the locales will be tried When providing a list, all the locales will be tried
until one works. until one works.
`MARKUP` (``('rst', 'md', 'html')``) A list of available markup languages you want `MARKUP` (``('rst', 'md')``) A list of available markup languages you want
to use. For the moment, the only available values to use. For the moment, the only available values
are `rst`, `md` and `html`. are `rst` and `md`.
`MD_EXTENSIONS` (``['codehilite','extra']``) A list of the extensions that the Markdown processor `MD_EXTENSIONS` (``['codehilite','extra']``) A list of the extensions that the Markdown processor
will use. Refer to the extensions chapter in the will use. Refer to the extensions chapter in the
Python-Markdown documentation for a complete list of Python-Markdown documentation for a complete list of

View file

@ -147,30 +147,19 @@ class MarkdownReader(Reader):
class HtmlReader(Reader): class HtmlReader(Reader):
file_extensions = ['html', 'htm'] file_extensions = ['html', 'htm']
# re.DOTALL and .*? (minimal match of an arbitrary number of characters) _re = re.compile('\<\!\-\-\#\s?[A-z0-9_-]*\s?\:s?[A-z0-9\s_-]*\s?\-\-\>')
# allow multi-line metadata to be matched correctly
_re = re.compile('<\!--([^\:]*):(.*?)-->', re.DOTALL)
def read(self, filename): def read(self, filename):
"""Parse content and metadata of (x)HTML files. """Parse content and metadata of (x)HTML files"""
with open(filename) as content:
metadata = {'title': 'unnamed'}
for i in self._re.findall(content):
key = i.split(':')[0][5:].strip()
value = i.split(':')[-1][:-3].strip()
name = key.lower()
metadata[name] = self.process_metadata(name, value)
Matches for metadata tags in the form <!-- name:value --> return content, metadata
Activated when you add 'html' to your MARKUP settings variable
"""
content = open(filename)
metadata = {'title': 'unnamed'}
for comment in self._re.findall(content):
key = comment[0].strip().lower()
value = comment[1].strip()
# remove indentation from multi-line metadata
value = re.sub('[ \t]+', ' ', value)
value = re.sub(' ?\n ?', '\n', value)
metadata[key] = self.process_metadata(key, value)
return content, metadata
_EXTENSIONS = {} _EXTENSIONS = {}

View file

@ -21,7 +21,7 @@ _DEFAULT_CONFIG = {'PATH': '.',
'PAGE_EXCLUDES': (), 'PAGE_EXCLUDES': (),
'THEME': DEFAULT_THEME, 'THEME': DEFAULT_THEME,
'OUTPUT_PATH': 'output/', 'OUTPUT_PATH': 'output/',
'MARKUP': ('rst', 'md', 'html'), 'MARKUP': ('rst', 'md'),
'STATIC_PATHS': ['images', ], 'STATIC_PATHS': ['images', ],
'THEME_STATIC_PATHS': ['static', ], 'THEME_STATIC_PATHS': ['static', ],
'FEED_ATOM': 'feeds/all.atom.xml', 'FEED_ATOM': 'feeds/all.atom.xml',

View file

@ -1,13 +0,0 @@
<!-- title: A great html article with metadata -->
<!-- tags: foo, bar, foobar -->
<!-- date: 2010-12-02 10:14 -->
<!-- category: yeah -->
<!-- author: Alexis Métaireau -->
<!-- summary:
Multi-line metadata should be supported
as well as <strong>inline markup</strong>.
-->
<!-- custom_field: http://notmyidea.org -->
<h1>This is an article in html with metadata</h1>
<p>It features very interesting insights.</p>

View file

@ -74,8 +74,7 @@ class TestArticlesGenerator(unittest.TestCase):
[u'This is an article with category !', 'published', 'yeah', 'article'], [u'This is an article with category !', 'published', 'yeah', 'article'],
[u'This is an article without category !', 'published', 'Default', 'article'], [u'This is an article without category !', 'published', 'Default', 'article'],
[u'This is an article without category !', 'published', 'TestCategory', 'article'], [u'This is an article without category !', 'published', 'TestCategory', 'article'],
[u'This is a super article !', 'published', 'yeah', 'article'], [u'This is a super article !', 'published', 'yeah', 'article']
[u'A great html article with metadata', 'published', u'yeah', 'article']
] ]
self.assertItemsEqual(articles_expected, articles) self.assertItemsEqual(articles_expected, articles)

View file

@ -109,24 +109,3 @@ class MdReaderTest(unittest.TestCase):
'<h3 id="level2">Level2</h3>' '<h3 id="level2">Level2</h3>'
self.assertEqual(content, expected) self.assertEqual(content, expected)
class HtmlReaderTest(unittest.TestCase):
def test_article_with_metadata(self):
reader = readers.HtmlReader({})
content, metadata = reader.read(_filename('article_with_html_metadata.html'))
expected = {
'category': 'yeah',
'author': u'Alexis Métaireau',
'title': 'A great html article with metadata',
'summary': u'Multi-line metadata should be'\
u' supported\nas well as <strong>inline'\
u' markup</strong>.',
'date': datetime.datetime(2010, 12, 2, 10, 14),
'tags': ['foo', 'bar', 'foobar'],
'custom_field': 'http://notmyidea.org',
}
for key, value in expected.items():
self.assertEquals(value, metadata[key], key)