mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Revert "Get HtmlReader to work again"
This reverts commit 39db9ddcfd.
Conflicts:
tests/test_readers.py
This commit is contained in:
parent
4349a5e815
commit
84c708b74b
7 changed files with 15 additions and 72 deletions
|
|
@ -172,17 +172,6 @@ Markdown posts should follow this pattern::
|
||||||
|
|
||||||
This is the content of my super blog post.
|
This is the content of my super blog post.
|
||||||
|
|
||||||
Your third option is to write raw HTML (by ending your file in ``.html``)::
|
|
||||||
|
|
||||||
<!-- title: My super title -->
|
|
||||||
<!-- date: 2010-12-03 10:20 -->
|
|
||||||
<!-- tags: thats, awesome -->
|
|
||||||
<!-- category: yeah -->
|
|
||||||
|
|
||||||
<p>
|
|
||||||
This is the content of my super blog post.
|
|
||||||
</p>
|
|
||||||
|
|
||||||
Note that, aside from the title, none of this metadata is mandatory: if the date
|
Note that, aside from the title, none of this metadata is mandatory: if the date
|
||||||
is not specified, Pelican will rely on the file's "mtime" timestamp, and the
|
is not specified, Pelican will rely on the file's "mtime" timestamp, and the
|
||||||
category can be determined by the directory in which the file resides. For
|
category can be determined by the directory in which the file resides. For
|
||||||
|
|
|
||||||
|
|
@ -60,9 +60,9 @@ Setting name (default value) What doe
|
||||||
here or a single string representing one locale.
|
here or a single string representing one locale.
|
||||||
When providing a list, all the locales will be tried
|
When providing a list, all the locales will be tried
|
||||||
until one works.
|
until one works.
|
||||||
`MARKUP` (``('rst', 'md', 'html')``) A list of available markup languages you want
|
`MARKUP` (``('rst', 'md')``) A list of available markup languages you want
|
||||||
to use. For the moment, the only available values
|
to use. For the moment, the only available values
|
||||||
are `rst`, `md` and `html`.
|
are `rst` and `md`.
|
||||||
`MD_EXTENSIONS` (``['codehilite','extra']``) A list of the extensions that the Markdown processor
|
`MD_EXTENSIONS` (``['codehilite','extra']``) A list of the extensions that the Markdown processor
|
||||||
will use. Refer to the extensions chapter in the
|
will use. Refer to the extensions chapter in the
|
||||||
Python-Markdown documentation for a complete list of
|
Python-Markdown documentation for a complete list of
|
||||||
|
|
|
||||||
|
|
@ -147,30 +147,19 @@ class MarkdownReader(Reader):
|
||||||
|
|
||||||
class HtmlReader(Reader):
|
class HtmlReader(Reader):
|
||||||
file_extensions = ['html', 'htm']
|
file_extensions = ['html', 'htm']
|
||||||
# re.DOTALL and .*? (minimal match of an arbitrary number of characters)
|
_re = re.compile('\<\!\-\-\#\s?[A-z0-9_-]*\s?\:s?[A-z0-9\s_-]*\s?\-\-\>')
|
||||||
# allow multi-line metadata to be matched correctly
|
|
||||||
_re = re.compile('<\!--([^\:]*):(.*?)-->', re.DOTALL)
|
|
||||||
|
|
||||||
def read(self, filename):
|
def read(self, filename):
|
||||||
"""Parse content and metadata of (x)HTML files.
|
"""Parse content and metadata of (x)HTML files"""
|
||||||
|
with open(filename) as content:
|
||||||
|
metadata = {'title': 'unnamed'}
|
||||||
|
for i in self._re.findall(content):
|
||||||
|
key = i.split(':')[0][5:].strip()
|
||||||
|
value = i.split(':')[-1][:-3].strip()
|
||||||
|
name = key.lower()
|
||||||
|
metadata[name] = self.process_metadata(name, value)
|
||||||
|
|
||||||
Matches for metadata tags in the form <!-- name:value -->
|
return content, metadata
|
||||||
Activated when you add 'html' to your MARKUP settings variable
|
|
||||||
|
|
||||||
"""
|
|
||||||
content = open(filename)
|
|
||||||
metadata = {'title': 'unnamed'}
|
|
||||||
for comment in self._re.findall(content):
|
|
||||||
key = comment[0].strip().lower()
|
|
||||||
value = comment[1].strip()
|
|
||||||
|
|
||||||
# remove indentation from multi-line metadata
|
|
||||||
value = re.sub('[ \t]+', ' ', value)
|
|
||||||
value = re.sub(' ?\n ?', '\n', value)
|
|
||||||
|
|
||||||
metadata[key] = self.process_metadata(key, value)
|
|
||||||
|
|
||||||
return content, metadata
|
|
||||||
|
|
||||||
|
|
||||||
_EXTENSIONS = {}
|
_EXTENSIONS = {}
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@ _DEFAULT_CONFIG = {'PATH': '.',
|
||||||
'PAGE_EXCLUDES': (),
|
'PAGE_EXCLUDES': (),
|
||||||
'THEME': DEFAULT_THEME,
|
'THEME': DEFAULT_THEME,
|
||||||
'OUTPUT_PATH': 'output/',
|
'OUTPUT_PATH': 'output/',
|
||||||
'MARKUP': ('rst', 'md', 'html'),
|
'MARKUP': ('rst', 'md'),
|
||||||
'STATIC_PATHS': ['images', ],
|
'STATIC_PATHS': ['images', ],
|
||||||
'THEME_STATIC_PATHS': ['static', ],
|
'THEME_STATIC_PATHS': ['static', ],
|
||||||
'FEED_ATOM': 'feeds/all.atom.xml',
|
'FEED_ATOM': 'feeds/all.atom.xml',
|
||||||
|
|
|
||||||
|
|
@ -1,13 +0,0 @@
|
||||||
<!-- title: A great html article with metadata -->
|
|
||||||
<!-- tags: foo, bar, foobar -->
|
|
||||||
<!-- date: 2010-12-02 10:14 -->
|
|
||||||
<!-- category: yeah -->
|
|
||||||
<!-- author: Alexis Métaireau -->
|
|
||||||
<!-- summary:
|
|
||||||
Multi-line metadata should be supported
|
|
||||||
as well as <strong>inline markup</strong>.
|
|
||||||
-->
|
|
||||||
<!-- custom_field: http://notmyidea.org -->
|
|
||||||
|
|
||||||
<h1>This is an article in html with metadata</h1>
|
|
||||||
<p>It features very interesting insights.</p>
|
|
||||||
|
|
@ -74,8 +74,7 @@ class TestArticlesGenerator(unittest.TestCase):
|
||||||
[u'This is an article with category !', 'published', 'yeah', 'article'],
|
[u'This is an article with category !', 'published', 'yeah', 'article'],
|
||||||
[u'This is an article without category !', 'published', 'Default', 'article'],
|
[u'This is an article without category !', 'published', 'Default', 'article'],
|
||||||
[u'This is an article without category !', 'published', 'TestCategory', 'article'],
|
[u'This is an article without category !', 'published', 'TestCategory', 'article'],
|
||||||
[u'This is a super article !', 'published', 'yeah', 'article'],
|
[u'This is a super article !', 'published', 'yeah', 'article']
|
||||||
[u'A great html article with metadata', 'published', u'yeah', 'article']
|
|
||||||
]
|
]
|
||||||
self.assertItemsEqual(articles_expected, articles)
|
self.assertItemsEqual(articles_expected, articles)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -109,24 +109,3 @@ class MdReaderTest(unittest.TestCase):
|
||||||
'<h3 id="level2">Level2</h3>'
|
'<h3 id="level2">Level2</h3>'
|
||||||
|
|
||||||
self.assertEqual(content, expected)
|
self.assertEqual(content, expected)
|
||||||
|
|
||||||
|
|
||||||
class HtmlReaderTest(unittest.TestCase):
|
|
||||||
|
|
||||||
def test_article_with_metadata(self):
|
|
||||||
reader = readers.HtmlReader({})
|
|
||||||
content, metadata = reader.read(_filename('article_with_html_metadata.html'))
|
|
||||||
expected = {
|
|
||||||
'category': 'yeah',
|
|
||||||
'author': u'Alexis Métaireau',
|
|
||||||
'title': 'A great html article with metadata',
|
|
||||||
'summary': u'Multi-line metadata should be'\
|
|
||||||
u' supported\nas well as <strong>inline'\
|
|
||||||
u' markup</strong>.',
|
|
||||||
'date': datetime.datetime(2010, 12, 2, 10, 14),
|
|
||||||
'tags': ['foo', 'bar', 'foobar'],
|
|
||||||
'custom_field': 'http://notmyidea.org',
|
|
||||||
}
|
|
||||||
|
|
||||||
for key, value in expected.items():
|
|
||||||
self.assertEquals(value, metadata[key], key)
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue