mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Added support for HTML as input format
This commit is contained in:
parent
ca58928b6c
commit
c019443026
1 changed files with 19 additions and 1 deletions
|
|
@ -59,7 +59,25 @@ class MarkdownReader(object):
|
|||
)(value[0])
|
||||
return content, metadatas
|
||||
|
||||
_EXTENSIONS = {'rst': RstReader, 'md': MarkdownReader} # supported formats
|
||||
|
||||
class HtmlReader(object):
    """Reader for (x)HTML source files.

    Metadata is declared inside HTML comments of the form
    ``<!--# key : value -->``. Keys and values may contain ASCII letters,
    digits, ``_`` and ``-``; values may additionally contain whitespace.
    """

    # Group 1 is the metadata key, group 2 the (unstripped) value.
    _re = re.compile(
        r'<!--#\s?([A-Za-z0-9_-]*)\s?:\s?([A-Za-z0-9\s_-]*)\s?-->')

    def read(self, filename):
        """Parse content and metadata of (x)HTML files.

        :param filename: path of the HTML file to read.
        :returns: a ``(content, metadatas)`` tuple where ``content`` is the
            full text of the file and ``metadatas`` maps lower-cased keys
            to their stripped values. ``title`` defaults to ``'unnamed'``
            when the file does not declare one.
        """
        # Read the text eagerly and close the handle: the regex needs a
        # string, and callers expect the content itself, not a file object.
        with open(filename) as source:
            content = source.read()

        metadatas = {'title': 'unnamed'}
        for match in self._re.finditer(content):
            key = match.group(1).strip()
            value = match.group(2).strip()
            metadatas[key.lower()] = value

        return content, metadatas
|
||||
|
||||
|
||||
|
||||
_EXTENSIONS = {'rst': RstReader, 'md': MarkdownReader, 'html': HtmlReader} # supported formats: format key -> reader class
|
||||
|
||||
|
||||
def read_file(filename, fmt=None):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue