mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Fine-tune url-value HTML attributes list.
This commit is contained in:
parent
04dba17b80
commit
e538aa2cde
2 changed files with 58 additions and 55 deletions
|
|
@ -125,52 +125,6 @@ class Content(object):
|
|||
if 'summary' in metadata:
|
||||
self._summary = metadata['summary']
|
||||
|
||||
# prepare the list of HTML tag attributes which have a URL value.
|
||||
# refer: http://stackoverflow.com/questions/2725156/complete-list-of-html-tag-attributes-which-have-a-url-value
|
||||
self._url_attributes = { # each item in this set is a tuple composed by tag_name, attr_name
|
||||
# HTML4 tags
|
||||
('a', 'href'),
|
||||
('applet', 'codebase'),
|
||||
('area', 'href'),
|
||||
('base', 'href'),
|
||||
('blockquote', 'cite'),
|
||||
('body', 'background'),
|
||||
('del', 'cite'),
|
||||
('form', 'action'),
|
||||
('frame', 'longdesc'),
|
||||
('frame', 'src'),
|
||||
('head', 'profile'),
|
||||
('iframe', 'longdesc'),
|
||||
('iframe', 'src'),
|
||||
('img', 'longdesc'),
|
||||
('img', 'src'),
|
||||
('img', 'usemap'),
|
||||
('input', 'src'),
|
||||
('input', 'usemap'),
|
||||
('ins', 'cite'),
|
||||
('link', 'href'),
|
||||
('object', 'classid'),
|
||||
('object', 'codebase'),
|
||||
('object', 'data'),
|
||||
('object', 'usemap'),
|
||||
('q', 'cite'),
|
||||
('script', 'src'),
|
||||
|
||||
# HTML5 tags
|
||||
('audio', 'src'),
|
||||
('button', 'formaction'),
|
||||
('command', 'icon'),
|
||||
('embed', 'src'),
|
||||
('html', 'manifest'),
|
||||
('input', 'formaction'),
|
||||
('source', 'src'),
|
||||
('video', 'poster'),
|
||||
('video', 'src'),
|
||||
}
|
||||
""":type: set of (tuple of (string, string)"""
|
||||
attribute_names = set(pair[1] for pair in self._url_attributes)
|
||||
self._url_attr_pattern = '|'.join(attribute_names)
|
||||
|
||||
signals.content_object_init.send(self)
|
||||
|
||||
def __str__(self):
|
||||
|
|
@ -235,12 +189,12 @@ class Content(object):
|
|||
|
||||
instrasite_link_regex = self.settings['INTRASITE_LINK_REGEX']
|
||||
regex = r"""
|
||||
(?P<markup><\s*(?P<tag>[^\s\>]+)[^\>]* # match tag with all url-value attributes
|
||||
(?P<attr>{1})\s*=)
|
||||
(?P<markup><\s*[^\>]* # match tag with all url-value attributes
|
||||
(?:href|src|poster|data|cite|formaction|action)\s*=)
|
||||
|
||||
(?P<quote>["\']) # require value to be quoted
|
||||
(?P<path>{0}(?P<value>.*?)) # the url value
|
||||
\4""".format(instrasite_link_regex, self._url_attr_pattern)
|
||||
\2""".format(instrasite_link_regex)
|
||||
hrefs = re.compile(regex, re.X)
|
||||
|
||||
def replacer(m):
|
||||
|
|
@ -249,12 +203,6 @@ class Content(object):
|
|||
path = value.path
|
||||
origin = m.group('path')
|
||||
|
||||
# verify HTML tag and attribute pair to avoid mis-replacing
|
||||
tag = m.group('tag')
|
||||
attr = m.group('attr')
|
||||
if attr != 'href' and attr != 'src' and (tag, attr) not in self._url_attributes:
|
||||
return m.group(0)
|
||||
|
||||
# XXX Put this in a different location.
|
||||
if what == 'filename':
|
||||
if path.startswith('/'):
|
||||
|
|
|
|||
|
|
@ -268,6 +268,61 @@ class TestPage(unittest.TestCase):
|
|||
'?utm_whatever=234&highlight=word#section-2">link</a>'
|
||||
)
|
||||
|
||||
def test_intrasite_link_more(self):
    # Checks that intrasite links are expanded in url-valued attributes
    # other than href: video.poster, source.src, object.data and
    # blockquote.cite.

    # type() needs a native string for the class name (bytes on PY2,
    # text on PY3); with unicode literals in effect the name has to be
    # picked per interpreter version.
    if six.PY3:
        dummy_name = '_DummyAsset'
    else:
        dummy_name = b'_DummyAsset'

    def make_asset(url):
        # Minimal stand-in exposing only the ``url`` attribute.
        return type(dummy_name, (object,), {'url': url})

    page_args = self.page_kwargs.copy()
    page_args['settings'] = get_settings()
    page_args['source_path'] = 'content'
    page_args['context']['filenames'] = {
        'images/poster.jpg': make_asset('images/poster.jpg'),
        'assets/video.mp4': make_asset('assets/video.mp4'),
        'images/graph.svg': make_asset('images/graph.svg'),
        'reference.rst': make_asset('reference.html'),
    }

    # Each case is (raw content, content expected after link expansion).
    cases = [
        # video.poster
        (
            'There is a video with poster '
            '<video controls poster="{filename}/images/poster.jpg">'
            '<source src="|filename|/assets/video.mp4" type="video/mp4">'
            '</video>',
            'There is a video with poster '
            '<video controls poster="http://notmyidea.org/images/poster.jpg">'
            '<source src="http://notmyidea.org/assets/video.mp4" type="video/mp4">'
            '</video>',
        ),
        # object.data
        (
            'There is a svg object '
            '<object data="{filename}/images/graph.svg" type="image/svg+xml"></object>',
            'There is a svg object '
            '<object data="http://notmyidea.org/images/graph.svg" type="image/svg+xml"></object>',
        ),
        # blockquote.cite
        (
            'There is a blockquote with cite attribute '
            '<blockquote cite="{filename}reference.rst">blah blah</blockquote>',
            'There is a blockquote with cite attribute '
            '<blockquote cite="http://notmyidea.org/reference.html">blah blah</blockquote>',
        ),
    ]

    for raw, expected in cases:
        page_args['content'] = raw
        rendered = Page(**page_args).get_content('http://notmyidea.org')
        self.assertEqual(rendered, expected)
|
||||
|
||||
|
||||
class TestArticle(TestPage):
|
||||
def test_template(self):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue