mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Fine-tune url-value HTML attributes list.
This commit is contained in:
parent
04dba17b80
commit
e538aa2cde
2 changed files with 58 additions and 55 deletions
|
|
@ -125,52 +125,6 @@ class Content(object):
|
||||||
if 'summary' in metadata:
|
if 'summary' in metadata:
|
||||||
self._summary = metadata['summary']
|
self._summary = metadata['summary']
|
||||||
|
|
||||||
# prepare the list of HTML tag attributes which have a URL value.
|
|
||||||
# refer: http://stackoverflow.com/questions/2725156/complete-list-of-html-tag-attributes-which-have-a-url-value
|
|
||||||
self._url_attributes = { # each item in this set is a tuple composed by tag_name, attr_name
|
|
||||||
# HTML4 tags
|
|
||||||
('a', 'href'),
|
|
||||||
('applet', 'codebase'),
|
|
||||||
('area', 'href'),
|
|
||||||
('base', 'href'),
|
|
||||||
('blockquote', 'cite'),
|
|
||||||
('body', 'background'),
|
|
||||||
('del', 'cite'),
|
|
||||||
('form', 'action'),
|
|
||||||
('frame', 'longdesc'),
|
|
||||||
('frame', 'src'),
|
|
||||||
('head', 'profile'),
|
|
||||||
('iframe', 'longdesc'),
|
|
||||||
('iframe', 'src'),
|
|
||||||
('img', 'longdesc'),
|
|
||||||
('img', 'src'),
|
|
||||||
('img', 'usemap'),
|
|
||||||
('input', 'src'),
|
|
||||||
('input', 'usemap'),
|
|
||||||
('ins', 'cite'),
|
|
||||||
('link', 'href'),
|
|
||||||
('object', 'classid'),
|
|
||||||
('object', 'codebase'),
|
|
||||||
('object', 'data'),
|
|
||||||
('object', 'usemap'),
|
|
||||||
('q', 'cite'),
|
|
||||||
('script', 'src'),
|
|
||||||
|
|
||||||
# HTML5 tags
|
|
||||||
('audio', 'src'),
|
|
||||||
('button', 'formaction'),
|
|
||||||
('command', 'icon'),
|
|
||||||
('embed', 'src'),
|
|
||||||
('html', 'manifest'),
|
|
||||||
('input', 'formaction'),
|
|
||||||
('source', 'src'),
|
|
||||||
('video', 'poster'),
|
|
||||||
('video', 'src'),
|
|
||||||
}
|
|
||||||
""":type: set of (tuple of (string, string)"""
|
|
||||||
attribute_names = set(pair[1] for pair in self._url_attributes)
|
|
||||||
self._url_attr_pattern = '|'.join(attribute_names)
|
|
||||||
|
|
||||||
signals.content_object_init.send(self)
|
signals.content_object_init.send(self)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
|
|
@ -235,12 +189,12 @@ class Content(object):
|
||||||
|
|
||||||
instrasite_link_regex = self.settings['INTRASITE_LINK_REGEX']
|
instrasite_link_regex = self.settings['INTRASITE_LINK_REGEX']
|
||||||
regex = r"""
|
regex = r"""
|
||||||
(?P<markup><\s*(?P<tag>[^\s\>]+)[^\>]* # match tag with all url-value attributes
|
(?P<markup><\s*[^\>]* # match tag with all url-value attributes
|
||||||
(?P<attr>{1})\s*=)
|
(?:href|src|poster|data|cite|formaction|action)\s*=)
|
||||||
|
|
||||||
(?P<quote>["\']) # require value to be quoted
|
(?P<quote>["\']) # require value to be quoted
|
||||||
(?P<path>{0}(?P<value>.*?)) # the url value
|
(?P<path>{0}(?P<value>.*?)) # the url value
|
||||||
\4""".format(instrasite_link_regex, self._url_attr_pattern)
|
\2""".format(instrasite_link_regex)
|
||||||
hrefs = re.compile(regex, re.X)
|
hrefs = re.compile(regex, re.X)
|
||||||
|
|
||||||
def replacer(m):
|
def replacer(m):
|
||||||
|
|
@ -249,12 +203,6 @@ class Content(object):
|
||||||
path = value.path
|
path = value.path
|
||||||
origin = m.group('path')
|
origin = m.group('path')
|
||||||
|
|
||||||
# verify HTML tag and attribute pair to avoid miss-replacing
|
|
||||||
tag = m.group('tag')
|
|
||||||
attr = m.group('attr')
|
|
||||||
if attr != 'href' and attr != 'src' and (tag, attr) not in self._url_attributes:
|
|
||||||
return m.group(0)
|
|
||||||
|
|
||||||
# XXX Put this in a different location.
|
# XXX Put this in a different location.
|
||||||
if what == 'filename':
|
if what == 'filename':
|
||||||
if path.startswith('/'):
|
if path.startswith('/'):
|
||||||
|
|
|
||||||
|
|
@ -268,6 +268,61 @@ class TestPage(unittest.TestCase):
|
||||||
'?utm_whatever=234&highlight=word#section-2">link</a>'
|
'?utm_whatever=234&highlight=word#section-2">link</a>'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_intrasite_link_more(self):
|
||||||
|
# type does not take unicode in PY2 and bytes in PY3, which in
|
||||||
|
# combination with unicode literals leads to following insane line:
|
||||||
|
cls_name = '_DummyAsset' if six.PY3 else b'_DummyAsset'
|
||||||
|
|
||||||
|
args = self.page_kwargs.copy()
|
||||||
|
args['settings'] = get_settings()
|
||||||
|
args['source_path'] = 'content'
|
||||||
|
args['context']['filenames'] = {
|
||||||
|
'images/poster.jpg': type(cls_name, (object,), {'url': 'images/poster.jpg'}),
|
||||||
|
'assets/video.mp4': type(cls_name, (object,), {'url': 'assets/video.mp4'}),
|
||||||
|
'images/graph.svg': type(cls_name, (object,), {'url': 'images/graph.svg'}),
|
||||||
|
'reference.rst': type(cls_name, (object,), {'url': 'reference.html'}),
|
||||||
|
}
|
||||||
|
|
||||||
|
# video.poster
|
||||||
|
args['content'] = (
|
||||||
|
'There is a video with poster '
|
||||||
|
'<video controls poster="{filename}/images/poster.jpg">'
|
||||||
|
'<source src="|filename|/assets/video.mp4" type="video/mp4">'
|
||||||
|
'</video>'
|
||||||
|
)
|
||||||
|
content = Page(**args).get_content('http://notmyidea.org')
|
||||||
|
self.assertEqual(
|
||||||
|
content,
|
||||||
|
'There is a video with poster '
|
||||||
|
'<video controls poster="http://notmyidea.org/images/poster.jpg">'
|
||||||
|
'<source src="http://notmyidea.org/assets/video.mp4" type="video/mp4">'
|
||||||
|
'</video>'
|
||||||
|
)
|
||||||
|
|
||||||
|
# object.data
|
||||||
|
args['content'] = (
|
||||||
|
'There is a svg object '
|
||||||
|
'<object data="{filename}/images/graph.svg" type="image/svg+xml"></object>'
|
||||||
|
)
|
||||||
|
content = Page(**args).get_content('http://notmyidea.org')
|
||||||
|
self.assertEqual(
|
||||||
|
content,
|
||||||
|
'There is a svg object '
|
||||||
|
'<object data="http://notmyidea.org/images/graph.svg" type="image/svg+xml"></object>'
|
||||||
|
)
|
||||||
|
|
||||||
|
# blockquote.cite
|
||||||
|
args['content'] = (
|
||||||
|
'There is a blockquote with cite attribute '
|
||||||
|
'<blockquote cite="{filename}reference.rst">blah blah</blockquote>'
|
||||||
|
)
|
||||||
|
content = Page(**args).get_content('http://notmyidea.org')
|
||||||
|
self.assertEqual(
|
||||||
|
content,
|
||||||
|
'There is a blockquote with cite attribute '
|
||||||
|
'<blockquote cite="http://notmyidea.org/reference.html">blah blah</blockquote>'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class TestArticle(TestPage):
|
class TestArticle(TestPage):
|
||||||
def test_template(self):
|
def test_template(self):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue