# -*- coding: utf-8 -*- from __future__ import print_function, unicode_literals import copy import locale import logging import os import re import sys import pytz import six from six.moves.urllib.parse import urlparse, urlunparse from pelican import signals from pelican.settings import DEFAULT_CONFIG from pelican.utils import (SafeDatetime, deprecated_attribute, memoized, path_to_url, posixize_path, python_2_unicode_compatible, sanitised_join, set_date_tzinfo, slugify, strftime, truncate_html_words) # Import these so that they're avalaible when you import from pelican.contents. from pelican.urlwrappers import (Author, Category, Tag, URLWrapper) # NOQA logger = logging.getLogger(__name__) @python_2_unicode_compatible class Content(object): """Represents a content. :param content: the string to parse, containing the original content. :param metadata: the metadata associated to this page (optional). :param settings: the settings dictionary (optional). :param source_path: The location of the source of this content (if any). :param context: The shared context between generators. """ @deprecated_attribute(old='filename', new='source_path', since=(3, 2, 0)) def filename(): return None def __init__(self, content, metadata=None, settings=None, source_path=None, context=None): if metadata is None: metadata = {} if settings is None: settings = copy.deepcopy(DEFAULT_CONFIG) self.settings = settings self._content = content if context is None: context = {} self._context = context self.translations = [] local_metadata = dict() local_metadata.update(metadata) # set metadata as attributes for key, value in local_metadata.items(): if key in ('save_as', 'url'): key = 'override_' + key setattr(self, key.lower(), value) # also keep track of the metadata attributes available self.metadata = local_metadata # default template if it's not defined in page self.template = self._get_template() # First, read the authors from "authors", if not, fallback to "author" # and if not use the settings defined one, if any. if not hasattr(self, 'author'): if hasattr(self, 'authors'): self.author = self.authors[0] elif 'AUTHOR' in settings: self.author = Author(settings['AUTHOR'], settings) if not hasattr(self, 'authors') and hasattr(self, 'author'): self.authors = [self.author] # XXX Split all the following code into pieces, there is too much here. # manage languages self.in_default_lang = True if 'DEFAULT_LANG' in settings: default_lang = settings['DEFAULT_LANG'].lower() if not hasattr(self, 'lang'): self.lang = default_lang self.in_default_lang = (self.lang == default_lang) # create the slug if not existing, generate slug according to # setting of SLUG_ATTRIBUTE if not hasattr(self, 'slug'): if (settings['SLUGIFY_SOURCE'] == 'title' and hasattr(self, 'title')): self.slug = slugify(self.title, settings.get('SLUG_SUBSTITUTIONS', ())) elif (settings['SLUGIFY_SOURCE'] == 'basename' and source_path is not None): basename = os.path.basename( os.path.splitext(source_path)[0]) self.slug = slugify( basename, settings.get('SLUG_SUBSTITUTIONS', ())) self.source_path = source_path # manage the date format if not hasattr(self, 'date_format'): if hasattr(self, 'lang') and self.lang in settings['DATE_FORMATS']: self.date_format = settings['DATE_FORMATS'][self.lang] else: self.date_format = settings['DEFAULT_DATE_FORMAT'] if isinstance(self.date_format, tuple): locale_string = self.date_format[0] if sys.version_info < (3, ) and isinstance(locale_string, six.text_type): locale_string = locale_string.encode('ascii') locale.setlocale(locale.LC_ALL, locale_string) self.date_format = self.date_format[1] # manage timezone default_timezone = settings.get('TIMEZONE', 'UTC') timezone = getattr(self, 'timezone', default_timezone) if hasattr(self, 'date'): self.date = set_date_tzinfo(self.date, timezone) self.locale_date = strftime(self.date, self.date_format) if hasattr(self, 'modified'): self.modified = set_date_tzinfo(self.modified, timezone) self.locale_modified = strftime(self.modified, self.date_format) # manage status if not hasattr(self, 'status'): self.status = getattr(self, 'default_status', None) # store the summary metadata if it is set if 'summary' in metadata: self._summary = metadata['summary'] signals.content_object_init.send(self) def __str__(self): return self.source_path or repr(self) def _has_valid_mandatory_properties(self): """Test mandatory properties are set.""" for prop in self.mandatory_properties: if not hasattr(self, prop): logger.error( "Skipping %s: could not find information about '%s'", self, prop) return False return True def _has_valid_save_as(self): """Return true if save_as doesn't write outside output path, false otherwise.""" try: output_path = self.settings["OUTPUT_PATH"] except KeyError: # we cannot check return True try: sanitised_join(output_path, self.save_as) except RuntimeError: # outside output_dir logger.error( "Skipping %s: file %r would be written outside output path", self, self.save_as, ) return False return True def _has_valid_status(self): if hasattr(self, 'allowed_statuses'): if self.status not in self.allowed_statuses: logger.error( "Unknown status '%s' for file %s, skipping it.", self.status, self ) return False # if undefined we allow all return True def is_valid(self): """Validate Content""" # Use all() to not short circuit and get results of all validations return all([self._has_valid_mandatory_properties(), self._has_valid_save_as(), self._has_valid_status()]) @property def url_format(self): """Returns the URL, formatted with the proper values""" metadata = copy.copy(self.metadata) path = self.metadata.get('path', self.get_relative_source_path()) metadata.update({ 'path': path_to_url(path), 'slug': getattr(self, 'slug', ''), 'lang': getattr(self, 'lang', 'en'), 'date': getattr(self, 'date', SafeDatetime.now()), 'author': self.author.slug if hasattr(self, 'author') else '', 'tag': self.tag.slug if hasattr(self, 'tag') else '', 'category': self.category.slug if hasattr(self, 'category') else '' }) return metadata def _expand_settings(self, key, klass=None): if not klass: klass = self.__class__.__name__ fq_key = ('%s_%s' % (klass, key)).upper() return self.settings[fq_key].format(**self.url_format) def get_url_setting(self, key): if hasattr(self, 'override_' + key): return getattr(self, 'override_' + key) key = key if self.in_default_lang else 'lang_%s' % key return self._expand_settings(key) def _link_replacer(self, siteurl, m): what = m.group('what') value = urlparse(m.group('value')) path = value.path origin = m.group('path') # XXX Put this in a different location. if what in {'filename', 'attach'}: if path.startswith('/'): path = path[1:] else: # relative to the source path of this content path = self.get_relative_source_path( os.path.join(self.relative_dir, path) ) if path not in self._context['filenames']: unquoted_path = path.replace('%20', ' ') if unquoted_path in self._context['filenames']: path = unquoted_path linked_content = self._context['filenames'].get(path) if linked_content: if what == 'attach': if isinstance(linked_content, Static): linked_content.attach_to(self) else: logger.warning( "%s used {attach} link syntax on a " "non-static file. Use {filename} instead.", self.get_relative_source_path()) origin = '/'.join((siteurl, linked_content.url)) origin = origin.replace('\\', '/') # for Windows paths. else: logger.warning( "Unable to find '%s', skipping url replacement.", value.geturl(), extra={ 'limit_msg': ("Other resources were not found " "and their urls not replaced")}) elif what == 'category': origin = '/'.join((siteurl, Category(path, self.settings).url)) elif what == 'tag': origin = '/'.join((siteurl, Tag(path, self.settings).url)) elif what == 'index': origin = '/'.join((siteurl, self.settings['INDEX_SAVE_AS'])) elif what == 'author': origin = '/'.join((siteurl, Author(path, self.settings).url)) else: logger.warning( "Replacement Indicator '%s' not recognized, " "skipping replacement", what) # keep all other parts, such as query, fragment, etc. parts = list(value) parts[2] = origin origin = urlunparse(parts) return ''.join((m.group('markup'), m.group('quote'), origin, m.group('quote'))) def _update_content(self, content, siteurl): """Update the content attribute. Change all the relative paths of the content to relative paths suitable for the output content. :param content: content resource that will be passed to the templates. :param siteurl: siteurl which is locally generated by the writer in case of RELATIVE_URLS. """ if not content: return content instrasite_link_regex = self.settings['INTRASITE_LINK_REGEX'] regex = r""" (?P<[^\>]+ # match tag with all url-value attributes (?:href|src|poster|data|cite|formaction|action)\s*=\s*) (?P["\']) # require value to be quoted (?P{0}(?P.*?)) # the url value \2""".format(instrasite_link_regex) hrefs = re.compile(regex, re.X) return hrefs.sub(lambda m: self._link_replacer(siteurl, m), content) def get_siteurl(self): return self._context.get('localsiteurl', '') @memoized def get_content(self, siteurl): if hasattr(self, '_get_content'): content = self._get_content() else: content = self._content return self._update_content(content, siteurl) @property def content(self): return self.get_content(self.get_siteurl()) @memoized def get_summary(self, siteurl): """Returns the summary of an article. This is based on the summary metadata if set, otherwise truncate the content. """ if hasattr(self, '_summary'): return self._update_content(self._summary, siteurl) if self.settings['SUMMARY_MAX_LENGTH'] is None: return self.content return truncate_html_words(self.content, self.settings['SUMMARY_MAX_LENGTH']) @property def summary(self): return self.get_summary(self.get_siteurl()) def _get_summary(self): """deprecated function to access summary""" logger.warning('_get_summary() has been deprecated since 3.6.4. ' 'Use the summary decorator instead') return self.summary @summary.setter def summary(self, value): """Dummy function""" pass @property def status(self): return self._status @status.setter def status(self, value): # TODO maybe typecheck self._status = value.lower() @property def url(self): return self.get_url_setting('url') @property def save_as(self): return self.get_url_setting('save_as') def _get_template(self): if hasattr(self, 'template') and self.template is not None: return self.template else: return self.default_template def get_relative_source_path(self, source_path=None): """Return the relative path (from the content path) to the given source_path. If no source path is specified, use the source path of this content object. """ if not source_path: source_path = self.source_path if source_path is None: return None return posixize_path( os.path.relpath( os.path.abspath(os.path.join( self.settings['PATH'], source_path)), os.path.abspath(self.settings['PATH']) )) @property def relative_dir(self): return posixize_path( os.path.dirname( os.path.relpath( os.path.abspath(self.source_path), os.path.abspath(self.settings['PATH'])))) class Page(Content): mandatory_properties = ('title',) allowed_statuses = ('published', 'hidden') default_status = 'published' default_template = 'page' class Article(Content): mandatory_properties = ('title', 'date', 'category') allowed_statuses = ('published', 'draft') default_status = 'published' default_template = 'article' def __init__(self, *args, **kwargs): super(Article, self).__init__(*args, **kwargs) # handle WITH_FUTURE_DATES (designate article to draft based on date) if not self.settings['WITH_FUTURE_DATES'] and hasattr(self, 'date'): if self.date.tzinfo is None: now = SafeDatetime.now() else: now = SafeDatetime.utcnow().replace(tzinfo=pytz.utc) if self.date > now: self.status = 'draft' # if we are a draft and there is no date provided, set max datetime if not hasattr(self, 'date') and self.status == 'draft': self.date = SafeDatetime.max def _expand_settings(self, key): klass = 'article' if self.status == 'published' else 'draft' return super(Article, self)._expand_settings(key, klass) @python_2_unicode_compatible class Static(Page): def __init__(self, *args, **kwargs): super(Static, self).__init__(*args, **kwargs) self._output_location_referenced = False @deprecated_attribute(old='filepath', new='source_path', since=(3, 2, 0)) def filepath(): return None @deprecated_attribute(old='src', new='source_path', since=(3, 2, 0)) def src(): return None @deprecated_attribute(old='dst', new='save_as', since=(3, 2, 0)) def dst(): return None @property def url(self): # Note when url has been referenced, so we can avoid overriding it. self._output_location_referenced = True return super(Static, self).url @property def save_as(self): # Note when save_as has been referenced, so we can avoid overriding it. self._output_location_referenced = True return super(Static, self).save_as def attach_to(self, content): """Override our output directory with that of the given content object. """ # Determine our file's new output path relative to the linking # document. If it currently lives beneath the linking # document's source directory, preserve that relationship on output. # Otherwise, make it a sibling. linking_source_dir = os.path.dirname(content.source_path) tail_path = os.path.relpath(self.source_path, linking_source_dir) if tail_path.startswith(os.pardir + os.sep): tail_path = os.path.basename(tail_path) new_save_as = os.path.join( os.path.dirname(content.save_as), tail_path) # We do not build our new url by joining tail_path with the linking # document's url, because we cannot know just by looking at the latter # whether it points to the document itself or to its parent directory. # (An url like 'some/content' might mean a directory named 'some' # with a file named 'content', or it might mean a directory named # 'some/content' with a file named 'index.html'.) Rather than trying # to figure it out by comparing the linking document's url and save_as # path, we simply build our new url from our new save_as path. new_url = path_to_url(new_save_as) def _log_reason(reason): logger.warning( "The {attach} link in %s cannot relocate " "%s because %s. Falling back to " "{filename} link behavior instead.", content.get_relative_source_path(), self.get_relative_source_path(), reason, extra={'limit_msg': "More {attach} warnings silenced."}) # We never override an override, because we don't want to interfere # with user-defined overrides that might be in EXTRA_PATH_METADATA. if hasattr(self, 'override_save_as') or hasattr(self, 'override_url'): if new_save_as != self.save_as or new_url != self.url: _log_reason("its output location was already overridden") return # We never change an output path that has already been referenced, # because we don't want to break links that depend on that path. if self._output_location_referenced: if new_save_as != self.save_as or new_url != self.url: _log_reason("another link already referenced its location") return self.override_save_as = new_save_as self.override_url = new_url