forked from github/pelican
Calling the module-level functions on an uninitialized logging object. This allows us to simplify log.py and to use one logger object per file.
240 lines
7.6 KiB
Python
240 lines
7.6 KiB
Python
# -*- coding: utf-8 -*-
|
|
import os
|
|
import re
|
|
import pytz
|
|
import shutil
|
|
import logging
|
|
|
|
from codecs import open as _open
|
|
from datetime import datetime
|
|
from itertools import groupby
|
|
from operator import attrgetter
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def get_date(string):
    """Parse a date/datetime string and return a ``datetime`` object.

    Several common formats are attempted in order; runs of spaces in
    the input are collapsed first.

    :param string: the date string to parse
    :raises ValueError: if no known format matches the given date
    """
    candidate = re.sub(' +', ' ', string)
    known_formats = ('%Y-%m-%d %H:%M', '%Y/%m/%d %H:%M',
                     '%Y-%m-%d', '%Y/%m/%d',
                     '%d-%m-%Y', '%Y-%d-%m',  # Weird ones
                     '%d/%m/%Y', '%d.%m.%Y',
                     '%d.%m.%Y %H:%M', '%Y-%m-%d %H:%M:%S')
    for fmt in known_formats:
        try:
            parsed = datetime.strptime(candidate, fmt)
        except ValueError:
            continue
        return parsed
    raise ValueError("'%s' is not a valid date" % candidate)
|
|
|
|
|
|
def open(filename):
    """Open a file and return its content as a unicode string.

    The file is assumed to be UTF-8 encoded.

    :param filename: path of the file to read
    """
    # use a context manager so the file handle is closed even if
    # read() raises (the original leaked the handle)
    with _open(filename, encoding='utf-8') as f:
        return f.read()
|
|
|
|
|
|
def slugify(value):
    """Normalize a string into a URL slug.

    Converts to lowercase, strips accents and non-alphanumeric
    characters, and collapses whitespace/hyphen runs into single
    hyphens.

    Took from django sources.

    :param value: the (preferably unicode) string to slugify
    """
    # 'unicode' does not exist on Python 3; derive the text type instead
    text_type = type(u'')
    if isinstance(value, text_type):
        import unicodedata
        # decompose accented characters and drop the non-ASCII remains
        value = unicodedata.normalize('NFKD', value)
        value = value.encode('ascii', 'ignore').decode('ascii')
    # raw strings: \w and \s are regex escapes, not string escapes
    value = text_type(re.sub(r'[^\w\s-]', '', value).strip().lower())
    return re.sub(r'[-\s]+', '-', value)
|
|
|
|
|
|
def copy(path, source, destination, destination_path=None, overwrite=False):
    """Copy path from origin to destination.

    The function is able to copy either files or directories.

    :param path: the path to be copied from the source to the destination
    :param source: the source dir
    :param destination: the destination dir
    :param destination_path: the destination path (optional)
    :param overwrite: whether to overwrite the destination if already exists
                      or not
    """
    if not destination_path:
        destination_path = path

    source_ = os.path.abspath(os.path.expanduser(os.path.join(source, path)))
    destination_ = os.path.abspath(
        os.path.expanduser(os.path.join(destination, destination_path)))

    if os.path.isdir(source_):
        try:
            shutil.copytree(source_, destination_)
            logger.info('copying %s to %s' % (source_, destination_))
        except OSError:
            # copytree fails if the destination already exists; replace
            # it only when the caller asked for it
            if overwrite:
                shutil.rmtree(destination_)
                shutil.copytree(source_, destination_)
                logger.info('replacement of %s with %s'
                            % (source_, destination_))
    elif os.path.isfile(source_):
        shutil.copy(source_, destination_)
        logger.info('copying %s to %s' % (source_, destination_))
    else:
        # previously a silent no-op: warn so missing sources are visible
        logger.warning('skipped copy %s to %s, source does not exist'
                       % (source_, destination_))
|
|
|
|
|
|
def clean_output_dir(path):
    """Remove all the files from the output directory.

    This is a best-effort cleanup: filesystem errors (e.g. the
    directory does not exist yet) are deliberately ignored.

    :param path: the output directory to wipe
    """
    # ignore_errors=True expresses the best-effort intent without the
    # broad 'except Exception: pass' that hid real bugs
    shutil.rmtree(path, ignore_errors=True)
|
|
|
|
|
|
def get_relative_path(filename):
    """Return the relative path from *filename* back up to the site root.

    One ``../`` is emitted per directory level, followed by ``.``.
    """
    depth = filename.count('/')
    return '../' * depth + '.'
|
|
|
|
|
|
def truncate_html_words(s, num, end_text='...'):
    """Truncates HTML to a certain number of words (not counting tags and
    comments). Closes opened tags if they were correctly closed in the given
    html. Takes an optional argument of what should be used to notify that the
    string has been truncated, defaulting to ellipsis (...).

    Newlines in the HTML are preserved.
    From the django framework.

    :param s: the HTML string to truncate
    :param num: maximum number of words to keep; tags and HTML entities
        (``&...;``) are not counted as words
    :param end_text: marker appended when truncation occurs (pass '' to
        append nothing)
    """
    length = int(num)
    if length <= 0:
        return u''
    # HTML4 void elements: they never get a closing tag
    html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area',
                      'hr', 'input')

    # Set up regular expressions
    # re_words matches, in order: an entity, a tag, or a word; only a
    # word populates group(1)
    re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
    # re_tag splits a tag into (closing slash, name, self-closing slash)
    re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
    # Count non-HTML words and keep note of open tags
    pos = 0
    end_text_pos = 0  # string index just after the last kept word
    words = 0
    open_tags = []
    while words <= length:
        m = re_words.search(s, pos)
        if not m:
            # Checked through whole string
            break
        pos = m.end(0)
        if m.group(1):
            # It's an actual non-HTML word
            words += 1
            if words == length:
                end_text_pos = pos
            continue
        # Check for tag
        tag = re_tag.match(m.group(0))
        if not tag or end_text_pos:
            # Don't worry about non tags or tags after our truncate point
            continue
        closing_tag, tagname, self_closing = tag.groups()
        tagname = tagname.lower()  # Element names are always case-insensitive
        if self_closing or tagname in html4_singlets:
            pass
        elif closing_tag:
            # Check for match in open tags list
            try:
                i = open_tags.index(tagname)
            except ValueError:
                pass
            else:
                # SGML: An end tag closes, back to the matching start tag,
                # all unclosed intervening start tags with omitted end tags
                open_tags = open_tags[i + 1:]
        else:
            # Add it to the start of the open tags list
            open_tags.insert(0, tagname)
    if words <= length:
        # Don't try to close tags if we don't need to truncate
        return s
    out = s[:end_text_pos]
    if end_text:
        out += ' ' + end_text
    # Close any tags still open
    for tag in open_tags:
        out += '</%s>' % tag
    # Return string
    return out
|
|
|
|
|
|
def process_translations(content_list):
    """ Finds all translation and returns tuple with two lists (index,
    translations). Index list includes items in default language or items
    which have no variant in default language.

    Also, for each content_list item, it sets attribute 'translations'
    (a list of the other items sharing the same slug).

    :param content_list: list of content objects with 'slug',
        'in_default_lang' and 'filename' attributes; it is sorted
        in place by slug
    """
    content_list.sort(key=attrgetter('slug'))
    grouped_by_slugs = groupby(content_list, attrgetter('slug'))
    index = []
    translations = []

    for slug, items in grouped_by_slugs:
        items = list(items)
        # find items with default language; a list comprehension is used
        # because on Python 3 filter() returns an iterator, which would
        # break len() below and make 'translations' single-use
        default_lang_items = [x for x in items if x.in_default_lang]
        len_ = len(default_lang_items)
        if len_ > 1:
            logger.warning(u'there are %s variants of "%s"' % (len_, slug))
            for x in default_lang_items:
                logger.warning(' %s' % x.filename)
        elif len_ == 0:
            # no default-language variant: fall back to the first item
            default_lang_items = items[:1]

        if not slug:
            logger.warning('empty slug for %r' % (default_lang_items[0].filename,))
        index.extend(default_lang_items)
        translations.extend([x for x in items if x not in default_lang_items])
        for a in items:
            a.translations = [x for x in items if x != a]
    return index, translations
|
|
|
|
|
|
# Module-level timestamp of the newest watched file seen so far;
# read and updated by files_changed() on every call.
LAST_MTIME = 0
|
|
|
|
|
|
def files_changed(path, extensions):
    """Return True if the files have changed since the last check.

    Tracks state in the module-level LAST_MTIME.

    :param path: directory to scan recursively (hidden dirs skipped)
    :param extensions: iterable of filename suffixes to watch
    """

    def file_times(directory):
        """Return the last time files have been modified"""
        for root, dirs, files in os.walk(directory):
            # prune hidden directories (e.g. .git) in place
            dirs[:] = [x for x in dirs if x[0] != '.']
            for name in files:
                if any(name.endswith(ext) for ext in extensions):
                    yield os.stat(os.path.join(root, name)).st_mtime

    global LAST_MTIME
    # max() on an empty sequence raises ValueError; no watched files
    # simply means nothing has changed
    mtimes = list(file_times(path))
    if not mtimes:
        return False
    mtime = max(mtimes)
    if mtime > LAST_MTIME:
        LAST_MTIME = mtime
        return True
    return False
|
|
|
|
|
|
def set_date_tzinfo(d, tz_name=None):
    """Attach timezone information to a naive datetime.

    Dates without tzinfo should be UTC; when *tz_name* is given, the
    date is localized to that timezone instead of being returned as-is.

    :param d: a naive datetime
    :param tz_name: optional timezone name understood by pytz
    """
    if tz_name is None:
        return d
    return pytz.timezone(tz_name).localize(d)
|