Merge pull request #538 from bstpierre/sitemap-pages-dont-exist

Fix #537: sitemap.xml contains pages that don't exist
This commit is contained in:
Alexis Metaireau 2012-10-12 13:22:16 -07:00
commit 38dac13ee3

View file

@ -1,3 +1,4 @@
import collections
import os.path
from datetime import datetime
@ -16,34 +17,6 @@ XML_HEADER = u"""<?xml version="1.0" encoding="utf-8"?>
<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>{0}/index.html</loc>
<lastmod>{1}</lastmod>
<changefreq>{2}</changefreq>
<priority>{3}</priority>
</url>
<url>
<loc>{0}/archives.html</loc>
<lastmod>{1}</lastmod>
<changefreq>{2}</changefreq>
<priority>{3}</priority>
</url>
<url>
<loc>{0}/tags.html</loc>
<lastmod>{1}</lastmod>
<changefreq>{2}</changefreq>
<priority>{3}</priority>
</url>
<url>
<loc>{0}/categories.html</loc>
<lastmod>{1}</lastmod>
<changefreq>{2}</changefreq>
<priority>{3}</priority>
</url>
"""
XML_URL = u"""
@ -146,6 +119,10 @@ class SitemapGenerator(object):
if getattr(page, 'status', 'published') != 'published':
return
page_path = os.path.join(self.output_path, page.url)
if not os.path.exists(page_path):
return
lastmod = format_date(getattr(page, 'date', self.now))
if isinstance(page, contents.Article):
@ -176,22 +153,29 @@ class SitemapGenerator(object):
for article in self.context['articles']:
pages += article.translations
info('writing {0}'.format(path))
with open(path, 'w', encoding='utf-8') as fd:
if self.format == 'xml':
fd.write(XML_HEADER.format(
self.siteurl,
format_date(self.now),
self.changefreqs['indexes'],
self.priorities['indexes']
)
)
fd.write(XML_HEADER)
else:
fd.write(TXT_HEADER.format(self.siteurl))
FakePage = collections.namedtuple('FakePage',
['status',
'date',
'url'])
for standard_page_url in ['index.html',
'archives.html',
'tags.html',
'categories.html']:
fake = FakePage(status='published',
date=self.now,
url=standard_page_url)
self.write_url(fake, fd)
for page in pages:
self.write_url(page, fd)
@ -199,7 +183,6 @@ class SitemapGenerator(object):
fd.write(XML_FOOTER)
def get_generators(generators):
    """Return the generator class provided by this module.

    The ``generators`` argument is accepted but never read; the function
    always hands back the ``SitemapGenerator`` class itself, not an
    instance of it.

    NOTE(review): presumably this is the handler connected to Pelican's
    ``get_generators`` signal -- confirm against the plugin's registration
    code, which is not visible in this excerpt.
    """
    return SitemapGenerator