mirror of
https://github.com/getpelican/pelican.git
synced 2025-10-15 20:28:56 +02:00
Merge pull request #538 from bstpierre/sitemap-pages-dont-exist
Fix #537: sitemap.xml contains pages that don't exist
This commit is contained in:
commit
38dac13ee3
1 changed file with 20 additions and 37 deletions
|
|
@ -1,3 +1,4 @@
|
||||||
|
import collections
|
||||||
import os.path
|
import os.path
|
||||||
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
@ -16,34 +17,6 @@ XML_HEADER = u"""<?xml version="1.0" encoding="utf-8"?>
|
||||||
<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
|
xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"
|
||||||
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
||||||
|
|
||||||
<url>
|
|
||||||
<loc>{0}/index.html</loc>
|
|
||||||
<lastmod>{1}</lastmod>
|
|
||||||
<changefreq>{2}</changefreq>
|
|
||||||
<priority>{3}</priority>
|
|
||||||
</url>
|
|
||||||
|
|
||||||
<url>
|
|
||||||
<loc>{0}/archives.html</loc>
|
|
||||||
<lastmod>{1}</lastmod>
|
|
||||||
<changefreq>{2}</changefreq>
|
|
||||||
<priority>{3}</priority>
|
|
||||||
</url>
|
|
||||||
|
|
||||||
<url>
|
|
||||||
<loc>{0}/tags.html</loc>
|
|
||||||
<lastmod>{1}</lastmod>
|
|
||||||
<changefreq>{2}</changefreq>
|
|
||||||
<priority>{3}</priority>
|
|
||||||
</url>
|
|
||||||
|
|
||||||
<url>
|
|
||||||
<loc>{0}/categories.html</loc>
|
|
||||||
<lastmod>{1}</lastmod>
|
|
||||||
<changefreq>{2}</changefreq>
|
|
||||||
<priority>{3}</priority>
|
|
||||||
</url>
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
XML_URL = u"""
|
XML_URL = u"""
|
||||||
|
|
@ -146,6 +119,10 @@ class SitemapGenerator(object):
|
||||||
if getattr(page, 'status', 'published') != 'published':
|
if getattr(page, 'status', 'published') != 'published':
|
||||||
return
|
return
|
||||||
|
|
||||||
|
page_path = os.path.join(self.output_path, page.url)
|
||||||
|
if not os.path.exists(page_path):
|
||||||
|
return
|
||||||
|
|
||||||
lastmod = format_date(getattr(page, 'date', self.now))
|
lastmod = format_date(getattr(page, 'date', self.now))
|
||||||
|
|
||||||
if isinstance(page, contents.Article):
|
if isinstance(page, contents.Article):
|
||||||
|
|
@ -176,22 +153,29 @@ class SitemapGenerator(object):
|
||||||
for article in self.context['articles']:
|
for article in self.context['articles']:
|
||||||
pages += article.translations
|
pages += article.translations
|
||||||
|
|
||||||
|
|
||||||
info('writing {0}'.format(path))
|
info('writing {0}'.format(path))
|
||||||
|
|
||||||
with open(path, 'w', encoding='utf-8') as fd:
|
with open(path, 'w', encoding='utf-8') as fd:
|
||||||
|
|
||||||
if self.format == 'xml':
|
if self.format == 'xml':
|
||||||
fd.write(XML_HEADER.format(
|
fd.write(XML_HEADER)
|
||||||
self.siteurl,
|
|
||||||
format_date(self.now),
|
|
||||||
self.changefreqs['indexes'],
|
|
||||||
self.priorities['indexes']
|
|
||||||
)
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
fd.write(TXT_HEADER.format(self.siteurl))
|
fd.write(TXT_HEADER.format(self.siteurl))
|
||||||
|
|
||||||
|
FakePage = collections.namedtuple('FakePage',
|
||||||
|
['status',
|
||||||
|
'date',
|
||||||
|
'url'])
|
||||||
|
|
||||||
|
for standard_page_url in ['index.html',
|
||||||
|
'archives.html',
|
||||||
|
'tags.html',
|
||||||
|
'categories.html']:
|
||||||
|
fake = FakePage(status='published',
|
||||||
|
date=self.now,
|
||||||
|
url=standard_page_url)
|
||||||
|
self.write_url(fake, fd)
|
||||||
|
|
||||||
for page in pages:
|
for page in pages:
|
||||||
self.write_url(page, fd)
|
self.write_url(page, fd)
|
||||||
|
|
||||||
|
|
@ -199,7 +183,6 @@ class SitemapGenerator(object):
|
||||||
fd.write(XML_FOOTER)
|
fd.write(XML_FOOTER)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def get_generators(generators):
|
def get_generators(generators):
|
||||||
return SitemapGenerator
|
return SitemapGenerator
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue