From f337f5067bb4a99f18810ec175975d4953824bc3 Mon Sep 17 00:00:00 2001 From: Nicolas Steinmetz Date: Tue, 30 Aug 2011 16:37:59 +0300 Subject: [PATCH 1/5] Fix as in dotclear, format is not "markdown" but "wiki". So add an "or" statement. --- tools/pelican-import | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/pelican-import b/tools/pelican-import index 44f02fbd..a85e55f3 100755 --- a/tools/pelican-import +++ b/tools/pelican-import @@ -113,7 +113,7 @@ def dc2fields(file): if cat_id: categories = [category_list[id].strip() for id in cat_id.split(',')] - if post_format == "markdown": + if (post_format == "markdown") or (post_format == "wiki"): content = post_excerpt + post_content else: content = post_excerpt_xhtml + post_content_xhtml @@ -163,7 +163,7 @@ def build_markdown_header(title, date, author, categories, tags): def fields2pelican(fields, output_path, dircat=False): for title, content, filename, date, author, categories, tags, markup in fields: - if markup == "markdown": + if (markup == "markdown") or (markup == "wiki"): ext = '.md' header = build_markdown_header(title, date, author, categories, tags) else: From de790b9e725f21c93cead0efd85933be46d08456 Mon Sep 17 00:00:00 2001 From: Nicolas Steinmetz Date: Tue, 30 Aug 2011 17:14:05 +0300 Subject: [PATCH 2/5] for the --dir-cat option, also slugify category's name to avoid encoding or spaces issues or ... --- tools/pelican-import | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/pelican-import b/tools/pelican-import index a85e55f3..be8a1a51 100755 --- a/tools/pelican-import +++ b/tools/pelican-import @@ -174,7 +174,7 @@ def fields2pelican(fields, output_path, dircat=False): # option to put files in directories with categories names if dircat and (len(categories) == 1): - catname = categories[0] + catname = slugify(categories[0]) out_filename = os.path.join(output_path, catname, filename+'.rst') if not os.path.isdir(os.path.join(output_path, catname)): os.mkdir(os.path.join(output_path, catname)) From 8f6da4fa7f5c5094c8a082d5ecc9e0c3589aa890 Mon Sep 17 00:00:00 2001 From: Nicolas Steinmetz Date: Tue, 30 Aug 2011 22:27:43 +0200 Subject: [PATCH 3/5] final fix for dotclear import --- tools/pelican-import | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/tools/pelican-import b/tools/pelican-import index be8a1a51..b8c9197c 100755 --- a/tools/pelican-import +++ b/tools/pelican-import @@ -113,14 +113,17 @@ def dc2fields(file): if cat_id: categories = [category_list[id].strip() for id in cat_id.split(',')] - if (post_format == "markdown") or (post_format == "wiki"): + """ + Weird - dotclear2 at least does not use markdown ; so wonder about the use case on "markdown" + """ + if post_format == "markdown": content = post_excerpt + post_content else: content = post_excerpt_xhtml + post_content_xhtml content = content.replace('\\n', '') post_format = "html" - yield (post_title, content, post_url, post_creadt, author, categories, tags, post_format) + yield (post_title, content, slugify(post_title), post_creadt, author, categories, tags, post_format) def feed2fields(file): @@ -163,7 +166,7 @@ def build_markdown_header(title, date, author, categories, tags): def fields2pelican(fields, output_path, dircat=False): for title, content, filename, date, author, categories, tags, markup in fields: - if (markup == "markdown") or (markup == "wiki"): + if (markup == "markdown"): ext = '.md' header = build_markdown_header(title, date, author, categories, tags) else: @@ -175,7 +178,7 @@ def fields2pelican(fields, output_path, dircat=False): # option to put files in directories with categories names if dircat and (len(categories) == 1): catname = slugify(categories[0]) - out_filename = os.path.join(output_path, catname, filename+'.rst') + out_filename = os.path.join(output_path, catname, filename+ext) if not os.path.isdir(os.path.join(output_path, catname)): os.mkdir(os.path.join(output_path, catname)) else: @@ -189,8 +192,7 @@ def fields2pelican(fields, output_path, dircat=False): with open(html_filename, 'w', encoding='utf-8') as fp: fp.write(content) - os.system('pandoc --normalize --reference-links --from=html --to=rst -o "%s" "%s"' % (out_filename, - html_filename)) + os.system('pandoc --normalize --reference-links --from=html --to=rst -o "%s" "%s"' % (out_filename, html_filename)) os.remove(html_filename) From a50e986f80e7e28e19c4d80a8721b3506064324c Mon Sep 17 00:00:00 2001 From: Nicolas Steinmetz Date: Tue, 30 Aug 2011 23:49:45 +0200 Subject: [PATCH 4/5] add tag import feature for dotclear import --- tools/pelican-import | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/tools/pelican-import b/tools/pelican-import index b8c9197c..a57ce25d 100755 --- a/tools/pelican-import +++ b/tools/pelican-import @@ -37,6 +37,8 @@ def wp2fields(xml): def dc2fields(file): """Opens a Dotclear export file, and yield pelican fields""" + from BeautifulSoup import BeautifulStoneSoup, BeautifulSoup + in_cat = False in_post = False category_list = {} @@ -100,7 +102,7 @@ def dc2fields(file): # post_open_tb = fields[24] # nb_comment = fields[25] # nb_trackback = fields[26] - # post_meta = fields[27] + post_meta = fields[27] # redirect_url = fields[28][:-1] # remove seconds @@ -113,6 +115,22 @@ def dc2fields(file): if cat_id: categories = [category_list[id].strip() for id in cat_id.split(',')] + tag = post_meta.replace('{', '').replace('}', '').replace('a:1:s:3:\\"tag\\";a:', '').replace('a:0:', '') + # We handle only line that have tags syntax + if len(tag) > 1: + if int(tag[:1]) == 1: + newtag = tag.split('"')[1] + tags.append(unicode(BeautifulStoneSoup(newtag,convertEntities=BeautifulStoneSoup.HTML_ENTITIES ))) + else: + i=1 + j=1 + while(i <= int(tag[:1])): + newtag = tag.split('"')[j].replace('\\','') + tags.append(unicode(BeautifulStoneSoup(newtag,convertEntities=BeautifulStoneSoup.HTML_ENTITIES ))) + i=i+1 + if j < int(tag[:1])*2: + j=j+2 + """ Weird - dotclear2 at least does not use markdown ; so wonder about the use case on "markdown" """ From 358e92cf0444e842cd7f5048b83b4835949cef6e Mon Sep 17 00:00:00 2001 From: Nicolas Steinmetz Date: Fri, 30 Sep 2011 22:48:16 +0200 Subject: [PATCH 5/5] Clean dot clear import --- tools/pelican-import | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/pelican-import b/tools/pelican-import index a57ce25d..4392d9ff 100755 --- a/tools/pelican-import +++ b/tools/pelican-import @@ -115,8 +115,8 @@ def dc2fields(file): if cat_id: categories = [category_list[id].strip() for id in cat_id.split(',')] + # Get tags related to a post tag = post_meta.replace('{', '').replace('}', '').replace('a:1:s:3:\\"tag\\";a:', '').replace('a:0:', '') - # We handle only line that have tags syntax if len(tag) > 1: if int(tag[:1]) == 1: newtag = tag.split('"')[1] @@ -132,7 +132,8 @@ def dc2fields(file): j=j+2 """ - Weird - dotclear2 at least does not use markdown ; so wonder about the use case on "markdown" + dotclear2 does not use markdown by default unless you use the markdown plugin + Ref: http://plugins.dotaddict.org/dc2/details/formatting-markdown """ if post_format == "markdown": content = post_excerpt + post_content