From f337f5067bb4a99f18810ec175975d4953824bc3 Mon Sep 17 00:00:00 2001
From: Nicolas Steinmetz <nsteinmetz@gmail.com>
Date: Tue, 30 Aug 2011 16:37:59 +0300
Subject: [PATCH 1/5] Fix Dotclear import: the post format is "wiki", not
 "markdown", so accept both with an "or" condition.

---
 tools/pelican-import | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/pelican-import b/tools/pelican-import
index 44f02fbd..a85e55f3 100755
--- a/tools/pelican-import
+++ b/tools/pelican-import
@@ -113,7 +113,7 @@ def dc2fields(file):
         if cat_id:
             categories = [category_list[id].strip() for id in cat_id.split(',')]
 
-        if post_format == "markdown":
+        if (post_format == "markdown") or (post_format == "wiki"):
             content = post_excerpt + post_content
         else:
             content = post_excerpt_xhtml + post_content_xhtml
@@ -163,7 +163,7 @@ def build_markdown_header(title, date, author, categories, tags):
 
 def fields2pelican(fields, output_path, dircat=False):
     for title, content, filename, date, author, categories, tags, markup in fields:
-        if markup == "markdown":
+        if (markup == "markdown") or (markup == "wiki"):
             ext = '.md'
             header = build_markdown_header(title, date, author, categories, tags)
         else:

From de790b9e725f21c93cead0efd85933be46d08456 Mon Sep 17 00:00:00 2001
From: Nicolas Steinmetz <nsteinmetz@gmail.com>
Date: Tue, 30 Aug 2011 17:14:05 +0300
Subject: [PATCH 2/5] For the --dir-cat option, also slugify the category name
 to avoid encoding, whitespace and similar issues

---
 tools/pelican-import | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/pelican-import b/tools/pelican-import
index a85e55f3..be8a1a51 100755
--- a/tools/pelican-import
+++ b/tools/pelican-import
@@ -174,7 +174,7 @@ def fields2pelican(fields, output_path, dircat=False):
 
         # option to put files in directories with categories names
         if dircat and (len(categories) == 1):
-            catname = categories[0]
+            catname = slugify(categories[0])
             out_filename = os.path.join(output_path, catname, filename+'.rst')
             if not os.path.isdir(os.path.join(output_path, catname)):
                 os.mkdir(os.path.join(output_path, catname))

From 8f6da4fa7f5c5094c8a082d5ecc9e0c3589aa890 Mon Sep 17 00:00:00 2001
From: Nicolas Steinmetz <nsteinmetz@gmail.com>
Date: Tue, 30 Aug 2011 22:27:43 +0200
Subject: [PATCH 3/5] final fix for dotclear import

---
 tools/pelican-import | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/tools/pelican-import b/tools/pelican-import
index be8a1a51..b8c9197c 100755
--- a/tools/pelican-import
+++ b/tools/pelican-import
@@ -113,14 +113,17 @@ def dc2fields(file):
         if cat_id:
             categories = [category_list[id].strip() for id in cat_id.split(',')]
 
-        if (post_format == "markdown") or (post_format == "wiki"):
+        """
+        Weird - dotclear2 at least does not use markdown ; so wonder about the use case on "markdown"
+        """
+        if post_format == "markdown":
             content = post_excerpt + post_content
         else:
             content = post_excerpt_xhtml + post_content_xhtml
             content = content.replace('\\n', '')
             post_format = "html"
 
-        yield (post_title, content, post_url, post_creadt, author, categories, tags, post_format)
+        yield (post_title, content, slugify(post_title), post_creadt, author, categories, tags, post_format)
 
 
 def feed2fields(file):
@@ -163,7 +166,7 @@ def build_markdown_header(title, date, author, categories, tags):
 
 def fields2pelican(fields, output_path, dircat=False):
     for title, content, filename, date, author, categories, tags, markup in fields:
-        if (markup == "markdown") or (markup == "wiki"):
+        if (markup == "markdown"):
             ext = '.md'
             header = build_markdown_header(title, date, author, categories, tags)
         else:
@@ -175,7 +178,7 @@ def fields2pelican(fields, output_path, dircat=False):
         # option to put files in directories with categories names
         if dircat and (len(categories) == 1):
             catname = slugify(categories[0])
-            out_filename = os.path.join(output_path, catname, filename+'.rst')
+            out_filename = os.path.join(output_path, catname, filename+ext)
             if not os.path.isdir(os.path.join(output_path, catname)):
                 os.mkdir(os.path.join(output_path, catname))
         else:
@@ -189,8 +192,7 @@ def fields2pelican(fields, output_path, dircat=False):
             with open(html_filename, 'w', encoding='utf-8') as fp:
                 fp.write(content)
 
-            os.system('pandoc --normalize --reference-links --from=html --to=rst -o "%s" "%s"' % (out_filename,
-                html_filename))
+            os.system('pandoc --normalize --reference-links --from=html --to=rst -o "%s" "%s"' % (out_filename, html_filename))
 
             os.remove(html_filename)
 

From a50e986f80e7e28e19c4d80a8721b3506064324c Mon Sep 17 00:00:00 2001
From: Nicolas Steinmetz <nsteinmetz@gmail.com>
Date: Tue, 30 Aug 2011 23:49:45 +0200
Subject: [PATCH 4/5] add tag import feature for dotclear import

---
 tools/pelican-import | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/tools/pelican-import b/tools/pelican-import
index b8c9197c..a57ce25d 100755
--- a/tools/pelican-import
+++ b/tools/pelican-import
@@ -37,6 +37,8 @@ def wp2fields(xml):
 
 def dc2fields(file):
     """Opens a Dotclear export file, and yield pelican fields"""
+    from BeautifulSoup import BeautifulStoneSoup, BeautifulSoup
+
     in_cat = False
     in_post = False
     category_list = {}
@@ -100,7 +102,7 @@ def dc2fields(file):
         # post_open_tb = fields[24]
         # nb_comment = fields[25]
         # nb_trackback = fields[26]
-        # post_meta = fields[27]
+        post_meta = fields[27]
         # redirect_url = fields[28][:-1]
 
         # remove seconds
@@ -113,6 +115,22 @@ def dc2fields(file):
         if cat_id:
             categories = [category_list[id].strip() for id in cat_id.split(',')]
 
+        tag = post_meta.replace('{', '').replace('}', '').replace('a:1:s:3:\\"tag\\";a:', '').replace('a:0:', '')
+        # We handle only line that have tags syntax
+        if len(tag) > 1:
+            if int(tag[:1]) == 1:
+                newtag = tag.split('"')[1]
+                tags.append(unicode(BeautifulStoneSoup(newtag,convertEntities=BeautifulStoneSoup.HTML_ENTITIES )))
+            else:
+                i=1
+                j=1
+                while(i <= int(tag[:1])):
+                    newtag = tag.split('"')[j].replace('\\','')
+                    tags.append(unicode(BeautifulStoneSoup(newtag,convertEntities=BeautifulStoneSoup.HTML_ENTITIES )))
+                    i=i+1
+                    if j < int(tag[:1])*2:
+                        j=j+2
+
         """
         Weird - dotclear2 at least does not use markdown ; so wonder about the use case on "markdown"
         """

From 358e92cf0444e842cd7f5048b83b4835949cef6e Mon Sep 17 00:00:00 2001
From: Nicolas Steinmetz <nsteinmetz@gmail.com>
Date: Fri, 30 Sep 2011 22:48:16 +0200
Subject: [PATCH 5/5] Clean up Dotclear import

---
 tools/pelican-import | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/pelican-import b/tools/pelican-import
index a57ce25d..4392d9ff 100755
--- a/tools/pelican-import
+++ b/tools/pelican-import
@@ -115,8 +115,8 @@ def dc2fields(file):
         if cat_id:
             categories = [category_list[id].strip() for id in cat_id.split(',')]
 
+        # Get tags related to a post
         tag = post_meta.replace('{', '').replace('}', '').replace('a:1:s:3:\\"tag\\";a:', '').replace('a:0:', '')
-        # We handle only line that have tags syntax
         if len(tag) > 1:
             if int(tag[:1]) == 1:
                 newtag = tag.split('"')[1]
@@ -132,7 +132,8 @@ def dc2fields(file):
                         j=j+2
 
         """
-        Weird - dotclear2 at least does not use markdown ; so wonder about the use case on "markdown"
+        dotclear2 does not use markdown by default unless you use the markdown plugin 
+        Ref: http://plugins.dotaddict.org/dc2/details/formatting-markdown
         """
         if post_format == "markdown":
             content = post_excerpt + post_content