Initial commit

author: Guido Günther <agx@sigxcpu.org> 2008-10-11 20:11:00 +0200
committer: Guido Günther <agx@sigxcpu.org> 2008-10-14 22:45:00 +0200
commit: f07e9117d0f018b6ec48d5f5dca599dcff280684 (patch)
tree: 45e2dc81fc178079a610bb4d26a56c7e4dc79b23 /odfrecode
8 files changed, 745 insertions, 0 deletions
diff --git a/odfrecode/__init__.py b/odfrecode/__init__.py
new file mode 100644
index 0000000..328100e
--- /dev/null
+++ b/odfrecode/__init__.py
@@ -0,0 +1,105 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2007,2008 Guido Günther <agx@sigxcpu.org>
+#
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the Free Software
+#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+
+import os
+import zipfile
+import tempfile
+import shutil
+import xml.dom.minidom
+
+import recoders
+
+__xmls = [ 'content.xml', 'styles.xml' ]  # archive members that get recoded; all others are copied as they are
+
+def __convert(textnode, weird_enc):
+    """remap source encoding to the corresponding unicode codepoints"""
+    # Every character of textnode.data goes through weird_enc.recode() and
+    # the results replace the node's text in place.  join() builds the new
+    # string in one pass instead of copying it again on every '+='.
+    textnode.data = u''.join([weird_enc.recode(key) for key in textnode.data])
+
+
+def __recode_xml_tree(node, weird_enc):
+    """walk the tree below node depth first and recode every childless text node"""
+    if node.nodeType == node.TEXT_NODE and not node.hasChildNodes():
+        __convert(node, weird_enc)
+    for kid in node.childNodes:
+        __recode_xml_tree(kid, weird_enc)
+
+
+def __subst_fonts(contents, fontmap):
+    """
+    substitute fonts according to fontmap
+    @param contents: parsed xml document, modified in place
+    @param fontmap: { oldfont: newfont }; None or empty means no substitution
+    """
+    if not fontmap:  # to_utf8() passes None by default and None can't be indexed
+        return
+    for prop in contents.getElementsByTagName('style:text-properties'):
+        oldfont = prop.getAttribute('style:font-name')
+        if oldfont and oldfont in fontmap:
+            prop.setAttribute('style:font-name', fontmap[oldfont])
+
+
+def to_utf8(srcname, encoding, fontmap=None):
+    """
+    convert an odf document from a legacy font encoding to unicode
+    @param srcname: file to convert
+    @type srcname: string
+    @param encoding: recoder for the encoding the document text is in
+    @type encoding: Recoder instance, e.g. from get_recoder()
+    @param fontmap: font substitution map { oldfont1: newfont1, oldfont2: newfont2 }
+    @type fontmap: dict
+    @return: path of the converted copy, inside a newly created temporary directory
+    """
+    src_odf = zipfile.ZipFile(srcname, 'r')  # raises before anything is created if srcname is no zip
+    tempdir = tempfile.mkdtemp()
+    try:
+        dstname = os.path.join(tempdir, os.path.basename(srcname))
+        dst_odf = zipfile.ZipFile(dstname, 'w')  # starts empty, so srcname needs no copying first
+        try:
+            for fname in src_odf.namelist():
+                data = src_odf.read(fname)
+                if fname in __xmls:
+                    contents = xml.dom.minidom.parseString(data)
+                    __recode_xml_tree(contents, encoding)
+                    __subst_fonts(contents, fontmap)
+                    data = contents.toxml('utf-8')
+                dst_odf.writestr(fname, data)
+        finally:
+            dst_odf.close()
+    except:  # whatever went wrong, don't leave a half-written document behind
+        shutil.rmtree(tempdir, True)
+        raise
+    finally:
+        src_odf.close()
+    return dstname
+
+
+def get_recoder(encoding):
+    """return a new instance of the recoder class registered under the name encoding (KeyError if there is none)"""
+    return recoders.recoders[encoding]()
+
+def get_recoders():
+    """return the dict that maps encoding names to recoder classes (the registry itself, not a copy)"""
+    return recoders.recoders
+
+# vim:et:ts=4:sw=4:et:sts=4:ai:set list listchars=tab\:»·,trail\:·:
diff --git a/odfrecode/recoders/__init__.py b/odfrecode/recoders/__init__.py
new file mode 100644
index 0000000..5ec8e84
--- /dev/null
+++ b/odfrecode/recoders/__init__.py
@@ -0,0 +1,38 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2007,2008,2009 Guido Günther <agx@sigxcpu.org>
+#
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the Free Software
+#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+
+import recoder
+import armscii
+import georgian
+import romanian
+import cyrillic
+import greek
+
+# Available recoders: encoding name -> recoder class
+recoders = {
+    "armscii8": armscii.Armscii8,
+    "georgian": georgian.Georgian,
+    "romanian": romanian.Romanian,
+    "cyrillic": cyrillic.Cyrillic,
+    "greek": greek.Greek,
+}
+
+# vim:et:ts=4:sw=4:et:sts=4:ai:set list listchars=tab\:»·,trail\:·:
diff --git a/odfrecode/recoders/armscii.py b/odfrecode/recoders/armscii.py
new file mode 100644
index 0000000..9098ba6
--- /dev/null
+++ b/odfrecode/recoders/armscii.py
@@ -0,0 +1,125 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2007,2008 Guido Günther <agx@sigxcpu.org>
+#
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the Free Software
+#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+# the mapping table is based on a utf16 encoded table of a vbscript that is:
+# (c) 2003 VAHE GEVORGYAN, UNDER GPL LICENSE
+
+import recoder
+
+class Armscii8(recoder.Recoder):
+    """recoder for armscii8 text: maps the characters 0xa3-0xfe to their unicode counterparts"""
+
+    encoding = 'armscii8'  # name of this recoder
+    dst_encoding = 'utf-16-be'  # each table value is one big-endian utf-16 code unit
+
+    character_table = {
+        u'\xd8': '\x05\x44',
+        u'\xd9': '\x05\x74',
+        u'\xd6': '\x05\x43',
+        u'\xd7': '\x05\x73',
+        u'\xd4': '\x05\x42',
+        u'\xd5': '\x05\x72',
+        u'\xd2': '\x05\x41',
+        u'\xd3': '\x05\x71',
+        u'\xda': '\x05\x45',
+        u'\xdb': '\x05\x75',
+        u'\xa4': '\x00\x29',
+        u'\xa5': '\x00\x28',
+        u'\xa6': '\x00\xbb',
+        u'\xa7': '\x00\xab',
+        u'\xdc': '\x05\x46',
+        u'\xc7': '\x05\x6b',
+        u'\xc6': '\x05\x3b',
+        u'\xc3': '\x05\x69',
+        u'\xc2': '\x05\x39',
+        u'\xc5': '\x05\x6a',
+        u'\xc4': '\x05\x3a',
+        u'\xbf': '\x05\x67',
+        u'\xbe': '\x05\x37',
+        u'\xc1': '\x05\x68',
+        u'\xc0': '\x05\x38',
+        u'\xfc': '\x05\x56',
+        u'\xfd': '\x05\x86',
+        u'\xfa': '\x05\x55',
+        u'\xfb': '\x05\x85',
+        u'\xfe': '\x05\x5a',
+        u'\xf5': '\x05\x82',
+        u'\xf4': '\x05\x52',
+        u'\xf7': '\x05\x83',
+        u'\xf6': '\x05\x53',
+        u'\xf1': '\x05\x80',
+        u'\xf0': '\x05\x50',
+        u'\xf3': '\x05\x81',
+        u'\xf2': '\x05\x51',
+        u'\xf9': '\x05\x84',
+        u'\xf8': '\x05\x54',
+        u'\xb3': '\x05\x61',
+        u'\xb2': '\x05\x31',
+        u'\xb1': '\x05\x5e',
+        u'\xb0': '\x05\x5b',
+        u'\xaf': '\x05\x5c',
+        u'\xae': '\x20\x26',
+        u'\xad': '\x05\x8a',
+        u'\xac': '\x00\x2d',
+        u'\xab': '\x00\x2c',
+        u'\xaa': '\x05\x5d',
+        u'\xb6': '\x05\x33',
+        u'\xb7': '\x05\x63',
+        u'\xb4': '\x05\x32',
+        u'\xb5': '\x05\x62',
+        u'\xba': '\x05\x35',
+        u'\xbb': '\x05\x65',
+        u'\xb8': '\x05\x34',
+        u'\xb9': '\x05\x64',
+        u'\xbc': '\x05\x36',
+        u'\xbd': '\x05\x66',
+        u'\xa8': '\x05\x87',
+        u'\xa9': '\x00\x2e',
+        u'\xe5': '\x05\x7a',
+        u'\xe4': '\x05\x4a',
+        u'\xe3': '\x05\x79',
+        u'\xe2': '\x05\x49',
+        u'\xe1': '\x05\x78',
+        u'\xe0': '\x05\x48',
+        u'\xdf': '\x05\x77',
+        u'\xde': '\x05\x47',
+        u'\xdd': '\x05\x76',
+        u'\xa3': '\x05\x89',
+        u'\xee': '\x05\x4f',
+        u'\xef': '\x05\x7f',
+        u'\xea': '\x05\x4d',
+        u'\xeb': '\x05\x7d',
+        u'\xec': '\x05\x4e',
+        u'\xed': '\x05\x7e',
+        u'\xe6': '\x05\x4b',
+        u'\xe7': '\x05\x7b',
+        u'\xe8': '\x05\x4c',
+        u'\xe9': '\x05\x7c',
+        u'\xc9': '\x05\x6c',
+        u'\xc8': '\x05\x3c',
+        u'\xcb': '\x05\x6d',
+        u'\xca': '\x05\x3d',
+        u'\xcd': '\x05\x6e',
+        u'\xcc': '\x05\x3e',
+        u'\xcf': '\x05\x6f',
+        u'\xce': '\x05\x3f',
+        u'\xd1': '\x05\x70',
+        u'\xd0': '\x05\x40',
+       }
diff --git a/odfrecode/recoders/cyrillic.py b/odfrecode/recoders/cyrillic.py
new file mode 100644
index 0000000..7a3cdda
--- /dev/null
+++ b/odfrecode/recoders/cyrillic.py
@@ -0,0 +1,151 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2007,2009 Guido Günther <agx@sigxcpu.org>
+# (c) 2007 Torsten Werner <twerner@debian.org>
+#
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the Free Software
+#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+
+import recoder
+
+class Cyrillic(recoder.Recoder):
+    """
+    this class maps cyrillic characters from 0x7f-0xff to unicode
+    the mapping table is based on code from VeraWeb's madlan converter
+    """
+
+    encoding = 'cyrillic'
+    dst_encoding = 'utf-16-be'  # each table value is one big-endian utf-16 code unit
+
+    character_table = {
+        u'\x7f': '\x04\x0c',
+        u'\x80': '\x04\x02',
+        u'\x81': '\x04\x03',
+        u'\x82': '\x20\x1a',
+        u'\x83': '\x04\x53',
+        u'\x84': '\x20\x1e',
+        u'\x85': '\x20\x26',
+        u'\x86': '\x20\x20',
+        u'\x87': '\x20\x21',
+        u'\x88': '\x00\x20',
+        u'\x89': '\x20\x30',
+        u'\x8a': '\x04\x09',
+        u'\x8b': '\x20\x39',
+        u'\x8c': '\x04\x0a',
+        u'\x8d': '\x04\x0c',
+        u'\x8e': '\x04\x0b',
+        u'\x8f': '\x04\x0f',
+        u'\x90': '\x04\x52',
+        u'\x91': '\x20\x18',
+        u'\x92': '\x20\x19',
+        u'\x93': '\x20\x1c',
+        u'\x94': '\x20\x1d',
+        u'\x95': '\x20\x22',
+        u'\x96': '\x20\x13',
+        u'\x97': '\x20\x14',
+        u'\x98': '\x00\x20',
+        u'\x99': '\x21\x22',
+        u'\x9a': '\x04\x59',
+        u'\x9b': '\x20\x3a',
+        u'\x9c': '\x04\x5a',
+        u'\x9d': '\x04\x5c',
+        u'\x9e': '\x04\x5b',
+        u'\x9f': '\x04\x5f',
+        u'\xa0': '\x00\x20',
+        u'\xa1': '\x04\x0e',
+        u'\xa2': '\x04\x5e',
+        u'\xa3': '\x04\x08',
+        u'\xa5': '\x04\x90',
+        u'\xa8': '\x04\x01',
+        u'\xaa': '\x04\x04',
+        u'\xaf': '\x04\x07',
+        u'\xb2': '\x04\x06',
+        u'\xb3': '\x04\x56',  # small byelorussian-ukrainian i; U+0457 belongs to 0xbf below
+        u'\xb4': '\x04\x91',
+        u'\xb8': '\x04\x51',
+        u'\xb9': '\x21\x16',
+        u'\xba': '\x04\x54',
+        u'\xbc': '\x04\x58',
+        u'\xbd': '\x04\x05',
+        u'\xbe': '\x04\x55',
+        u'\xbf': '\x04\x57',
+        u'\xc0': '\x04\x10',
+        u'\xc1': '\x04\x11',
+        u'\xc2': '\x04\x12',
+        u'\xc3': '\x04\x13',
+        u'\xc4': '\x04\x14',
+        u'\xc5': '\x04\x15',
+        u'\xc6': '\x04\x16',
+        u'\xc7': '\x04\x17',
+        u'\xc8': '\x04\x18',
+        u'\xc9': '\x04\x19',
+        u'\xca': '\x04\x1a',
+        u'\xcb': '\x04\x1b',
+        u'\xcc': '\x04\x1c',
+        u'\xcd': '\x04\x1d',
+        u'\xce': '\x04\x1e',
+        u'\xcf': '\x04\x1f',
+        u'\xd0': '\x04\x20',
+        u'\xd1': '\x04\x21',
+        u'\xd2': '\x04\x22',
+        u'\xd3': '\x04\x23',
+        u'\xd4': '\x04\x24',
+        u'\xd5': '\x04\x25',
+        u'\xd6': '\x04\x26',
+        u'\xd7': '\x04\x27',
+        u'\xd8': '\x04\x28',
+        u'\xd9': '\x04\x29',
+        u'\xda': '\x04\x2a',
+        u'\xdb': '\x04\x2b',
+        u'\xdc': '\x04\x2c',
+        u'\xdd': '\x04\x2d',
+        u'\xde': '\x04\x2e',
+        u'\xdf': '\x04\x2f',
+        u'\xe0': '\x04\x30',
+        u'\xe1': '\x04\x31',
+        u'\xe2': '\x04\x32',
+        u'\xe3': '\x04\x33',
+        u'\xe4': '\x04\x34',
+        u'\xe5': '\x04\x35',
+        u'\xe6': '\x04\x36',
+        u'\xe7': '\x04\x37',
+        u'\xe8': '\x04\x38',
+        u'\xe9': '\x04\x39',
+        u'\xea': '\x04\x3a',
+        u'\xeb': '\x04\x3b',
+        u'\xec': '\x04\x3c',
+        u'\xed': '\x04\x3d',
+        u'\xee': '\x04\x3e',
+        u'\xef': '\x04\x3f',
+        u'\xf0': '\x04\x40',
+        u'\xf1': '\x04\x41',
+        u'\xf2': '\x04\x42',
+        u'\xf3': '\x04\x43',
+        u'\xf4': '\x04\x44',
+        u'\xf5': '\x04\x45',
+        u'\xf6': '\x04\x46',
+        u'\xf7': '\x04\x47',
+        u'\xf8': '\x04\x48',
+        u'\xf9': '\x04\x49',
+        u'\xfa': '\x04\x4a',
+        u'\xfb': '\x04\x4b',
+        u'\xfc': '\x04\x4c',
+        u'\xfd': '\x04\x4d',
+        u'\xfe': '\x04\x4e',
+        u'\xff': '\x04\x4f',
+    }
+
diff --git a/odfrecode/recoders/georgian.py b/odfrecode/recoders/georgian.py
new file mode 100644
index 0000000..247cc5a
--- /dev/null
+++ b/odfrecode/recoders/georgian.py
@@ -0,0 +1,65 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2007,2008 Guido Günther <agx@sigxcpu.org>
+# (c) 2008 Torsten Werner <twerner@debian.org>
+#
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the Free Software
+#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+
+import recoder
+
+class Georgian(recoder.Recoder):
+    """recoder for georgian text typed with fonts that put the georgian letters on ascii letter codes"""
+
+    encoding = 'georgian'
+    dst_encoding = 'utf-8'
+
+    character_table = {
+        u'\x43': 'ჩ',
+        u'\x4a': 'ჟ',
+        u'\x52': 'ღ',
+        u'\x53': 'შ',
+        u'\x54': 'თ',
+        u'\x57': 'ჭ',
+        u'\x5a': 'ძ',
+        u'\x61': 'ა',
+        u'\x62': 'ბ',
+        u'\x63': 'ც',
+        u'\x64': 'დ',
+        u'\x65': 'ე',
+        u'\x66': 'ფ',
+        u'\x67': 'გ',
+        u'\x68': 'ჰ',
+        u'\x69': 'ი',
+        u'\x6a': 'ჯ',
+        u'\x6b': 'კ',
+        u'\x6c': 'ლ',
+        u'\x6d': 'მ',
+        u'\x6e': 'ნ',
+        u'\x6f': 'ო',
+        u'\x70': 'პ',
+        u'\x71': 'ქ',
+        u'\x72': 'რ',
+        u'\x73': 'ს',
+        u'\x74': 'ტ',
+        u'\x75': 'უ',
+        u'\x76': 'ვ',
+        u'\x77': 'წ',
+        u'\x78': 'ხ',
+        u'\x79': 'ყ',
+        u'\x7a': 'ზ',
+    }
diff --git a/odfrecode/recoders/greek.py b/odfrecode/recoders/greek.py
new file mode 100644
index 0000000..7610338
--- /dev/null
+++ b/odfrecode/recoders/greek.py
@@ -0,0 +1,154 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2007,2009 Guido Günther <agx@sigxcpu.org>
+# (c) 2007 Torsten Werner <twerner@debian.org>
+#
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the Free Software
+#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+
+import recoder
+
+class Greek(recoder.Recoder):
+    """
+    this class maps greek characters from 0x80-0xff to unicode
+    the mapping table is based on code from VeraWeb's madlan converter
+    """
+
+    encoding = 'greek'  # was 'cyrillic', copied over from the cyrillic recoder
+    dst_encoding = 'utf-16-be'  # each table value is one big-endian utf-16 code unit
+
+    character_table = {
+        u'\x82': '\x20\x1a',
+        u'\x83': '\x01\x92',
+        u'\x84': '\x20\x1e',
+        u'\x85': '\x20\x26',
+        u'\x86': '\x20\x20',
+        u'\x87': '\x20\x21',
+        u'\x88': '\x02\xc6',
+        u'\x89': '\x20\x30',
+        u'\x8a': '\x01\x60',
+        u'\x8b': '\x20\x39',
+        u'\x8c': '\x01\x52',
+        u'\x91': '\x20\x18',
+        u'\x92': '\x20\x19',
+        u'\x93': '\x20\x1c',
+        u'\x94': '\x20\x1d',  # right double quote closing 0x93; was U+2010 (hyphen)
+        u'\x95': '\x20\x22',
+        u'\x96': '\x20\x13',
+        u'\x97': '\x20\x14',
+        u'\x98': '\x02\xdc',
+        u'\x99': '\x21\x22',
+        u'\x9a': '\x01\x61',
+        u'\x9b': '\x20\x3a',
+        u'\x9c': '\x01\x53',
+        u'\x9f': '\x01\x78',
+        u'\xa1': '\x03\x85',
+        u'\xa2': '\x03\x86',
+        u'\xa3': '\x00\xa3',
+        u'\xa4': '\x00\xa4',
+        u'\xa5': '\x00\xa5',
+        u'\xa6': '\x00\xa6',
+        u'\xa7': '\x00\xa7',
+        u'\xa8': '\x00\xa8',
+        u'\xa9': '\x00\xa9',
+        u'\xaa': '\x00\xaa',
+        u'\xab': '\x00\xab',
+        u'\xac': '\x00\xac',
+        u'\xad': '\x00\xad',
+        u'\xae': '\x00\xae',
+        u'\xaf': '\x00\xaf',
+        u'\xb0': '\x00\xb0',
+        u'\xb1': '\x00\xb1',
+        u'\xb2': '\x00\xb2',
+        u'\xb3': '\x00\xb3',
+        u'\xb4': '\x00\xb4',
+        u'\xb5': '\x00\xb5',
+        u'\xb6': '\x00\xb6',
+        u'\xb7': '\x00\xb7',
+        u'\xb8': '\x03\x88',
+        u'\xb9': '\x03\x89',
+        u'\xba': '\x03\x8a',
+        u'\xbb': '\x00\xbb',
+        u'\xbc': '\x03\x8c',
+        u'\xbd': '\x00\xbd',
+        u'\xbe': '\x03\x8e',
+        u'\xbf': '\x03\x8f',
+        u'\xc0': '\x03\x90',
+        u'\xc1': '\x03\x91',
+        u'\xc2': '\x03\x92',
+        u'\xc3': '\x03\x93',
+        u'\xc4': '\x03\x94',
+        u'\xc5': '\x03\x95',
+        u'\xc6': '\x03\x96',
+        u'\xc7': '\x03\x97',
+        u'\xc8': '\x03\x98',
+        u'\xc9': '\x03\x99',
+        u'\xca': '\x03\x9a',
+        u'\xcb': '\x03\x9b',
+        u'\xcc': '\x03\x9c',
+        u'\xcd': '\x03\x9d',
+        u'\xce': '\x03\x9e',
+        u'\xcf': '\x03\x9f',
+        u'\xd0': '\x03\xa0',
+        u'\xd1': '\x03\xa1',
+        u'\xd2': '\x03\xda',
+        u'\xd3': '\x03\xa3',
+        u'\xd4': '\x03\xa4',
+        u'\xd5': '\x03\xa5',
+        u'\xd6': '\x03\xa6',
+        u'\xd7': '\x03\xa7',
+        u'\xd8': '\x03\xa8',
+        u'\xd9': '\x03\xa9',
+        u'\xda': '\x03\xaa',
+        u'\xdb': '\x03\xab',
+        u'\xdc': '\x03\xac',
+        u'\xdd': '\x03\xad',
+        u'\xde': '\x03\xae',
+        u'\xdf': '\x03\xaf',
+        u'\xe0': '\x03\xb0',
+        u'\xe1': '\x03\xb1',
+        u'\xe2': '\x03\xb2',
+        u'\xe3': '\x03\xb3',
+        u'\xe4': '\x03\xb4',
+        u'\xe5': '\x03\xb5',
+        u'\xe6': '\x03\xb6',
+        u'\xe7': '\x03\xb7',
+        u'\xe8': '\x03\xb8',
+        u'\xe9': '\x03\xb9',
+        u'\xea': '\x03\xba',
+        u'\xeb': '\x03\xbb',
+        u'\xec': '\x03\xbc',
+        u'\xed': '\x03\xbd',
+        u'\xee': '\x03\xbe',
+        u'\xef': '\x03\xbf',
+        u'\xf0': '\x03\xc0',
+        u'\xf1': '\x03\xc1',
+        u'\xf2': '\x03\xc2',
+        u'\xf3': '\x03\xc3',
+        u'\xf4': '\x03\xc4',
+        u'\xf5': '\x03\xc5',
+        u'\xf6': '\x03\xc6',
+        u'\xf7': '\x03\xc7',
+        u'\xf8': '\x03\xc8',
+        u'\xf9': '\x03\xc9',
+        u'\xfa': '\x03\xca',
+        u'\xfb': '\x03\xcb',
+        u'\xfc': '\x03\xcc',
+        u'\xfd': '\x03\xcd',  # upsilon with tonos; was U+03CE, which belongs to 0xfe below
+        u'\xfe': '\x03\xce',
+    }
+
diff --git a/odfrecode/recoders/recoder.py b/odfrecode/recoders/recoder.py
new file mode 100644
index 0000000..01a3c1a
--- /dev/null
+++ b/odfrecode/recoders/recoder.py
@@ -0,0 +1,45 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2007,2008 Guido Günther <agx@sigxcpu.org>
+#
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the Free Software
+#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+
+class Recoder(object):
+    """
+    base class for all recoders
+    @classvar dst_encoding: encoding of the character_table values
+    @classvar encoding: name of the encoding
+    To write a new recoder subclass this and add a dictionary that maps each
+    source character to its replacement as a byte string in dst_encoding:
+
+        character_table = { u"national_encoding_1": "utf8_1",
+                            u"national_encoding_2": "utf8_2",
+                          }
+    If you don't want to use utf8 as dictionary values specify dst_encoding
+    """
+    dst_encoding = 'utf-8'
+    encoding = None
+    character_table = {}  # empty default: the bare base class leaves text unchanged
+
+    def recode(self, character):
+        """return the unicode replacement for character, or character itself if unmapped"""
+        try:
+            return unicode(self.character_table[character], self.dst_encoding)
+        except KeyError: # needs no remapping
+            return character
+
diff --git a/odfrecode/recoders/romanian.py b/odfrecode/recoders/romanian.py
new file mode 100644
index 0000000..4f07e9d
--- /dev/null
+++ b/odfrecode/recoders/romanian.py
@@ -0,0 +1,62 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2010 Guido Günther <agx@sigxcpu.org>
+#
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program; if not, write to the Free Software
+#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+
+import recoder
+
+class Romanian(recoder.Recoder):
+    """
+    this class replaces the characters that stand in for Romanian letters by the proper unicode letters
+    See https://bugzilla.redhat.com/show_bug.cgi?id=327501
+    and http://en.wikipedia.org/wiki/Romanian_alphabet for details.
+    """
+
+    encoding = 'romanian'
+    dst_encoding = 'utf-8'
+
+    character_table = {
+        # "S with comma below" (Unicode 0218) - incorrectly implemented as
+        # "S with cedilla" (Unicode 015E)
+        u'Ş': 'Ș',
+        # "s with comma below" (Unicode 0219) - incorrectly implemented as
+        # "s with cedilla" (Unicode 015F)
+        u'ş': 'ș',
+        # "T with comma below" (Unicode 021A) - incorrectly implemented as
+        # "T with cedilla" (Unicode 0162)
+        u'Ţ': 'Ț',
+        # "t with comma below" (Unicode 021B) - incorrectly implemented as
+        # "t with cedilla" (Unicode 0163)
+        u'ţ': 'ț',
+        # Furthermore Microsoft's EasternRoman font places these characters
+        # on code points of the "Latin-1 supplement" block:
+        # S with comma below at 0xaa:
+        u'ª': 'Ș',
+        # s with comma below at 0xba:
+        u'º': 'ș',
+        # T with comma below at 0xde:
+        u'Þ': 'Ț',
+        # t with comma below at 0xfe:
+        u'þ': 'ț',
+        # A with breve at 0xc3:
+        u'Ã': 'Ă',
+        # a with breve at 0xe3:
+        u'ã': 'ă',
+    }
+
author	Guido Günther <agx@sigxcpu.org>	2008-10-11 20:11:00 +0200
committer	Guido Günther <agx@sigxcpu.org>	2008-10-14 22:45:00 +0200
commit	f07e9117d0f018b6ec48d5f5dca599dcff280684 (patch)
tree	45e2dc81fc178079a610bb4d26a56c7e4dc79b23 /odfrecode