aboutsummaryrefslogtreecommitdiff
path: root/odfrecode
diff options
context:
space:
mode:
authorGuido Günther <agx@sigxcpu.org>2008-10-11 20:11:00 +0200
committerGuido Günther <agx@sigxcpu.org>2008-10-14 22:45:00 +0200
commitf07e9117d0f018b6ec48d5f5dca599dcff280684 (patch)
tree45e2dc81fc178079a610bb4d26a56c7e4dc79b23 /odfrecode
Initial commit
Diffstat (limited to 'odfrecode')
-rw-r--r--odfrecode/__init__.py105
-rw-r--r--odfrecode/recoders/__init__.py38
-rw-r--r--odfrecode/recoders/armscii.py125
-rw-r--r--odfrecode/recoders/cyrillic.py151
-rw-r--r--odfrecode/recoders/georgian.py65
-rw-r--r--odfrecode/recoders/greek.py154
-rw-r--r--odfrecode/recoders/recoder.py45
-rw-r--r--odfrecode/recoders/romanian.py62
8 files changed, 745 insertions, 0 deletions
diff --git a/odfrecode/__init__.py b/odfrecode/__init__.py
new file mode 100644
index 0000000..328100e
--- /dev/null
+++ b/odfrecode/__init__.py
@@ -0,0 +1,105 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2007,2008 Guido Günther <agx@sigxcpu.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+import os
+import zipfile
+import tempfile
+import shutil
+import xml.dom.minidom
+
+import recoders
+
+# Archive members that to_utf8() parses as XML and recodes; every other
+# member of the document is copied to the result unchanged.
+__xmls = [ 'content.xml', 'styles.xml' ]
+
+def __convert(textnode, weird_enc):
+    """remap source encoding to the corresponding unicode codepoints
+
+    Every character of textnode.data is passed through weird_enc.recode()
+    and the concatenated results are stored back into textnode.data.
+
+    @param textnode: node whose data attribute is rewritten in place
+    @param weird_enc: object providing a recode(character) method
+    """
+    # join once instead of growing the string character by character
+    textnode.data = u''.join([weird_enc.recode(key) for key in textnode.data])
+
+
+def __recode_xml_tree(node, weird_enc):
+    """walk the DOM tree below node and recode all childless text nodes
+
+    @param node: DOM node to start at
+    @param weird_enc: recoder handed on to __convert
+    """
+    if not node.hasChildNodes():
+        # a leaf: only text nodes carry data that needs recoding
+        if node.nodeType == node.TEXT_NODE:
+            __convert(node, weird_enc)
+        return
+    for child in node.childNodes:
+        __recode_xml_tree(child, weird_enc)
+
+
+def __subst_fonts(contents, fontmap):
+    """substitute fonts according to fontmap
+
+    For each style:text-properties element below contents whose
+    style:font-name attribute is a key of fontmap the attribute is set to
+    the mapped value; all other elements are left untouched.
+
+    @param contents: DOM node to search
+    @param fontmap: font substitution map { oldfont: newfont }; None or an
+        empty map means "substitute nothing"
+    @type fontmap: dict
+    """
+    # to_utf8() passes fontmap=None by default; indexing None raises
+    # TypeError which the KeyError handling below would not catch.
+    if not fontmap:
+        return
+    for prop in contents.getElementsByTagName('style:text-properties'):
+        oldfont = prop.getAttribute('style:font-name')
+        if not oldfont:
+            continue
+        try:
+            newfont = fontmap[oldfont]
+        except KeyError:  # no substitution requested for this font
+            continue
+        prop.setAttribute('style:font-name', newfont)
+
+
+def to_utf8(srcname, encoding, fontmap=None):
+    """
+    convert an odf document from a legacy encoding to unicode
+
+    The result is written to a file with the same base name as srcname
+    inside a newly created temporary directory; srcname itself is only
+    read.  Only the archive members listed in __xmls are recoded, all
+    other members are copied unchanged.
+
+    @param srcname: file to convert
+    @type srcname: string
+    @param encoding: recoder for the encoding used by the source document
+    @type encoding: Recoder instance (see get_recoder)
+    @param fontmap: font substitution map { oldfont1: newfont1, oldfont2: newfont2 },
+        None or empty to keep all fonts
+    @type fontmap: dict
+    @return: path of the converted document
+    @rtype: string
+    """
+
+    tempdir = tempfile.mkdtemp()
+    dstname = os.path.join(tempdir, os.path.basename(srcname))
+
+    try:
+        # No need to copy srcname to dstname first: opening the destination
+        # in mode 'w' starts a new, empty archive anyway.
+        # TODO: should handle the exception (no zipfile)
+        src_odf = zipfile.ZipFile(srcname, 'r')
+        try:
+            dst_odf = zipfile.ZipFile(dstname, 'w')
+            try:
+                for fname in src_odf.namelist():
+                    # TODO: should handle the exception (fname not found)
+                    data = src_odf.read(fname)
+                    if fname in __xmls:
+                        contents = xml.dom.minidom.parseString(data)
+                        __recode_xml_tree(contents, encoding)
+                        # fontmap defaults to None: nothing to substitute
+                        if fontmap:
+                            __subst_fonts(contents, fontmap)
+                        data = contents.toxml('utf-8')
+
+                    dst_odf.writestr(fname, data)
+            finally:
+                dst_odf.close()
+        finally:
+            src_odf.close()
+    except Exception:
+        # don't leave a half written document and its directory behind
+        shutil.rmtree(tempdir, ignore_errors=True)
+        raise
+    return dstname
+
+
+def get_recoder(encoding):
+    """
+    create a recoder for a specific encoding
+    @param encoding: key of the wanted recoder in recoders.recoders
+    @return: a new instance of the class registered for encoding
+    """
+    recoder_class = recoders.recoders[encoding]
+    return recoder_class()
+
+def get_recoders():
+    """
+    get all recoders
+    @return: the recoders.recoders dict (the dict itself, not a copy)
+    """
+    available = recoders.recoders
+    return available
+
+# vim:et:ts=4:sw=4:et:sts=4:ai:set list listchars=tab\:»·,trail\:·:
diff --git a/odfrecode/recoders/__init__.py b/odfrecode/recoders/__init__.py
new file mode 100644
index 0000000..5ec8e84
--- /dev/null
+++ b/odfrecode/recoders/__init__.py
@@ -0,0 +1,38 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2007,2008,2009 Guido Günther <agx@sigxcpu.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+import recoder
+import armscii
+import georgian
+import romanian
+import cyrillic
+import greek
+
+# Registry of the available recoders: encoding name -> recoder class
+recoders = {
+    "armscii8": armscii.Armscii8,
+    "georgian": georgian.Georgian,
+    "romanian": romanian.Romanian,
+    "cyrillic": cyrillic.Cyrillic,
+    "greek": greek.Greek,
+}
+
+# vim:et:ts=4:sw=4:et:sts=4:ai:set list listchars=tab\:»·,trail\:·:
diff --git a/odfrecode/recoders/armscii.py b/odfrecode/recoders/armscii.py
new file mode 100644
index 0000000..9098ba6
--- /dev/null
+++ b/odfrecode/recoders/armscii.py
@@ -0,0 +1,125 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2007,2008 Guido Günther <agx@sigxcpu.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# the mapping table is based on a utf16 encoded table of a vbscript that is:
+# (c) 2003 VAHE GEVORGYAN, UNDER GPL LICENSE
+
+import recoder
+
+class Armscii8(recoder.Recoder):
+    """
+    recoder for armscii8 encoded documents
+
+    The table values are big endian utf-16 code units (see dst_encoding);
+    the entries are listed in ascending order of the source character.
+    """
+
+    dst_encoding = 'utf-16-be'
+    encoding = 'armscii8'
+
+    character_table = {
+        # 0xa3-0xb1
+        u'\xa3': '\x05\x89',
+        u'\xa4': '\x00\x29',
+        u'\xa5': '\x00\x28',
+        u'\xa6': '\x00\xbb',
+        u'\xa7': '\x00\xab',
+        u'\xa8': '\x05\x87',
+        u'\xa9': '\x00\x2e',
+        u'\xaa': '\x05\x5d',
+        u'\xab': '\x00\x2c',
+        u'\xac': '\x00\x2d',
+        u'\xad': '\x05\x8a',
+        u'\xae': '\x20\x26',
+        u'\xaf': '\x05\x5c',
+        u'\xb0': '\x05\x5b',
+        u'\xb1': '\x05\x5e',
+        # 0xb2-0xfd: even keys map to U+0531.., odd keys to U+0561..
+        u'\xb2': '\x05\x31',
+        u'\xb3': '\x05\x61',
+        u'\xb4': '\x05\x32',
+        u'\xb5': '\x05\x62',
+        u'\xb6': '\x05\x33',
+        u'\xb7': '\x05\x63',
+        u'\xb8': '\x05\x34',
+        u'\xb9': '\x05\x64',
+        u'\xba': '\x05\x35',
+        u'\xbb': '\x05\x65',
+        u'\xbc': '\x05\x36',
+        u'\xbd': '\x05\x66',
+        u'\xbe': '\x05\x37',
+        u'\xbf': '\x05\x67',
+        u'\xc0': '\x05\x38',
+        u'\xc1': '\x05\x68',
+        u'\xc2': '\x05\x39',
+        u'\xc3': '\x05\x69',
+        u'\xc4': '\x05\x3a',
+        u'\xc5': '\x05\x6a',
+        u'\xc6': '\x05\x3b',
+        u'\xc7': '\x05\x6b',
+        u'\xc8': '\x05\x3c',
+        u'\xc9': '\x05\x6c',
+        u'\xca': '\x05\x3d',
+        u'\xcb': '\x05\x6d',
+        u'\xcc': '\x05\x3e',
+        u'\xcd': '\x05\x6e',
+        u'\xce': '\x05\x3f',
+        u'\xcf': '\x05\x6f',
+        u'\xd0': '\x05\x40',
+        u'\xd1': '\x05\x70',
+        u'\xd2': '\x05\x41',
+        u'\xd3': '\x05\x71',
+        u'\xd4': '\x05\x42',
+        u'\xd5': '\x05\x72',
+        u'\xd6': '\x05\x43',
+        u'\xd7': '\x05\x73',
+        u'\xd8': '\x05\x44',
+        u'\xd9': '\x05\x74',
+        u'\xda': '\x05\x45',
+        u'\xdb': '\x05\x75',
+        u'\xdc': '\x05\x46',
+        u'\xdd': '\x05\x76',
+        u'\xde': '\x05\x47',
+        u'\xdf': '\x05\x77',
+        u'\xe0': '\x05\x48',
+        u'\xe1': '\x05\x78',
+        u'\xe2': '\x05\x49',
+        u'\xe3': '\x05\x79',
+        u'\xe4': '\x05\x4a',
+        u'\xe5': '\x05\x7a',
+        u'\xe6': '\x05\x4b',
+        u'\xe7': '\x05\x7b',
+        u'\xe8': '\x05\x4c',
+        u'\xe9': '\x05\x7c',
+        u'\xea': '\x05\x4d',
+        u'\xeb': '\x05\x7d',
+        u'\xec': '\x05\x4e',
+        u'\xed': '\x05\x7e',
+        u'\xee': '\x05\x4f',
+        u'\xef': '\x05\x7f',
+        u'\xf0': '\x05\x50',
+        u'\xf1': '\x05\x80',
+        u'\xf2': '\x05\x51',
+        u'\xf3': '\x05\x81',
+        u'\xf4': '\x05\x52',
+        u'\xf5': '\x05\x82',
+        u'\xf6': '\x05\x53',
+        u'\xf7': '\x05\x83',
+        u'\xf8': '\x05\x54',
+        u'\xf9': '\x05\x84',
+        u'\xfa': '\x05\x55',
+        u'\xfb': '\x05\x85',
+        u'\xfc': '\x05\x56',
+        u'\xfd': '\x05\x86',
+        # 0xfe
+        u'\xfe': '\x05\x5a',
+    }
diff --git a/odfrecode/recoders/cyrillic.py b/odfrecode/recoders/cyrillic.py
new file mode 100644
index 0000000..7a3cdda
--- /dev/null
+++ b/odfrecode/recoders/cyrillic.py
@@ -0,0 +1,151 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2007,2009 Guido Günther <agx@sigxcpu.org>
+# (c) 2007 Torsten Werner <twerner@debian.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+import recoder
+
+class Cyrillic(recoder.Recoder):
+    """
+    this class maps cyrillic characters from 0x7f-0xff to unicode
+    the mapping table is based on code from VeraWeb's madlan converter
+
+    The table values are big endian utf-16 code units (see dst_encoding).
+    """
+
+    encoding = 'cyrillic'
+    dst_encoding = 'utf-16-be'
+
+    character_table = {
+        # NOTE(review): 0x7f and 0x8d both map to U+040C, and 0x88/0x98 map
+        # to a plain space - kept as found, confirm against the madlan table
+        u'\x7f': '\x04\x0c',
+        u'\x80': '\x04\x02',
+        u'\x81': '\x04\x03',
+        u'\x82': '\x20\x1a',
+        u'\x83': '\x04\x53',
+        u'\x84': '\x20\x1e',
+        u'\x85': '\x20\x26',
+        u'\x86': '\x20\x20',
+        u'\x87': '\x20\x21',
+        u'\x88': '\x00\x20',
+        u'\x89': '\x20\x30',
+        u'\x8a': '\x04\x09',
+        u'\x8b': '\x20\x39',
+        u'\x8c': '\x04\x0a',
+        u'\x8d': '\x04\x0c',
+        u'\x8e': '\x04\x0b',
+        u'\x8f': '\x04\x0f',
+        u'\x90': '\x04\x52',
+        u'\x91': '\x20\x18',
+        u'\x92': '\x20\x19',
+        u'\x93': '\x20\x1c',
+        u'\x94': '\x20\x1d',
+        u'\x95': '\x20\x22',
+        u'\x96': '\x20\x13',
+        u'\x97': '\x20\x14',
+        u'\x98': '\x00\x20',
+        u'\x99': '\x21\x22',
+        u'\x9a': '\x04\x59',
+        u'\x9b': '\x20\x3a',
+        u'\x9c': '\x04\x5a',
+        u'\x9d': '\x04\x5c',
+        u'\x9e': '\x04\x5b',
+        u'\x9f': '\x04\x5f',
+        u'\xa0': '\x00\x20',
+        u'\xa1': '\x04\x0e',
+        u'\xa2': '\x04\x5e',
+        u'\xa3': '\x04\x08',
+        u'\xa5': '\x04\x90',
+        u'\xa8': '\x04\x01',
+        u'\xaa': '\x04\x04',
+        u'\xaf': '\x04\x07',
+        u'\xb2': '\x04\x06',
+        # lower case partner of 0xb2 (U+0406); used to be U+0457 which
+        # duplicated the entry for 0xbf
+        u'\xb3': '\x04\x56',
+        u'\xb4': '\x04\x91',
+        u'\xb8': '\x04\x51',
+        u'\xb9': '\x21\x16',
+        u'\xba': '\x04\x54',
+        u'\xbc': '\x04\x58',
+        u'\xbd': '\x04\x05',
+        u'\xbe': '\x04\x55',
+        u'\xbf': '\x04\x57',
+        # 0xc0-0xff map linearly to U+0410-U+044F
+        u'\xc0': '\x04\x10',
+        u'\xc1': '\x04\x11',
+        u'\xc2': '\x04\x12',
+        u'\xc3': '\x04\x13',
+        u'\xc4': '\x04\x14',
+        u'\xc5': '\x04\x15',
+        u'\xc6': '\x04\x16',
+        u'\xc7': '\x04\x17',
+        u'\xc8': '\x04\x18',
+        u'\xc9': '\x04\x19',
+        u'\xca': '\x04\x1a',
+        u'\xcb': '\x04\x1b',
+        u'\xcc': '\x04\x1c',
+        u'\xcd': '\x04\x1d',
+        u'\xce': '\x04\x1e',
+        u'\xcf': '\x04\x1f',
+        u'\xd0': '\x04\x20',
+        u'\xd1': '\x04\x21',
+        u'\xd2': '\x04\x22',
+        u'\xd3': '\x04\x23',
+        u'\xd4': '\x04\x24',
+        u'\xd5': '\x04\x25',
+        u'\xd6': '\x04\x26',
+        u'\xd7': '\x04\x27',
+        u'\xd8': '\x04\x28',
+        u'\xd9': '\x04\x29',
+        u'\xda': '\x04\x2a',
+        u'\xdb': '\x04\x2b',
+        u'\xdc': '\x04\x2c',
+        u'\xdd': '\x04\x2d',
+        u'\xde': '\x04\x2e',
+        u'\xdf': '\x04\x2f',
+        u'\xe0': '\x04\x30',
+        u'\xe1': '\x04\x31',
+        u'\xe2': '\x04\x32',
+        u'\xe3': '\x04\x33',
+        u'\xe4': '\x04\x34',
+        u'\xe5': '\x04\x35',
+        u'\xe6': '\x04\x36',
+        u'\xe7': '\x04\x37',
+        u'\xe8': '\x04\x38',
+        u'\xe9': '\x04\x39',
+        u'\xea': '\x04\x3a',
+        u'\xeb': '\x04\x3b',
+        u'\xec': '\x04\x3c',
+        u'\xed': '\x04\x3d',
+        u'\xee': '\x04\x3e',
+        u'\xef': '\x04\x3f',
+        u'\xf0': '\x04\x40',
+        u'\xf1': '\x04\x41',
+        u'\xf2': '\x04\x42',
+        u'\xf3': '\x04\x43',
+        u'\xf4': '\x04\x44',
+        u'\xf5': '\x04\x45',
+        u'\xf6': '\x04\x46',
+        u'\xf7': '\x04\x47',
+        u'\xf8': '\x04\x48',
+        u'\xf9': '\x04\x49',
+        u'\xfa': '\x04\x4a',
+        u'\xfb': '\x04\x4b',
+        u'\xfc': '\x04\x4c',
+        u'\xfd': '\x04\x4d',
+        u'\xfe': '\x04\x4e',
+        u'\xff': '\x04\x4f',
+    }
+
diff --git a/odfrecode/recoders/georgian.py b/odfrecode/recoders/georgian.py
new file mode 100644
index 0000000..247cc5a
--- /dev/null
+++ b/odfrecode/recoders/georgian.py
@@ -0,0 +1,65 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2007,2008 Guido Günther <agx@sigxcpu.org>
+# (c) 2008 Torsten Werner <twerner@debian.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+import recoder
+
+class Georgian(recoder.Recoder):
+    """
+    recoder for the georgian encoding: ASCII letters used as stand-ins
+    are replaced by the corresponding unicode characters
+    """
+
+    dst_encoding = 'utf-8'
+    encoding = 'georgian'
+
+    character_table = {
+        # keys in the ASCII upper case range (0x43-0x5a)
+        u'\x43': 'ჩ',
+        u'\x4a': 'ჟ',
+        u'\x52': 'ღ',
+        u'\x53': 'შ',
+        u'\x54': 'თ',
+        u'\x57': 'ჭ',
+        u'\x5a': 'ძ',
+        # keys in the ASCII lower case range (0x61-0x7a)
+        u'\x61': 'ა',
+        u'\x62': 'ბ',
+        u'\x63': 'ც',
+        u'\x64': 'დ',
+        u'\x65': 'ე',
+        u'\x66': 'ფ',
+        u'\x67': 'გ',
+        u'\x68': 'ჰ',
+        u'\x69': 'ი',
+        u'\x6a': 'ჯ',
+        u'\x6b': 'კ',
+        u'\x6c': 'ლ',
+        u'\x6d': 'მ',
+        u'\x6e': 'ნ',
+        u'\x6f': 'ო',
+        u'\x70': 'პ',
+        u'\x71': 'ქ',
+        u'\x72': 'რ',
+        u'\x73': 'ს',
+        u'\x74': 'ტ',
+        u'\x75': 'უ',
+        u'\x76': 'ვ',
+        u'\x77': 'წ',
+        u'\x78': 'ხ',
+        u'\x79': 'ყ',
+        u'\x7a': 'ზ',
+    }
diff --git a/odfrecode/recoders/greek.py b/odfrecode/recoders/greek.py
new file mode 100644
index 0000000..7610338
--- /dev/null
+++ b/odfrecode/recoders/greek.py
@@ -0,0 +1,154 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2007,2009 Guido Günther <agx@sigxcpu.org>
+# (c) 2007 Torsten Werner <twerner@debian.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+import recoder
+
+class Greek(recoder.Recoder):
+    """
+    this class maps greek characters from 0x80-0xff to unicode
+    the mapping table is based on code from VeraWeb's madlan converter
+
+    The table values are big endian utf-16 code units (see dst_encoding).
+    """
+
+    # was 'cyrillic', left over from copying the cyrillic recoder
+    encoding = 'greek'
+    dst_encoding = 'utf-16-be'
+
+    character_table = {
+        u'\x82': '\x20\x1a',
+        u'\x83': '\x01\x92',
+        u'\x84': '\x20\x1e',
+        u'\x85': '\x20\x26',
+        u'\x86': '\x20\x20',
+        u'\x87': '\x20\x21',
+        u'\x88': '\x02\xc6',
+        u'\x89': '\x20\x30',
+        u'\x8a': '\x01\x60',
+        u'\x8b': '\x20\x39',
+        u'\x8c': '\x01\x52',
+        u'\x91': '\x20\x18',
+        u'\x92': '\x20\x19',
+        u'\x93': '\x20\x1c',
+        # closing counterpart of 0x93 (U+201C); used to be U+2010
+        u'\x94': '\x20\x1d',
+        u'\x95': '\x20\x22',
+        u'\x96': '\x20\x13',
+        u'\x97': '\x20\x14',
+        u'\x98': '\x02\xdc',
+        u'\x99': '\x21\x22',
+        u'\x9a': '\x01\x61',
+        u'\x9b': '\x20\x3a',
+        u'\x9c': '\x01\x53',
+        u'\x9f': '\x01\x78',
+        u'\xa1': '\x03\x85',
+        u'\xa2': '\x03\x86',
+        u'\xa3': '\x00\xa3',
+        u'\xa4': '\x00\xa4',
+        u'\xa5': '\x00\xa5',
+        u'\xa6': '\x00\xa6',
+        u'\xa7': '\x00\xa7',
+        u'\xa8': '\x00\xa8',
+        u'\xa9': '\x00\xa9',
+        u'\xaa': '\x00\xaa',
+        u'\xab': '\x00\xab',
+        u'\xac': '\x00\xac',
+        u'\xad': '\x00\xad',
+        u'\xae': '\x00\xae',
+        u'\xaf': '\x00\xaf',
+        u'\xb0': '\x00\xb0',
+        u'\xb1': '\x00\xb1',
+        u'\xb2': '\x00\xb2',
+        u'\xb3': '\x00\xb3',
+        u'\xb4': '\x00\xb4',
+        u'\xb5': '\x00\xb5',
+        u'\xb6': '\x00\xb6',
+        u'\xb7': '\x00\xb7',
+        u'\xb8': '\x03\x88',
+        u'\xb9': '\x03\x89',
+        u'\xba': '\x03\x8a',
+        u'\xbb': '\x00\xbb',
+        u'\xbc': '\x03\x8c',
+        u'\xbd': '\x00\xbd',
+        u'\xbe': '\x03\x8e',
+        u'\xbf': '\x03\x8f',
+        u'\xc0': '\x03\x90',
+        u'\xc1': '\x03\x91',
+        u'\xc2': '\x03\x92',
+        u'\xc3': '\x03\x93',
+        u'\xc4': '\x03\x94',
+        u'\xc5': '\x03\x95',
+        u'\xc6': '\x03\x96',
+        u'\xc7': '\x03\x97',
+        u'\xc8': '\x03\x98',
+        u'\xc9': '\x03\x99',
+        u'\xca': '\x03\x9a',
+        u'\xcb': '\x03\x9b',
+        u'\xcc': '\x03\x9c',
+        u'\xcd': '\x03\x9d',
+        u'\xce': '\x03\x9e',
+        u'\xcf': '\x03\x9f',
+        u'\xd0': '\x03\xa0',
+        u'\xd1': '\x03\xa1',
+        # NOTE(review): breaks the linear run of its neighbours (U+03A1,
+        # U+03A3); kept as found - confirm against the madlan table
+        u'\xd2': '\x03\xda',
+        u'\xd3': '\x03\xa3',
+        u'\xd4': '\x03\xa4',
+        u'\xd5': '\x03\xa5',
+        u'\xd6': '\x03\xa6',
+        u'\xd7': '\x03\xa7',
+        u'\xd8': '\x03\xa8',
+        u'\xd9': '\x03\xa9',
+        u'\xda': '\x03\xaa',
+        u'\xdb': '\x03\xab',
+        u'\xdc': '\x03\xac',
+        u'\xdd': '\x03\xad',
+        u'\xde': '\x03\xae',
+        u'\xdf': '\x03\xaf',
+        u'\xe0': '\x03\xb0',
+        u'\xe1': '\x03\xb1',
+        u'\xe2': '\x03\xb2',
+        u'\xe3': '\x03\xb3',
+        u'\xe4': '\x03\xb4',
+        u'\xe5': '\x03\xb5',
+        u'\xe6': '\x03\xb6',
+        u'\xe7': '\x03\xb7',
+        u'\xe8': '\x03\xb8',
+        u'\xe9': '\x03\xb9',
+        u'\xea': '\x03\xba',
+        u'\xeb': '\x03\xbb',
+        u'\xec': '\x03\xbc',
+        u'\xed': '\x03\xbd',
+        u'\xee': '\x03\xbe',
+        u'\xef': '\x03\xbf',
+        u'\xf0': '\x03\xc0',
+        u'\xf1': '\x03\xc1',
+        u'\xf2': '\x03\xc2',
+        u'\xf3': '\x03\xc3',
+        u'\xf4': '\x03\xc4',
+        u'\xf5': '\x03\xc5',
+        u'\xf6': '\x03\xc6',
+        u'\xf7': '\x03\xc7',
+        u'\xf8': '\x03\xc8',
+        u'\xf9': '\x03\xc9',
+        u'\xfa': '\x03\xca',
+        u'\xfb': '\x03\xcb',
+        u'\xfc': '\x03\xcc',
+        # used to be U+03CE which duplicated the entry for 0xfe
+        u'\xfd': '\x03\xcd',
+        u'\xfe': '\x03\xce',
+    }
+
diff --git a/odfrecode/recoders/recoder.py b/odfrecode/recoders/recoder.py
new file mode 100644
index 0000000..01a3c1a
--- /dev/null
+++ b/odfrecode/recoders/recoder.py
@@ -0,0 +1,45 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2007,2008 Guido Günther <agx@sigxcpu.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+class Recoder(object):
+    """
+    base class for all recoders
+    @classvar dst_encoding: encoding of the destination "column"
+                            (the values of character_table)
+    @classvar encoding: name of the encoding
+    @classvar character_table: maps a source character to its replacement,
+                               stored as a string encoded in dst_encoding
+
+    To write a new recoder simply add a dictionary
+
+    >>> character_table = { "national_encoding_1": "utf8_1",
+    ...                     "national_encoding_2": "utf8_2",
+    ...                   }
+
+    If you don't want to use utf8 as dictionary values specify dst_encoding
+    """
+    dst_encoding = 'utf-8'
+    encoding = None
+    # empty by default so that the base class itself is usable and simply
+    # passes every character through
+    character_table = {}
+
+    def recode(self, character):
+        """
+        return the unicode replacement for character
+        @param character: a single character of the source text
+        @return: the decoded table entry, or character itself if the table
+                 has no entry for it
+        """
+        try:
+            replacement = self.character_table[character]
+        except KeyError:  # needs no remapping
+            return character
+        return unicode(replacement, self.dst_encoding)
+
diff --git a/odfrecode/recoders/romanian.py b/odfrecode/recoders/romanian.py
new file mode 100644
index 0000000..4f07e9d
--- /dev/null
+++ b/odfrecode/recoders/romanian.py
@@ -0,0 +1,62 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2010 Guido Günther <agx@sigxcpu.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+import recoder
+
+class Romanian(recoder.Recoder):
+    """
+    recoder that replaces stand-ins for Romanian letters by the proper
+    unicode characters
+    See https://bugzilla.redhat.com/show_bug.cgi?id=327501
+    and http://en.wikipedia.org/wiki/Romanian_alphabet for details.
+    """
+
+    dst_encoding = 'utf-8'
+    encoding = 'romanian'
+
+    character_table = {
+        # The "comma below" letters are incorrectly implemented as their
+        # "cedilla below" counterparts:
+        #   S: Unicode 015E -> Unicode 0218
+        u'Ş': 'Ș',
+        #   s: Unicode 015F -> Unicode 0219
+        u'ş': 'ș',
+        #   T: Unicode 0162 -> Unicode 021A
+        u'Ţ': 'Ț',
+        #   t: Unicode 0163 -> Unicode 021B
+        u'ţ': 'ț',
+        # Furthermore the Microsoft's EasternRoman Font has mappings from
+        # "Latin-1 supplement" of these characters:
+        #   0xaa: S with comma below
+        u'ª': 'Ș',
+        #   0xba: s with comma below
+        u'º': 'ș',
+        #   0xde: T with comma below
+        u'Þ': 'Ț',
+        #   0xfe: t with comma below
+        u'þ': 'ț',
+        #   0xc3: A with breve
+        u'Ã': 'Ă',
+        #   0xe3: a with breve
+        u'ã': 'ă',
+    }
+