aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- bin/odfrecode 75
-rw-r--r-- bin/odfrecode-gtk 116
-rw-r--r-- odfrecode/__init__.py 105
-rw-r--r-- odfrecode/recoders/__init__.py 38
-rw-r--r-- odfrecode/recoders/armscii.py 125
-rw-r--r-- odfrecode/recoders/cyrillic.py 151
-rw-r--r-- odfrecode/recoders/georgian.py 65
-rw-r--r-- odfrecode/recoders/greek.py 154
-rw-r--r-- odfrecode/recoders/recoder.py 45
-rw-r--r-- odfrecode/recoders/romanian.py 62
-rw-r--r-- setup.py 13
11 files changed, 949 insertions, 0 deletions
diff --git a/bin/odfrecode b/bin/odfrecode
new file mode 100644
index 0000000..401f46d
--- /dev/null
+++ b/bin/odfrecode
@@ -0,0 +1,75 @@
+#!/usr/bin/python
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# simple national to unicode open document format converter
+#
+# Migrating from M* W*rd to ODF can be a pain if documents use characters from
+# fonts that only support some non unicode encodings like armscii. Remap these
+# to the appropriate unicode codepoints. Any corrections are very welcome.
+#
+# (c) 2007,2008,2009 Guido Günther <agx@sigxcpu.org>
+# (c) 2008 Torsten Werner <twerner@debian.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+import sys, os, shutil
+import odfrecode
+from optparse import OptionParser
+
+def main(argv):
+    """
+    command line entry point: recode a single ODF document in place
+
+    The original document is first saved as <name>.<recoder><extension> next
+    to the source file, then the source file is replaced by the recoded copy
+    and its path is printed.
+
+    @param argv: command line arguments (unused, optparse reads sys.argv)
+    @type argv: list
+    @return: 1 on usage errors, None on success
+    """
+    parser = OptionParser()
+    parser.add_option("--recoder", dest="recoder",
+                      help="use recoder RECODER", metavar="recoder")
+
+    encoding = sys.getfilesystemencoding()
+
+    (options, args) = parser.parse_args()
+
+    if len(args) != 1:
+        print >>sys.stderr, "Need a file to convert."
+        return 1
+    filename = os.path.abspath(args[0])
+
+    if not options.recoder:
+        print >>sys.stderr, "Missing recoder."
+        return 1
+    try:
+        converter = odfrecode.get_recoder(options.recoder)
+    except KeyError:
+        print >>sys.stderr, "No recoder for '%s' found." % options.recoder
+        print >>sys.stderr, "Available recoders: %s." % \
+                            ", ".join(sorted(odfrecode.get_recoders().keys()))
+        return 1
+
+    # splitext only looks at the last path component, so a missing extension
+    # or a dot in a directory name cannot break the backup name
+    prefix, postfix = os.path.splitext(filename.decode(encoding))
+    backup = u"%s.%s%s" % (prefix, options.recoder, postfix)
+    try:
+        os.unlink(backup)
+    except OSError:
+        pass # file doesn't exist
+    shutil.copy(filename, backup)
+
+    # to_utf8 writes the recoded document to a temporary location
+    dstname = odfrecode.to_utf8(filename, converter)
+    os.unlink(filename)
+    shutil.copy(dstname, filename)
+    os.unlink(dstname)
+
+    print filename
+
+if __name__ == "__main__":
+    # hand the result of main() to the shell so that errors give a non zero
+    # exit status (None, the success case, becomes 0)
+    sys.exit(main(sys.argv))
+
+# vim:et:ts=4:sw=4:et:sts=4:ai:set list listchars=tab\:»·,trail\:·:
diff --git a/bin/odfrecode-gtk b/bin/odfrecode-gtk
new file mode 100644
index 0000000..bd822c3
--- /dev/null
+++ b/bin/odfrecode-gtk
@@ -0,0 +1,116 @@
+#!/usr/bin/python
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# call the appropriate recoders for the unicode open document format converter
+#
+# Migrating from M* W*rd to ODF can be a pain if documents use characters from
+# fonts that only support some non unicode encodings like armscii. Remap these
+# to the appropriate unicode codepoints. Any corrections are very welcome.
+#
+# (c) 2007,2008,2009 Guido Günther <agx@sigxcpu.org>
+# (c) 2008 Torsten Werner <twerner@debian.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+import sys, os, shutil
+import odfrecode
+from optparse import OptionParser
+import gtk
+import gnomevfs
+
+# Font substitutions handed to odfrecode.to_utf8(): { old font name: new font name }
+# NOTE(review): recoders/romanian.py calls the font "EasternRoman" - confirm
+# which spelling the documents really use.
+fontmap = { "EastRoman": "DejaVu Sans" }
+
+class ConvError(Exception):
+    """conversion problem that main() reports to the user in an error dialog"""
+
+class SuccessDialog(gtk.Dialog):
+    """
+    dialog shown after a successful conversion
+
+    It offers a link button that opens the converted document and names the
+    backup copy of the original.
+    """
+    def __init__(self, filename, backup):
+        """
+        @param filename: absolute path of the converted document
+        @type filename: string
+        @param backup: path of the backup copy of the original document
+        @type backup: string
+        """
+        import urllib
+
+        # percent-encode the path: blanks, '#', '%' or '?' in a file name
+        # would otherwise give a broken file URI
+        self.uri = "file://%s" % urllib.pathname2url(filename)
+        gtk.Dialog.__init__(self, buttons=(gtk.STOCK_OK, gtk.RESPONSE_ACCEPT))
+        label = gtk.Label("successfully converted to Unicode.")
+        uri = gtk.LinkButton(self.uri, label=os.path.basename(filename))
+        uri.connect('clicked', self.show_uri)
+        uri.set_tooltip_text("Click to open '%s'" % filename)
+
+        stock = gtk.image_new_from_stock(gtk.STOCK_DIALOG_INFO,
+                                         gtk.ICON_SIZE_DIALOG)
+        # icon, link and text share one row so they read as one sentence
+        hbox = gtk.HBox(False, 3)
+        hbox.pack_start(stock, True, True, 0)
+        hbox.add(uri)
+        hbox.add(label)
+        self.vbox.add(hbox)
+        self.vbox.add(gtk.Label("A backup copy was saved as '%s'."
+                                % os.path.basename(backup)))
+        self.show_all()
+
+    def show_uri(self, dummy):
+        """
+        open the converted document and close the dialog
+
+        @param dummy: the clicked widget (unused)
+        """
+        gnomevfs.url_show(self.uri)
+        self.destroy()
+
+def main(argv):
+    """
+    entry point of the graphical front end
+
+    Converts the document named on the command line in place, keeping a
+    backup next to it, and reports success or failure in a dialog.
+
+    @param argv: command line arguments (unused, optparse reads sys.argv)
+    @type argv: list
+    """
+    try:
+        encoding = sys.getfilesystemencoding()
+        parser = OptionParser()
+        parser.add_option("--recoder", dest="recoder",
+                          help="use recoder RECODER", metavar="recoder")
+        (options, args) = parser.parse_args()
+
+        if len(args) != 1:
+            raise ConvError, "Need a file to convert."
+
+        filename = os.path.abspath(args[0])
+        try:
+            fileinfo = gnomevfs.get_file_info(filename, gnomevfs.FILE_INFO_GET_MIME_TYPE)
+        except gnomevfs.NotFoundError:
+            raise ConvError, "'%s' nicht gefunden" % filename
+        # only text documents and text templates are accepted
+        if fileinfo.mime_type not in [ "application/vnd.oasis.opendocument.text",
+                                       "application/vnd.oasis.opendocument.text-template" ]:
+            raise ConvError, "'%s' is no OpenDocument Text, but '%s'." % (filename, fileinfo.mime_type)
+
+        if not options.recoder:
+            raise ConvError, "Missing converter.\nSupported converters are: '%s'" % \
+                             ", ".join(odfrecode.get_recoders().keys())
+        try:
+            converter = odfrecode.get_recoder(options.recoder)
+        except KeyError:
+            raise ConvError, "No recoder for '%s' found." % options.recoder
+
+        # splitext only looks at the last path component, so a missing
+        # extension or a dot in a directory name cannot break the backup name
+        prefix, postfix = os.path.splitext(filename.decode(encoding))
+        backup = u"%s.%s%s" % (prefix, options.recoder, postfix)
+        try:
+            os.unlink(backup)
+        except OSError:
+            pass # file doesn't exist
+        shutil.copy(filename, backup)
+
+        # to_utf8 writes the recoded document to a temporary location
+        dstname = odfrecode.to_utf8(filename, converter, fontmap)
+        os.unlink(filename)
+        shutil.copy(dstname, filename)
+        os.unlink(dstname)
+        dialog = SuccessDialog(filename, backup)
+    except ConvError, error:
+        dialog = gtk.MessageDialog(type=gtk.MESSAGE_ERROR, buttons=gtk.BUTTONS_OK,
+                                   message_format="Error during conversion")
+        dialog.format_secondary_text(str(error))
+    # success and error dialog are shown the same way
+    dialog.set_property("title", "ODF Charset Converter")
+    dialog.run()
+    dialog.destroy()
+
+if __name__ == "__main__":
+    main(sys.argv)
+
+# vim:et:ts=4:sw=4:et:sts=4:ai:set list listchars=tab\:»·,trail\:·:
diff --git a/odfrecode/__init__.py b/odfrecode/__init__.py
new file mode 100644
index 0000000..328100e
--- /dev/null
+++ b/odfrecode/__init__.py
@@ -0,0 +1,105 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2007,2008 Guido Günther <agx@sigxcpu.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+import os
+import zipfile
+import tempfile
+import shutil
+import xml.dom.minidom
+
+import recoders
+
+# archive members that to_utf8() parses and recodes, all other members are
+# copied unchanged
+__xmls = [ 'content.xml', 'styles.xml' ]
+
+def __convert(textnode, weird_enc):
+    """
+    remap source encoding to the corresponding unicode codepoints
+
+    @param textnode: node whose data attribute is rewritten in place
+    @type textnode: xml.dom.minidom text node
+    @param weird_enc: recoder that maps every single character
+    @type weird_enc: Recoder subclass
+    """
+    # build the result with one join instead of growing a string character
+    # by character
+    textnode.data = u''.join([weird_enc.recode(key) for key in textnode.data])
+
+
+def __recode_xml_tree(node, weird_enc):
+    """
+    walk the tree below node and recode every childless text node
+
+    @param node: root of the (sub)tree to process
+    @type node: xml.dom.minidom node
+    @param weird_enc: recoder handed on to __convert
+    @type weird_enc: Recoder subclass
+    """
+    if not node.hasChildNodes():
+        # leaf: only text carries characters that need remapping
+        if node.nodeType == node.TEXT_NODE:
+            __convert(node, weird_enc)
+        return
+    for child in node.childNodes:
+        __recode_xml_tree(child, weird_enc)
+
+
+def __subst_fonts(contents, fontmap):
+    """
+    substitute fonts according to fontmap
+
+    Every style:text-properties element whose style:font-name is a key of
+    fontmap gets the mapped font name instead, all others stay untouched.
+
+    @param contents: parsed document, modified in place
+    @type contents: xml.dom.minidom document
+    @param fontmap: { oldfont: newfont } or None for no substitution
+    @type fontmap: dict
+    """
+    if not fontmap:
+        # to_utf8() defaults fontmap to None: nothing to substitute
+        return
+    for prop in contents.getElementsByTagName('style:text-properties'):
+        oldfont = prop.getAttribute('style:font-name')
+        # getAttribute gives an empty string when the attribute is missing
+        if oldfont and oldfont in fontmap:
+            prop.setAttribute('style:font-name', fontmap[oldfont])
+
+
+def to_utf8(srcname, encoding, fontmap=None):
+    """
+    convert a odf document from encoding to unicode
+
+    The converted document is written into a newly created temporary
+    directory, srcname itself is only read. Removing the result is left to
+    the caller.
+
+    @param srcname: file to convert
+    @type srcname: string
+    @param encoding: recoder for the encoding the document text is in
+    @type encoding: Recoder subclass
+    @param fontmap: font substitution map { oldfont1: newfont1, oldfont2: newfont2 }
+    @type fontmap: dict
+    @return: path of the converted copy
+    @rtype: string
+    """
+
+    tempdir = tempfile.mkdtemp()
+    dstname = os.path.join(tempdir, os.path.basename(srcname))
+
+    # TODO: should handle the exception (no zipfile)
+    src_odf = zipfile.ZipFile(srcname, 'r')
+    try:
+        dst_odf = zipfile.ZipFile(dstname, 'w')
+        try:
+            # infolist() keeps the member order of the source archive
+            for info in src_odf.infolist():
+                # TODO: should handle the exception (fname not found)
+                data = src_odf.read(info.filename)
+                if info.filename in __xmls:
+                    contents = xml.dom.minidom.parseString(data)
+                    __recode_xml_tree(contents, encoding)
+                    if fontmap:
+                        __subst_fonts(contents, fontmap)
+                    data = contents.toxml('utf-8')
+
+                # writing with the source ZipInfo keeps compression method
+                # and time stamp of every member
+                dst_odf.writestr(info, data)
+        finally:
+            dst_odf.close()
+    finally:
+        # close the archives even when parsing or recoding fails
+        src_odf.close()
+    return dstname
+
+
+def get_recoder(encoding):
+    """
+    create the recoder registered for a specific encoding
+
+    @param encoding: name of the recoder, a key of recoders.recoders
+    @type encoding: string
+    @return: new instance of the registered recoder class
+    @raise KeyError: no recoder is registered under that name
+    """
+    recoder_class = recoders.recoders[encoding]
+    return recoder_class()
+
+def get_recoders():
+    """
+    hand out the registry of all recoders
+
+    @return: dict mapping recoder names to recoder classes
+    """
+    registry = recoders.recoders
+    return registry
+
+# vim:et:ts=4:sw=4:et:sts=4:ai:set list listchars=tab\:»·,trail\:·:
diff --git a/odfrecode/recoders/__init__.py b/odfrecode/recoders/__init__.py
new file mode 100644
index 0000000..5ec8e84
--- /dev/null
+++ b/odfrecode/recoders/__init__.py
@@ -0,0 +1,38 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2007,2008,2009 Guido Günther <agx@sigxcpu.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+import recoder
+import armscii
+import georgian
+import romanian
+import cyrillic
+import greek
+
+# List of available recoders: maps the recoder name to the class implementing
+# it. odfrecode.get_recoder() looks the name up here and instantiates the class.
+recoders = {
+ "armscii8": armscii.Armscii8,
+ "georgian": georgian.Georgian,
+ "romanian": romanian.Romanian,
+ "cyrillic": cyrillic.Cyrillic,
+ "greek": greek.Greek,
+ }
+
+# vim:et:ts=4:sw=4:et:sts=4:ai:set list listchars=tab\:»·,trail\:·:
diff --git a/odfrecode/recoders/armscii.py b/odfrecode/recoders/armscii.py
new file mode 100644
index 0000000..9098ba6
--- /dev/null
+++ b/odfrecode/recoders/armscii.py
@@ -0,0 +1,125 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2007,2008 Guido Günther <agx@sigxcpu.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+# the mapping table is based on a utf16 encoded table of a vbscript that is:
+# (c) 2003 VAHE GEVORGYAN, UNDER GPL LICENSE
+
+import recoder
+
+class Armscii8(recoder.Recoder):
+ """
+ this class maps armscii8 to unicode
+
+ Keys are the characters as found in the document text (0xa3-0xfe), values
+ are the replacement code points written as big endian utf-16 byte pairs
+ (see dst_encoding). Characters without an entry are left unchanged by
+ Recoder.recode.
+ """
+
+ encoding = 'armscii8'
+ dst_encoding = 'utf-16-be'
+
+ character_table = {
+ u'\xd8': '\x05\x44',
+ u'\xd9': '\x05\x74',
+ u'\xd6': '\x05\x43',
+ u'\xd7': '\x05\x73',
+ u'\xd4': '\x05\x42',
+ u'\xd5': '\x05\x72',
+ u'\xd2': '\x05\x41',
+ u'\xd3': '\x05\x71',
+ u'\xda': '\x05\x45',
+ u'\xdb': '\x05\x75',
+ u'\xa4': '\x00\x29',
+ u'\xa5': '\x00\x28',
+ u'\xa6': '\x00\xbb',
+ u'\xa7': '\x00\xab',
+ u'\xdc': '\x05\x46',
+ u'\xc7': '\x05\x6b',
+ u'\xc6': '\x05\x3b',
+ u'\xc3': '\x05\x69',
+ u'\xc2': '\x05\x39',
+ u'\xc5': '\x05\x6a',
+ u'\xc4': '\x05\x3a',
+ u'\xbf': '\x05\x67',
+ u'\xbe': '\x05\x37',
+ u'\xc1': '\x05\x68',
+ u'\xc0': '\x05\x38',
+ u'\xfc': '\x05\x56',
+ u'\xfd': '\x05\x86',
+ u'\xfa': '\x05\x55',
+ u'\xfb': '\x05\x85',
+ u'\xfe': '\x05\x5a',
+ u'\xf5': '\x05\x82',
+ u'\xf4': '\x05\x52',
+ u'\xf7': '\x05\x83',
+ u'\xf6': '\x05\x53',
+ u'\xf1': '\x05\x80',
+ u'\xf0': '\x05\x50',
+ u'\xf3': '\x05\x81',
+ u'\xf2': '\x05\x51',
+ u'\xf9': '\x05\x84',
+ u'\xf8': '\x05\x54',
+ u'\xb3': '\x05\x61',
+ u'\xb2': '\x05\x31',
+ u'\xb1': '\x05\x5e',
+ u'\xb0': '\x05\x5b',
+ u'\xaf': '\x05\x5c',
+ u'\xae': '\x20\x26',
+ u'\xad': '\x05\x8a',
+ u'\xac': '\x00\x2d',
+ u'\xab': '\x00\x2c',
+ u'\xaa': '\x05\x5d',
+ u'\xb6': '\x05\x33',
+ u'\xb7': '\x05\x63',
+ u'\xb4': '\x05\x32',
+ u'\xb5': '\x05\x62',
+ u'\xba': '\x05\x35',
+ u'\xbb': '\x05\x65',
+ u'\xb8': '\x05\x34',
+ u'\xb9': '\x05\x64',
+ u'\xbc': '\x05\x36',
+ u'\xbd': '\x05\x66',
+ u'\xa8': '\x05\x87',
+ u'\xa9': '\x00\x2e',
+ u'\xe5': '\x05\x7a',
+ u'\xe4': '\x05\x4a',
+ u'\xe3': '\x05\x79',
+ u'\xe2': '\x05\x49',
+ u'\xe1': '\x05\x78',
+ u'\xe0': '\x05\x48',
+ u'\xdf': '\x05\x77',
+ u'\xde': '\x05\x47',
+ u'\xdd': '\x05\x76',
+ u'\xa3': '\x05\x89',
+ u'\xee': '\x05\x4f',
+ u'\xef': '\x05\x7f',
+ u'\xea': '\x05\x4d',
+ u'\xeb': '\x05\x7d',
+ u'\xec': '\x05\x4e',
+ u'\xed': '\x05\x7e',
+ u'\xe6': '\x05\x4b',
+ u'\xe7': '\x05\x7b',
+ u'\xe8': '\x05\x4c',
+ u'\xe9': '\x05\x7c',
+ u'\xc9': '\x05\x6c',
+ u'\xc8': '\x05\x3c',
+ u'\xcb': '\x05\x6d',
+ u'\xca': '\x05\x3d',
+ u'\xcd': '\x05\x6e',
+ u'\xcc': '\x05\x3e',
+ u'\xcf': '\x05\x6f',
+ u'\xce': '\x05\x3f',
+ u'\xd1': '\x05\x70',
+ u'\xd0': '\x05\x40',
+ }
diff --git a/odfrecode/recoders/cyrillic.py b/odfrecode/recoders/cyrillic.py
new file mode 100644
index 0000000..7a3cdda
--- /dev/null
+++ b/odfrecode/recoders/cyrillic.py
@@ -0,0 +1,151 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2007,2009 Guido Günther <agx@sigxcpu.org>
+# (c) 2007 Torsten Werner <twerner@debian.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+import recoder
+
+class Cyrillic(recoder.Recoder):
+    """
+    this class maps cyrillic characters from 0x7f-0xff to unicode
+    the mapping table is based on code from VeraWeb's madlan converter
+
+    Keys are the characters as found in the document text, values are the
+    replacement code points written as big endian utf-16 byte pairs (see
+    dst_encoding). Characters without an entry are left unchanged by
+    Recoder.recode.
+    """
+
+    encoding = 'cyrillic'
+    dst_encoding = 'utf-16-be'
+
+    character_table = {
+        u'\x7f': '\x04\x0c',
+        u'\x80': '\x04\x02',
+        u'\x81': '\x04\x03',
+        u'\x82': '\x20\x1a',
+        u'\x83': '\x04\x53',
+        u'\x84': '\x20\x1e',
+        u'\x85': '\x20\x26',
+        u'\x86': '\x20\x20',
+        u'\x87': '\x20\x21',
+        u'\x88': '\x00\x20',
+        u'\x89': '\x20\x30',
+        u'\x8a': '\x04\x09',
+        u'\x8b': '\x20\x39',
+        u'\x8c': '\x04\x0a',
+        u'\x8d': '\x04\x0c',
+        u'\x8e': '\x04\x0b',
+        u'\x8f': '\x04\x0f',
+        u'\x90': '\x04\x52',
+        u'\x91': '\x20\x18',
+        u'\x92': '\x20\x19',
+        u'\x93': '\x20\x1c',
+        u'\x94': '\x20\x1d',
+        u'\x95': '\x20\x22',
+        u'\x96': '\x20\x13',
+        u'\x97': '\x20\x14',
+        u'\x98': '\x00\x20',
+        u'\x99': '\x21\x22',
+        u'\x9a': '\x04\x59',
+        u'\x9b': '\x20\x3a',
+        u'\x9c': '\x04\x5a',
+        u'\x9d': '\x04\x5c',
+        u'\x9e': '\x04\x5b',
+        u'\x9f': '\x04\x5f',
+        u'\xa0': '\x00\x20',
+        u'\xa1': '\x04\x0e',
+        u'\xa2': '\x04\x5e',
+        u'\xa3': '\x04\x08',
+        u'\xa5': '\x04\x90',
+        u'\xa8': '\x04\x01',
+        u'\xaa': '\x04\x04',
+        u'\xaf': '\x04\x07',
+        u'\xb2': '\x04\x06',
+        # small i (U+0456), the lower case partner of 0xb2; small yi (U+0457)
+        # lives at 0xbf
+        u'\xb3': '\x04\x56',
+        u'\xb4': '\x04\x91',
+        u'\xb8': '\x04\x51',
+        u'\xb9': '\x21\x16',
+        u'\xba': '\x04\x54',
+        u'\xbc': '\x04\x58',
+        u'\xbd': '\x04\x05',
+        u'\xbe': '\x04\x55',
+        u'\xbf': '\x04\x57',
+        u'\xc0': '\x04\x10',
+        u'\xc1': '\x04\x11',
+        u'\xc2': '\x04\x12',
+        u'\xc3': '\x04\x13',
+        u'\xc4': '\x04\x14',
+        u'\xc5': '\x04\x15',
+        u'\xc6': '\x04\x16',
+        u'\xc7': '\x04\x17',
+        u'\xc8': '\x04\x18',
+        u'\xc9': '\x04\x19',
+        u'\xca': '\x04\x1a',
+        u'\xcb': '\x04\x1b',
+        u'\xcc': '\x04\x1c',
+        u'\xcd': '\x04\x1d',
+        u'\xce': '\x04\x1e',
+        u'\xcf': '\x04\x1f',
+        u'\xd0': '\x04\x20',
+        u'\xd1': '\x04\x21',
+        u'\xd2': '\x04\x22',
+        u'\xd3': '\x04\x23',
+        u'\xd4': '\x04\x24',
+        u'\xd5': '\x04\x25',
+        u'\xd6': '\x04\x26',
+        u'\xd7': '\x04\x27',
+        u'\xd8': '\x04\x28',
+        u'\xd9': '\x04\x29',
+        u'\xda': '\x04\x2a',
+        u'\xdb': '\x04\x2b',
+        u'\xdc': '\x04\x2c',
+        u'\xdd': '\x04\x2d',
+        u'\xde': '\x04\x2e',
+        u'\xdf': '\x04\x2f',
+        u'\xe0': '\x04\x30',
+        u'\xe1': '\x04\x31',
+        u'\xe2': '\x04\x32',
+        u'\xe3': '\x04\x33',
+        u'\xe4': '\x04\x34',
+        u'\xe5': '\x04\x35',
+        u'\xe6': '\x04\x36',
+        u'\xe7': '\x04\x37',
+        u'\xe8': '\x04\x38',
+        u'\xe9': '\x04\x39',
+        u'\xea': '\x04\x3a',
+        u'\xeb': '\x04\x3b',
+        u'\xec': '\x04\x3c',
+        u'\xed': '\x04\x3d',
+        u'\xee': '\x04\x3e',
+        u'\xef': '\x04\x3f',
+        u'\xf0': '\x04\x40',
+        u'\xf1': '\x04\x41',
+        u'\xf2': '\x04\x42',
+        u'\xf3': '\x04\x43',
+        u'\xf4': '\x04\x44',
+        u'\xf5': '\x04\x45',
+        u'\xf6': '\x04\x46',
+        u'\xf7': '\x04\x47',
+        u'\xf8': '\x04\x48',
+        u'\xf9': '\x04\x49',
+        u'\xfa': '\x04\x4a',
+        u'\xfb': '\x04\x4b',
+        u'\xfc': '\x04\x4c',
+        u'\xfd': '\x04\x4d',
+        u'\xfe': '\x04\x4e',
+        u'\xff': '\x04\x4f',
+    }
+
+
diff --git a/odfrecode/recoders/georgian.py b/odfrecode/recoders/georgian.py
new file mode 100644
index 0000000..247cc5a
--- /dev/null
+++ b/odfrecode/recoders/georgian.py
@@ -0,0 +1,65 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2007,2008 Guido Günther <agx@sigxcpu.org>
+# (c) 2008 Torsten Werner <twerner@debian.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+import recoder
+
+class Georgian(recoder.Recoder):
+ """
+ this class maps georgian to unicode
+
+ Keys are ASCII characters (0x43-0x7a) as found in the document text,
+ values are the georgian letters they are replaced with, written as utf-8
+ strings (see dst_encoding). Characters without an entry are left unchanged
+ by Recoder.recode.
+ """
+
+ encoding = 'georgian'
+ dst_encoding = 'utf-8'
+
+ character_table = {
+ u'\x43': 'ჩ',
+ u'\x4a': 'ჟ',
+ u'\x52': 'ღ',
+ u'\x53': 'შ',
+ u'\x54': 'თ',
+ u'\x57': 'ჭ',
+ u'\x5a': 'ძ',
+ u'\x61': 'ა',
+ u'\x62': 'ბ',
+ u'\x63': 'ც',
+ u'\x64': 'დ',
+ u'\x65': 'ე',
+ u'\x66': 'ფ',
+ u'\x67': 'გ',
+ u'\x68': 'ჰ',
+ u'\x69': 'ი',
+ u'\x6a': 'ჯ',
+ u'\x6b': 'კ',
+ u'\x6c': 'ლ',
+ u'\x6d': 'მ',
+ u'\x6e': 'ნ',
+ u'\x6f': 'ო',
+ u'\x70': 'პ',
+ u'\x71': 'ქ',
+ u'\x72': 'რ',
+ u'\x73': 'ს',
+ u'\x74': 'ტ',
+ u'\x75': 'უ',
+ u'\x76': 'ვ',
+ u'\x77': 'წ',
+ u'\x78': 'ხ',
+ u'\x79': 'ყ',
+ u'\x7a': 'ზ',
+ }
diff --git a/odfrecode/recoders/greek.py b/odfrecode/recoders/greek.py
new file mode 100644
index 0000000..7610338
--- /dev/null
+++ b/odfrecode/recoders/greek.py
@@ -0,0 +1,154 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2007,2009 Guido Günther <agx@sigxcpu.org>
+# (c) 2007 Torsten Werner <twerner@debian.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+import recoder
+
+class Greek(recoder.Recoder):
+    """
+    this class maps greek characters from 0x80-0xff to unicode
+    the mapping table is based on code from VeraWeb's madlan converter
+
+    Keys are the characters as found in the document text, values are the
+    replacement code points written as big endian utf-16 byte pairs (see
+    dst_encoding). Characters without an entry are left unchanged by
+    Recoder.recode.
+    """
+
+    encoding = 'greek'
+    dst_encoding = 'utf-16-be'
+
+    character_table = {
+        u'\x82': '\x20\x1a',
+        u'\x83': '\x01\x92',
+        u'\x84': '\x20\x1e',
+        u'\x85': '\x20\x26',
+        u'\x86': '\x20\x20',
+        u'\x87': '\x20\x21',
+        u'\x88': '\x02\xc6',
+        u'\x89': '\x20\x30',
+        u'\x8a': '\x01\x60',
+        u'\x8b': '\x20\x39',
+        u'\x8c': '\x01\x52',
+        u'\x91': '\x20\x18',
+        u'\x92': '\x20\x19',
+        u'\x93': '\x20\x1c',
+        # closing partner of 0x93: right double quotation mark (U+201D)
+        u'\x94': '\x20\x1d',
+        u'\x95': '\x20\x22',
+        u'\x96': '\x20\x13',
+        u'\x97': '\x20\x14',
+        u'\x98': '\x02\xdc',
+        u'\x99': '\x21\x22',
+        u'\x9a': '\x01\x61',
+        u'\x9b': '\x20\x3a',
+        u'\x9c': '\x01\x53',
+        u'\x9f': '\x01\x78',
+        u'\xa1': '\x03\x85',
+        u'\xa2': '\x03\x86',
+        u'\xa3': '\x00\xa3',
+        u'\xa4': '\x00\xa4',
+        u'\xa5': '\x00\xa5',
+        u'\xa6': '\x00\xa6',
+        u'\xa7': '\x00\xa7',
+        u'\xa8': '\x00\xa8',
+        u'\xa9': '\x00\xa9',
+        u'\xaa': '\x00\xaa',
+        u'\xab': '\x00\xab',
+        u'\xac': '\x00\xac',
+        u'\xad': '\x00\xad',
+        u'\xae': '\x00\xae',
+        u'\xaf': '\x00\xaf',
+        u'\xb0': '\x00\xb0',
+        u'\xb1': '\x00\xb1',
+        u'\xb2': '\x00\xb2',
+        u'\xb3': '\x00\xb3',
+        u'\xb4': '\x00\xb4',
+        u'\xb5': '\x00\xb5',
+        u'\xb6': '\x00\xb6',
+        u'\xb7': '\x00\xb7',
+        u'\xb8': '\x03\x88',
+        u'\xb9': '\x03\x89',
+        u'\xba': '\x03\x8a',
+        u'\xbb': '\x00\xbb',
+        u'\xbc': '\x03\x8c',
+        u'\xbd': '\x00\xbd',
+        u'\xbe': '\x03\x8e',
+        u'\xbf': '\x03\x8f',
+        u'\xc0': '\x03\x90',
+        u'\xc1': '\x03\x91',
+        u'\xc2': '\x03\x92',
+        u'\xc3': '\x03\x93',
+        u'\xc4': '\x03\x94',
+        u'\xc5': '\x03\x95',
+        u'\xc6': '\x03\x96',
+        u'\xc7': '\x03\x97',
+        u'\xc8': '\x03\x98',
+        u'\xc9': '\x03\x99',
+        u'\xca': '\x03\x9a',
+        u'\xcb': '\x03\x9b',
+        u'\xcc': '\x03\x9c',
+        u'\xcd': '\x03\x9d',
+        u'\xce': '\x03\x9e',
+        u'\xcf': '\x03\x9f',
+        u'\xd0': '\x03\xa0',
+        u'\xd1': '\x03\xa1',
+        # NOTE(review): breaks the otherwise sequential run (U+03A2 is not a
+        # letter) - confirm that U+03DA is really wanted here
+        u'\xd2': '\x03\xda',
+        u'\xd3': '\x03\xa3',
+        u'\xd4': '\x03\xa4',
+        u'\xd5': '\x03\xa5',
+        u'\xd6': '\x03\xa6',
+        u'\xd7': '\x03\xa7',
+        u'\xd8': '\x03\xa8',
+        u'\xd9': '\x03\xa9',
+        u'\xda': '\x03\xaa',
+        u'\xdb': '\x03\xab',
+        u'\xdc': '\x03\xac',
+        u'\xdd': '\x03\xad',
+        u'\xde': '\x03\xae',
+        u'\xdf': '\x03\xaf',
+        u'\xe0': '\x03\xb0',
+        u'\xe1': '\x03\xb1',
+        u'\xe2': '\x03\xb2',
+        u'\xe3': '\x03\xb3',
+        u'\xe4': '\x03\xb4',
+        u'\xe5': '\x03\xb5',
+        u'\xe6': '\x03\xb6',
+        u'\xe7': '\x03\xb7',
+        u'\xe8': '\x03\xb8',
+        u'\xe9': '\x03\xb9',
+        u'\xea': '\x03\xba',
+        u'\xeb': '\x03\xbb',
+        u'\xec': '\x03\xbc',
+        u'\xed': '\x03\xbd',
+        u'\xee': '\x03\xbe',
+        u'\xef': '\x03\xbf',
+        u'\xf0': '\x03\xc0',
+        u'\xf1': '\x03\xc1',
+        u'\xf2': '\x03\xc2',
+        u'\xf3': '\x03\xc3',
+        u'\xf4': '\x03\xc4',
+        u'\xf5': '\x03\xc5',
+        u'\xf6': '\x03\xc6',
+        u'\xf7': '\x03\xc7',
+        u'\xf8': '\x03\xc8',
+        u'\xf9': '\x03\xc9',
+        u'\xfa': '\x03\xca',
+        u'\xfb': '\x03\xcb',
+        u'\xfc': '\x03\xcc',
+        # upsilon with tonos (U+03CD); omega with tonos (U+03CE) is 0xfe
+        u'\xfd': '\x03\xcd',
+        u'\xfe': '\x03\xce',
+    }
+
+
diff --git a/odfrecode/recoders/recoder.py b/odfrecode/recoders/recoder.py
new file mode 100644
index 0000000..01a3c1a
--- /dev/null
+++ b/odfrecode/recoders/recoder.py
@@ -0,0 +1,45 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2007,2008 Guido Günther <agx@sigxcpu.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+class Recoder(object):
+    """
+    base class for all encoders
+    @classvar dst_encoding: encoding of the destination "column"
+    @classvar encoding: name of the encoding
+    @classvar character_table: maps a single character to its replacement
+
+    To write a new encoder simply add a dictionary
+
+    >>> character_table = { "national_encoding_1": "utf8_1",
+    ...                     "national_encoding_2": "utf8_2",
+    ...                   }
+
+    If you don't want to use utf8 as dictionary values specify dst_encoding
+    """
+    dst_encoding = 'utf-8'
+    encoding = None
+    # empty by default so that recode() also works on the bare base class
+    character_table = {}
+
+    def recode(self, character):
+        """
+        map one character to unicode
+
+        @param character: a single character of the document text
+        @return: the entry of character_table decoded with dst_encoding, or
+                 character itself when the table has no entry for it
+        """
+        try:
+            replacement = self.character_table[character]
+        except KeyError: # needs no remapping
+            return character
+        return unicode(replacement, self.dst_encoding)
+
+
diff --git a/odfrecode/recoders/romanian.py b/odfrecode/recoders/romanian.py
new file mode 100644
index 0000000..4f07e9d
--- /dev/null
+++ b/odfrecode/recoders/romanian.py
@@ -0,0 +1,62 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2010 Guido Günther <agx@sigxcpu.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+import recoder
+
+class Romanian(recoder.Recoder):
+ """
+ this class maps Romanian to unicode
+ See https://bugzilla.redhat.com/show_bug.cgi?id=327501
+ and http://en.wikipedia.org/wiki/Romanian_alphabet for details.
+
+ Keys are the characters as found in the document text, values are their
+ replacements written as utf-8 strings (see dst_encoding). Characters
+ without an entry are left unchanged by Recoder.recode.
+ """
+
+ encoding = 'romanian'
+ dst_encoding = 'utf-8'
+
+ character_table = {
+ # "S with comma below" (Unicode 0218) - incorrectly implemented as
+ # "S with cedilla below" (Unicode 015E)
+ u'Ş': 'Ș',
+ # "s with comma below" (Unicode 0219) - incorrectly implemented as
+ # "s with cedilla below" (Unicode 015F)
+ u'ş': 'ș',
+ # "T with comma below" (Unicode 021A) - incorrectly implemented as
+ # "T with cedilla below" (Unicode 0162)
+ u'Ţ': 'Ț',
+ # "t with comma below" (Unicode 021B) - incorrectly implemented as
+ # "t with cedilla below" (Unicode 0163)
+ u'ţ': 'ț',
+ # Furthermore Microsoft's EasternRoman font places these characters at
+ # code points of the "Latin-1 supplement" block:
+ # S with comma below at 0xaa
+ u'ª': 'Ș',
+ # s with comma below at 0xba
+ u'º': 'ș',
+ # T with comma below at 0xde:
+ u'Þ': 'Ț',
+ # t with comma below at 0xfe:
+ u'þ': 'ț',
+ # A with breve at 0xc3:
+ u'Ã': 'Ă',
+ # a with breve at 0xe3:
+ u'ã': 'ă',
+ }
+
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..e90c56f
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,13 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+
+from distutils.core import setup
+
+# installs the odfrecode package with its recoders and both front end scripts
+setup(name="odfrecode",
+ version="0.0.1",
+ author="Guido Günther",
+ author_email="agx@sigxcpu.org",
+ scripts = [ "bin/odfrecode", "bin/odfrecode-gtk" ],
+ packages =[ "odfrecode",
+ "odfrecode.recoders",
+ ])
+