# vim:encoding=utf-8:fileencoding=utf-8
#
# odfrecode
#
# (c) 2007,2008 Guido Günther
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#

import os
import zipfile
import tempfile
import shutil
import xml.dom.minidom
import recoders

# Archive members that are parsed as XML and recoded; every other member
# is copied into the converted document unchanged.
__xmls = [ 'content.xml', 'styles.xml' ]


def __convert(textnode, weird_enc):
    """
    remap source encoding to the corresponding unicode codepoints

    Every character of the node's data is passed through
    weird_enc.recode() and the results are concatenated and stored back
    into the node.

    @param textnode: DOM text node whose data is rewritten in place
    @param weird_enc: object providing a recode(char) method
    """
    # Build the result in one pass instead of growing a string with +=.
    textnode.data = u''.join(weird_enc.recode(char) for char in textnode.data)


def __recode_xml_tree(node, weird_enc):
    """
    recursively recode all text nodes below (and including) node

    Nodes with children are descended into; leaf nodes are converted only
    when they are text nodes, all other leaves are left untouched.

    @param node: DOM node to start at
    @param weird_enc: object providing a recode(char) method
    """
    if node.hasChildNodes():
        for kid in node.childNodes:
            __recode_xml_tree(kid, weird_enc)
    elif node.nodeType == node.TEXT_NODE:
        __convert(node, weird_enc)


def __subst_fonts(contents, fontmap):
    """
    substitute fonts according to fontmap

    The style:font-name attribute of every style:text-properties element
    is replaced when the current font name is a key of fontmap. Elements
    without that attribute, or with a font that is not in fontmap, are
    left alone.

    @param contents: parsed DOM document, modified in place
    @param fontmap: font substitution map
                    { oldfont1: newfont1, oldfont2: newfont2 }
                    or None for no substitution
    @type fontmap: dict
    """
    # to_utf8() defaults fontmap to None; indexing None raises TypeError
    # rather than KeyError, so bail out before any lookup is attempted.
    if fontmap is None:
        return

    for prop in contents.getElementsByTagName('style:text-properties'):
        oldfont = prop.getAttribute('style:font-name')
        if not oldfont:
            continue
        try:
            newfont = fontmap[oldfont]
        except KeyError:
            continue
        prop.setAttribute('style:font-name', newfont)


def __recode_xml(data, weird_enc, fontmap):
    """
    recode one XML archive member and return it serialized as UTF-8

    @param data: raw XML as read from the archive
    @param weird_enc: object providing a recode(char) method
    @param fontmap: font substitution map or None
    @return: the recoded document as produced by toxml('utf-8')
    """
    contents = xml.dom.minidom.parseString(data)
    __recode_xml_tree(contents, weird_enc)
    __subst_fonts(contents, fontmap)
    return contents.toxml('utf-8')


def __recode_archive(srcname, dstname, weird_enc, fontmap):
    """
    copy the archive srcname to dstname, recoding the members in __xmls

    Both archives are closed again even when reading, parsing or writing
    fails; the exception is passed on to the caller.

    @param srcname: existing ODF (zip) file to read
    @param dstname: file to create; an existing file is overwritten
    @param weird_enc: object providing a recode(char) method
    @param fontmap: font substitution map or None
    """
    src_odf = zipfile.ZipFile(srcname, 'r')
    try:
        dst_odf = zipfile.ZipFile(dstname, 'w')
        try:
            for info in src_odf.infolist():
                data = src_odf.read(info.filename)
                if info.filename in __xmls:
                    data = __recode_xml(data, weird_enc, fontmap)
                # Hand over the source entry's ZipInfo instead of the bare
                # name so the member keeps its compression method and
                # timestamp; a bare name would be written with the
                # archive default (stored, i.e. uncompressed).
                dst_odf.writestr(info, data)
        finally:
            dst_odf.close()
    finally:
        src_odf.close()


def to_utf8(srcname, encoding, fontmap=None):
    """
    convert a odf document from encoding to unicode

    The converted document is written to a file named like srcname inside
    a freshly created temporary directory. Nothing in this function
    removes that directory after a successful conversion; that is left to
    the caller. srcname itself is never modified.

    Errors are not handled here: a missing file, a file that is not a zip
    archive, or malformed XML in one of the recoded members raises the
    exception of the underlying library. The temporary directory is
    removed again in that case.

    @param srcname: file to convert
    @type srcname: string
    @param encoding: recoder for the encoding the document's text is
                     currently in; its recode() method is called with
                     every character of every text node
    @type encoding: Recoder subclass
    @param fontmap: font substitution map
                    { oldfont1: newfont1, oldfont2: newfont2 }
                    or None for no substitution
    @type fontmap: dict
    @return: path of the converted document
    @rtype: string
    """
    tempdir = tempfile.mkdtemp()
    dstname = os.path.join(tempdir, os.path.basename(srcname))

    converted = False
    try:
        __recode_archive(srcname, dstname, encoding, fontmap)
        converted = True
    finally:
        # Do not leave an empty or half written directory behind when the
        # conversion failed; the pending exception still propagates.
        if not converted:
            shutil.rmtree(tempdir, ignore_errors=True)
    return dstname


def get_recoder(encoding):
    """
    get the recoder for a specific encoding

    @param encoding: key into recoders.recoders
    @return: the result of calling the entry registered under encoding
    @raise KeyError: if no recoder is registered under encoding
    """
    return recoders.recoders[encoding]()


def get_recoders():
    """
    get a dict of all recoders

    @return: recoders.recoders itself (not a copy)
    """
    return recoders.recoders

# vim:et:ts=4:sw=4:et:sts=4:ai:set list listchars=tab\:»·,trail\:·: