aboutsummaryrefslogtreecommitdiff
path: root/odfrecode/__init__.py
blob: 328100e4ba43083607bf7098c9ecea8d7df13be5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# vim:encoding=utf-8:fileencoding=utf-8
#
# odfrecode
#
# (c) 2007,2008 Guido Günther <agx@sigxcpu.org>
#
#   This program is free software; you can redistribute it and/or modify
#   it under the terms of the GNU General Public License as published by
#   the Free Software Foundation; either version 2 of the License, or
#   (at your option) any later version.
#
#   This program is distributed in the hope that it will be useful,
#   but WITHOUT ANY WARRANTY; without even the implied warranty of
#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#   GNU General Public License for more details.
#
#   You should have received a copy of the GNU General Public License
#   along with this program; if not, write to the Free Software
#   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#

import os
import zipfile
import tempfile
import shutil
import xml.dom.minidom

import recoders

# Names of the archive members that to_utf8() parses as XML and recodes;
# every other member of the document is copied through unchanged.
__xmls = ['content.xml', 'styles.xml']

def __convert(textnode, weird_enc):
    """
    remap source encoding to the corresponding unicode codepoints

    Each character of the node's text is passed through weird_enc.recode()
    and the node's data is replaced by the concatenation of the results.

    @param textnode: node whose data attribute is rewritten in place
    @type textnode: DOM text node
    @param weird_enc: recoder describing the source encoding
    @type weird_enc: object providing a recode(char) method
    """
    # Join once instead of growing the string with += for every character.
    textnode.data = u''.join([weird_enc.recode(char) for char in textnode.data])


def __recode_xml_tree(node, weird_enc):
    """
    walk the DOM tree below node and recode the text it contains

    Nodes that have children are only descended into; a childless node is
    handed to __convert() when it is a text node and ignored otherwise.

    @param node: root of the (sub)tree to process
    @type node: DOM node
    @param weird_enc: recoder passed through to __convert()
    """
    if not node.hasChildNodes():
        # Leaf: only plain text nodes carry data that needs remapping.
        if node.nodeType == node.TEXT_NODE:
            __convert(node, weird_enc)
        return

    for child in node.childNodes:
        __recode_xml_tree(child, weird_enc)


def __subst_fonts(contents, fontmap):
    """
    substitute fonts according to fontmap

    Every 'style:text-properties' element whose 'style:font-name' attribute
    is a key of fontmap gets that attribute replaced by the mapped value.
    Elements without a font name, or with a font that is not in the map,
    are left unchanged.

    @param contents: parsed document, modified in place
    @type contents: DOM document
    @param fontmap: font substitution map { oldfont1: newfont1, ... } or
                    None to leave all fonts alone
    @type fontmap: dict
    """
    # to_utf8() passes its default fontmap=None straight through; indexing
    # None would raise TypeError, which the KeyError handler does not catch.
    if fontmap is None:
        return

    for prop in contents.getElementsByTagName('style:text-properties'):
        oldfont = prop.getAttribute('style:font-name')
        if not oldfont:
            continue
        # Keep the try body down to the lookup so that only a missing
        # mapping is treated as "nothing to substitute".
        try:
            newfont = fontmap[oldfont]
        except KeyError:
            continue
        prop.setAttribute('style:font-name', newfont)


def to_utf8(srcname, encoding, fontmap=None):
    """
    convert an odf document from its source encoding to unicode

    The converted document is written into a freshly created temporary
    directory under the base name of srcname; srcname itself is only read.
    The members listed in __xmls are parsed, recoded, optionally have their
    fonts substituted and are serialized again as UTF-8.  All other members
    are copied unchanged.  Removing the returned file and its directory is
    left to the caller; if the conversion fails the temporary directory is
    removed before the exception propagates.

    @param srcname: file to convert
    @type srcname: string
    @param encoding: recoder for the source encoding of the document, e.g.
                     as returned by get_recoder()
    @type encoding: Recoder subclass
    @param fontmap: font substitution map { oldfont1: newfont1, oldfont2: newfont2 },
                    None disables font substitution
    @type fontmap: dict
    @return: path of the converted document
    @rtype: string
    """

    tempdir = tempfile.mkdtemp()
    dstname = os.path.join(tempdir, os.path.basename(srcname))

    succeeded = False
    try:
        # TODO: should handle the exception (no zipfile)
        src_odf = zipfile.ZipFile(srcname, 'r')
        try:
            # Mode 'w' creates/truncates dstname, so no prior copy is needed.
            dst_odf = zipfile.ZipFile(dstname, 'w')
            try:
                for info in src_odf.infolist():
                    fname = info.filename
                    # TODO: should handle the exception (fname not found)
                    data = src_odf.read(fname)
                    if fname in __xmls:
                        contents = xml.dom.minidom.parseString(data)
                        __recode_xml_tree(contents, encoding)
                        if fontmap is not None:
                            __subst_fonts(contents, fontmap)
                        data = contents.toxml('utf-8')

                    # Writing with the member's own ZipInfo keeps its
                    # compression type and timestamp instead of storing
                    # every member uncompressed.
                    dst_odf.writestr(info, data)
            finally:
                dst_odf.close()
        finally:
            src_odf.close()
        succeeded = True
    finally:
        if not succeeded:
            # Do not leave a half-written document behind.
            shutil.rmtree(tempdir, ignore_errors=True)

    return dstname


def get_recoder(encoding):
    """
    get the recoder for a specific encoding

    Looks up encoding in recoders.recoders and returns the result of calling
    the entry found there without arguments.

    @param encoding: key into recoders.recoders
    @return: a new recoder object
    """
    recoder_factory = recoders.recoders[encoding]
    return recoder_factory()

def get_recoders():
    """
    get a dict of all recoders

    @return: recoders.recoders itself (not a copy)
    """
    registry = recoders.recoders
    return registry

# vim:et:ts=4:sw=4:et:sts=4:ai:set list listchars=tab\:»·,trail\:·: