about summary refs log tree commit diff
path: root/odfrecode/recoders/romanian.py
diff options
context:
space:
mode:
Diffstat (limited to 'odfrecode/recoders/romanian.py')
-rw-r--r-- odfrecode/recoders/romanian.py 62
1 files changed, 62 insertions, 0 deletions
diff --git a/odfrecode/recoders/romanian.py b/odfrecode/recoders/romanian.py
new file mode 100644
index 0000000..4f07e9d
--- /dev/null
+++ b/odfrecode/recoders/romanian.py
@@ -0,0 +1,62 @@
+# vim:encoding=utf-8:fileencoding=utf-8
+#
+# odfrecode
+#
+# (c) 2010 Guido Günther <agx@sigxcpu.org>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+import recoder
+
+class Romanian(recoder.Recoder):
+ """
+ this class maps Romanian to unicode
+ See https://bugzilla.redhat.com/show_bug.cgi?id=327501
+ and http://en.wikipedia.org/wiki/Romanian_alphabet for details.
+ """
+
+ encoding = 'romanian'  # presumably the name this recoder is looked up by -- confirm in recoder.Recoder
+ dst_encoding = 'utf-8'  # presumably the encoding of the replacement values below -- confirm in recoder.Recoder
+
+ character_table = {  # wrong character (unicode literal) -> intended Romanian character (plain literal)
+ # S with comma below (U+0218) is often incorrectly represented as
+ # S with cedilla (U+015E)
+ u'Ş': 'Ș',
+ # s with comma below (U+0219) is often incorrectly represented as
+ # s with cedilla (U+015F)
+ u'ş': 'ș',
+ # T with comma below (U+021A) is often incorrectly represented as
+ # T with cedilla (U+0162)
+ u'Ţ': 'Ț',
+ # t with comma below (U+021B) is often incorrectly represented as
+ # t with cedilla (U+0163)
+ u'ţ': 'ț',
+ # Furthermore, Microsoft's EasternRoman font places Romanian letters
+ # at code points of the "Latin-1 Supplement" block:
+ # S with comma below at 0xaa (normally U+00AA, feminine ordinal indicator)
+ u'ª': 'Ș',
+ # s with comma below at 0xba (normally U+00BA, masculine ordinal indicator)
+ u'º': 'ș',
+ # T with comma below at 0xde (normally U+00DE, capital thorn)
+ u'Þ': 'Ț',
+ # t with comma below at 0xfe (normally U+00FE, small thorn)
+ u'þ': 'ț',
+ # A with breve at 0xc3 (normally U+00C3, A with tilde)
+ u'Ã': 'Ă',
+ # a with breve at 0xe3 (normally U+00E3, a with tilde)
+ u'ã': 'ă',
+ }
+