summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPatrick Ohly <patrick.ohly@intel.com>2013-05-14 17:37:44 +0200
committerPatrick Ohly <patrick.ohly@intel.com>2013-05-16 11:24:48 +0200
commitbab3906e126c3f1c1114621378328dabed7f28b7 (patch)
tree4caf841523bd36ca836815391a76994448f1b23f
parent542236bb5e094aab3276aecddefd51141a83e128 (diff)
PIM: Pinyin sorting for zh languages (part of FDO #64173)
Full interleaving of Pinyin transliterations of Chinese names with Western names can be done by doing an explicit Pinyin transliteration as part of computing the sort keys. This is done using ICU's "Han-Latin" Transliterator, which we have to call directly because boost::locale does not expose that API. We hard-code this behavior for all "zh" languages (as identified by boost::locale), because by default, ICU would sort Pinyin separately from Western names when using the "pinyin" collation.
-rw-r--r--configure.ac2
-rw-r--r--src/dbus/server/pim/locale-factory-boost.cpp45
-rw-r--r--src/dbus/server/server.am4
3 files changed, 47 insertions, 4 deletions
diff --git a/configure.ac b/configure.ac
index 2ef5fb17..7b6c1706 100644
--- a/configure.ac
+++ b/configure.ac
@@ -555,6 +555,8 @@ if test $enable_dbus_service = "yes"; then
# to go to the start of the link line).
DBUS_PIM_PLUGIN_LIBS='$(BOOST_LDFLAGS) $(BOOST_LOCALE_LIB)'
DBUS_PIM_PLUGIN_LDFLAGS=
+ # We need to call ICU directly for the Han->Latin transformation; icu::Transliterator is part of ICU's i18n library, UnicodeString of icu-uc.
+ PKG_CHECK_MODULES(ICU, [icu-uc icu-i18n])
;;
esac
AC_SUBST(DBUS_PIM_PLUGIN_CFLAGS)
diff --git a/src/dbus/server/pim/locale-factory-boost.cpp b/src/dbus/server/pim/locale-factory-boost.cpp
index 5d9870c5..a2d50bee 100644
--- a/src/dbus/server/pim/locale-factory-boost.cpp
+++ b/src/dbus/server/pim/locale-factory-boost.cpp
@@ -31,6 +31,10 @@
#include <boost/locale.hpp>
#include <boost/lexical_cast.hpp>
+#include <unicode/unistr.h>
+#include <unicode/translit.h>
+#include <unicode/bytestream.h>
+
SE_GLIB_TYPE(EBookQuery, e_book_query)
SE_BEGIN_CXX
@@ -51,9 +55,10 @@ SE_BEGIN_CXX
*/
static const boost::locale::collator_base::level_type DEFAULT_COLLATION_LEVEL = boost::locale::collator_base::secondary;
-class CompareBoost : public IndividualCompare {
+class CompareBoost : public IndividualCompare, private boost::noncopyable {
std::locale m_locale;
const boost::locale::collator<char> &m_collator;
+ std::auto_ptr<icu::Transliterator> m_trans;
public:
CompareBoost(const std::locale &locale);
@@ -66,6 +71,31 @@ CompareBoost::CompareBoost(const std::locale &locale) :
m_locale(locale),
m_collator(std::use_facet< boost::locale::collator<char> >(m_locale))
{
+ std::string language = std::use_facet<boost::locale::info>(m_locale).language();
+ if (language == "zh") {
+ // Hard-code Pinyin sorting for all Chinese countries.
+ //
+ // There are three different ways of sorting Chinese and Western names:
+ // 1. Sort Chinese characters in pinyin order, but separate from Latin
+ // 2. Sort them interleaved with Latin, by the first character.
+ // 3. Sort them fully interleaved with Latin.
+ // Source: Mark Davis, ICU, http://sourceforge.net/mailarchive/forum.php?thread_name=CAJ2xs_GEnN-u3%3D%2B7P5puaF1%2BU__fX-4tuA-kEybThN9xsw577Q%40mail.gmail.com&forum_name=icu-support
+ //
+ // Either 2 or 3 is what apparently more people expect. Implementing 2 is
+ // harder, whereas 3 fits into the "generate keys, compare keys" concept
+ // of IndividualCompare, so we kind of arbitrarily implement that.
+ SE_LOG_DEBUG(NULL, "enabling Pinyin");
+
+ UErrorCode status = U_ZERO_ERROR;
+ icu::Transliterator *trans = icu::Transliterator::createInstance("Han-Latin", UTRANS_FORWARD, status);
+ m_trans.reset(trans);
+ if (U_FAILURE(status)) {
+ SE_LOG_WARNING(NULL, "creating ICU Han-Latin Transliterator for Pinyin failed, error code %s; falling back to normal collation", u_errorName(status));
+ m_trans.reset();
+ } else if (!trans) {
+ SE_LOG_WARNING(NULL, "creating ICU Han-Latin Transliterator for Pinyin failed, no error code; falling back to normal collation");
+ }
+ }
}
std::string CompareBoost::transform(const char *string) const
@@ -78,7 +108,18 @@ std::string CompareBoost::transform(const char *string) const
std::string CompareBoost::transform(const std::string &string) const
{
- return m_collator.transform(DEFAULT_COLLATION_LEVEL, string);
+ if (m_trans.get()) {
+ // Pinyin mode: transliterate first, then compute the collation key of the result.
+ // Decode explicitly as UTF-8 (matching toUTF8String() below): UnicodeString(const char *)
+ // would use ICU's default codepage, which is not necessarily UTF-8, and stop at a NUL byte.
+ icu::UnicodeString buffer = icu::UnicodeString::fromUTF8(string);
+ m_trans->transliterate(buffer);
+ std::string result;
+ buffer.toUTF8String(result);
+ return m_collator.transform(DEFAULT_COLLATION_LEVEL, result);
+ } else {
+ return m_collator.transform(DEFAULT_COLLATION_LEVEL, string);
+ }
}
class CompareFirstLastBoost : public CompareBoost {
diff --git a/src/dbus/server/server.am b/src/dbus/server/server.am
index 0391a924..831b3a6c 100644
--- a/src/dbus/server/server.am
+++ b/src/dbus/server/server.am
@@ -57,9 +57,9 @@ nodist_src_dbus_server_libsyncevodbusserver_la_SOURCES =
dist_pkgdata_DATA += src/dbus/server/bluetooth_products.ini
src_dbus_server_libsyncevodbusserver_la_LDFLAGS =
-src_dbus_server_libsyncevodbusserver_la_LIBADD = $(LIBNOTIFY_LIBS) $(MLITE_LIBS) $(DBUS_LIBS) $(PCRECPP_LIBS)
+src_dbus_server_libsyncevodbusserver_la_LIBADD = $(LIBNOTIFY_LIBS) $(MLITE_LIBS) $(DBUS_LIBS) $(PCRECPP_LIBS) $(ICU_LIBS)
src_dbus_server_libsyncevodbusserver_la_CPPFLAGS = -DHAVE_CONFIG_H -DSYNCEVOLUTION_LOCALEDIR=\"${SYNCEVOLUTION_LOCALEDIR}\" -I$(top_srcdir)/src -I$(top_srcdir)/test -I$(top_srcdir) -I$(gdbus_dir) $(BACKEND_CPPFLAGS)
-src_dbus_server_libsyncevodbusserver_la_CXXFLAGS = $(SYNCEVOLUTION_CXXFLAGS) $(CORE_CXXFLAGS) $(SYNTHESIS_CFLAGS) $(GLIB_CFLAGS) $(DBUS_CFLAGS) $(LIBNOTIFY_CFLAGS) $(MLITE_CFLAGS) $(SYNCEVO_WFLAGS)
+src_dbus_server_libsyncevodbusserver_la_CXXFLAGS = $(SYNCEVOLUTION_CXXFLAGS) $(CORE_CXXFLAGS) $(SYNTHESIS_CFLAGS) $(GLIB_CFLAGS) $(DBUS_CFLAGS) $(LIBNOTIFY_CFLAGS) $(MLITE_CFLAGS) $(SYNCEVO_WFLAGS) $(ICU_CFLAGS)
if COND_DBUS_PIM
src_dbus_server_server_cpp_files += \