Switch to unified view

a/src/rcldb/expansiondbs.cpp b/src/rcldb/expansiondbs.cpp
...
...
82
    string ermsg;
82
    string ermsg;
83
    try {
83
    try {
84
        for (Xapian::TermIterator it = wdb.allterms_begin(); 
84
        for (Xapian::TermIterator it = wdb.allterms_begin(); 
85
         it != wdb.allterms_end(); it++) {
85
         it != wdb.allterms_end(); it++) {
86
86
87
      // Skip terms which don't look like natural language words.
88
            if (!Db::isSpellingCandidate(*it)) {
89
                LOGDEB1(("createExpansionDbs: skipped: [%s]\n", (*it).c_str()));
90
                continue;
91
            }
92
93
        // Detect and skip CJK terms.
87
        // Detect and skip CJK terms.
94
      // We're still sending all other multibyte utf-8 chars to
95
            // the stemmer, which is not too well defined for
96
            // xapian<1.0 (very obsolete now), but seems to work
97
            // anyway. There shouldn't be too many in any case because
98
            // accents are stripped at this point. 
99
      // The effect of stripping accents on stemming is not good, 
100
            // (e.g. in French: partimes -> partim, parti^mes -> part)
101
      // but fixing the issue would be complicated.
102
        Utf8Iter utfit(*it);
88
        Utf8Iter utfit(*it);
103
        if (TextSplit::isCJK(*utfit)) {
89
        if (TextSplit::isCJK(*utfit)) {
104
        // LOGDEB(("stemskipped: Skipping CJK\n"));
90
        // LOGDEB(("stemskipped: Skipping CJK\n"));
105
        continue;
91
        continue;
106
        }
92
        }
...
...
114
        if (!o_index_stripchars) {
100
        if (!o_index_stripchars) {
115
        unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD);
101
        unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD);
116
        diacasedb.addSynonym(*it);
102
        diacasedb.addSynonym(*it);
117
        }
103
        }
118
#endif
104
#endif
105
106
      // Don't apply stemming to terms which don't look like
107
      // natural language words.
108
            if (!Db::isSpellingCandidate(*it)) {
109
                LOGDEB1(("createExpansionDbs: skipped: [%s]\n", (*it).c_str()));
110
                continue;
111
            }
119
112
120
        // Create stemming synonym for every language. The input is the 
113
        // Create stemming synonym for every language. The input is the 
121
        // lowercase accented term
114
        // lowercase accented term
122
        for (unsigned int i = 0; i < langs.size(); i++) {
115
        for (unsigned int i = 0; i < langs.size(); i++) {
123
        stemdbs[i].addSynonym(lower);
116
        stemdbs[i].addSynonym(lower);