Switch to unified view

a/src/rcldb/expansiondbs.cpp b/src/rcldb/expansiondbs.cpp
...
...
46
    // Erase and recreate all the expansion groups
46
    // Erase and recreate all the expansion groups
47
47
48
    // If langs is empty and we don't need casediac expansion, then no need to
48
    // If langs is empty and we don't need casediac expansion, then no need to
49
    // walk the big list
49
    // walk the big list
50
    if (langs.empty()) {
50
    if (langs.empty()) {
51
#ifndef RCL_INDEX_STRIPCHARS
52
    if (o_index_stripchars)
51
    if (o_index_stripchars)
53
#endif        
54
        return true;
52
        return true;
55
    }
53
    }
56
54
57
    // Stem dbs
55
    // Stem dbs
58
    vector<XapWritableComputableSynFamMember> stemdbs;
56
    vector<XapWritableComputableSynFamMember> stemdbs;
...
...
66
        XapWritableComputableSynFamMember(wdb, synFamStem, langs[i], 
64
        XapWritableComputableSynFamMember(wdb, synFamStem, langs[i], 
67
                          stemmers.back().getptr()));
65
                          stemmers.back().getptr()));
68
    stemdbs.back().recreate();
66
    stemdbs.back().recreate();
69
    }
67
    }
70
68
71
#ifndef RCL_INDEX_STRIPCHARS
72
    // Unaccented stem dbs
69
    // Unaccented stem dbs
73
    vector<XapWritableComputableSynFamMember> unacstemdbs;
70
    vector<XapWritableComputableSynFamMember> unacstemdbs;
74
    // We can reuse the same stemmer pointers, the objects are stateless.
71
    // We can reuse the same stemmer pointers, the objects are stateless.
75
    if (!o_index_stripchars) {
72
    if (!o_index_stripchars) {
76
    for (unsigned int i = 0; i < langs.size(); i++) {
73
    for (unsigned int i = 0; i < langs.size(); i++) {
...
...
83
    SynTermTransUnac transunac(UNACOP_UNACFOLD);
80
    SynTermTransUnac transunac(UNACOP_UNACFOLD);
84
    XapWritableComputableSynFamMember 
81
    XapWritableComputableSynFamMember 
85
    diacasedb(wdb, synFamDiCa, "all", &transunac);
82
    diacasedb(wdb, synFamDiCa, "all", &transunac);
86
    if (!o_index_stripchars)
83
    if (!o_index_stripchars)
87
    diacasedb.recreate();
84
    diacasedb.recreate();
88
#endif
89
85
90
    // Walk the list of all terms, and stem/unac each.
86
    // Walk the list of all terms, and stem/unac each.
91
    string ermsg;
87
    string ermsg;
92
    try {
88
    try {
93
    Xapian::TermIterator it = wdb.allterms_begin();
89
    Xapian::TermIterator it = wdb.allterms_begin();
...
...
105
        // LOGDEB(("stemskipped: Skipping CJK\n"));
101
        // LOGDEB(("stemskipped: Skipping CJK\n"));
106
        continue;
102
        continue;
107
        }
103
        }
108
104
109
        string lower = *it;
105
        string lower = *it;
110
#ifndef RCL_INDEX_STRIPCHARS
111
        // If the index is raw, compute the case-folded term which
106
        // If the index is raw, compute the case-folded term which
112
        // is the input to the stem db, and add a synonym from the
107
        // is the input to the stem db, and add a synonym from the
113
        // stripped term to the cased and accented one, for accent
108
        // stripped term to the cased and accented one, for accent
114
        // and case expansion at query time
109
        // and case expansion at query time
115
        if (!o_index_stripchars) {
110
        if (!o_index_stripchars) {
116
        unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD);
111
        unacmaybefold(*it, lower, "UTF-8", UNACOP_FOLD);
117
        diacasedb.addSynonym(*it);
112
        diacasedb.addSynonym(*it);
118
        }
113
        }
119
#endif
120
114
121
        // Dont' apply stemming to terms which don't look like
115
        // Dont' apply stemming to terms which don't look like
122
        // natural language words.
116
        // natural language words.
123
            if (!Db::isSpellingCandidate(*it)) {
117
            if (!Db::isSpellingCandidate(*it)) {
124
                LOGDEB1(("createExpansionDbs: skipped: [%s]\n", (*it).c_str()));
118
                LOGDEB1(("createExpansionDbs: skipped: [%s]\n", (*it).c_str()));
...
...
129
        // lowercase accented term
123
        // lowercase accented term
130
        for (unsigned int i = 0; i < langs.size(); i++) {
124
        for (unsigned int i = 0; i < langs.size(); i++) {
131
        stemdbs[i].addSynonym(lower);
125
        stemdbs[i].addSynonym(lower);
132
        }
126
        }
133
127
134
#ifndef RCL_INDEX_STRIPCHARS
135
        // For a raw index, also maybe create a stem expansion for
128
        // For a raw index, also maybe create a stem expansion for
136
        // the unaccented term. While this may be incorrect, it is
129
        // the unaccented term. While this may be incorrect, it is
137
        // also necessary for searching in a diacritic-unsensitive
130
        // also necessary for searching in a diacritic-unsensitive
138
        // way on a raw index
131
        // way on a raw index
139
        if (!o_index_stripchars) {
132
        if (!o_index_stripchars) {
...
...
143
            for (unsigned int i = 0; i < langs.size(); i++) {
136
            for (unsigned int i = 0; i < langs.size(); i++) {
144
            unacstemdbs[i].addSynonym(unac);
137
            unacstemdbs[i].addSynonym(unac);
145
            }
138
            }
146
        }
139
        }
147
        }
140
        }
148
#endif
149
        }
141
        }
150
    } XCATCHERROR(ermsg);
142
    } XCATCHERROR(ermsg);
151
    if (!ermsg.empty()) {
143
    if (!ermsg.empty()) {
152
        LOGERR(("Db::createStemDb: map build failed: %s\n", ermsg.c_str()));
144
        LOGERR(("Db::createStemDb: map build failed: %s\n", ermsg.c_str()));
153
        return false;
145
        return false;