Switch to unified view

a/src/rcldb/stemdb.cpp b/src/rcldb/stemdb.cpp
1
#ifndef lint
1
#ifndef lint
2
static char rcsid[] = "@(#$Id: stemdb.cpp,v 1.3 2006-09-19 14:30:39 dockes Exp $ (C) 2005 J.F.Dockes";
2
static char rcsid[] = "@(#$Id: stemdb.cpp,v 1.4 2006-09-20 06:21:43 dockes Exp $ (C) 2005 J.F.Dockes";
3
#endif
3
#endif
4
4
5
/**
5
/**
6
 * Management of the auxiliary databases listing stems and their expansion 
6
 * Management of the auxiliary databases listing stems and their expansion 
7
 * terms
7
 * terms
...
...
65
        rmdir(dir.c_str());
65
        rmdir(dir.c_str());
66
    }
66
    }
67
    }
67
    }
68
};
68
};
69
69
70
// Deciding if we try to stem the term. If it has numerals or capitals
71
// we don't
72
inline static bool
70
inline static bool
73
p_notlowerascii(unsigned int c)
71
p_notlowerascii(unsigned int c)
74
{
72
{
75
    if (c < 'a' || (c > 'z' && c < 128))
73
    if (c < 'a' || (c > 'z' && c < 128))
76
    return true;
74
    return true;
...
...
103
    try {
101
    try {
104
    Xapian::Stem stemmer(lang);
102
    Xapian::Stem stemmer(lang);
105
    Xapian::TermIterator it;
103
    Xapian::TermIterator it;
106
    for (it = xdb.allterms_begin(); 
104
    for (it = xdb.allterms_begin(); 
107
         it != xdb.allterms_end(); it++) {
105
         it != xdb.allterms_end(); it++) {
108
      // If it has any non-lowercase 7-bit char, it can't be stemmed
106
      // Deciding if we try to stem the term. If it has any
107
      // non-lowercase 7-bit char, don't. Note that
108
      // as we are dealing with unaccented data, we are still
109
      // processing most of Western European languages (where
110
      // most unaccented letters are ASCII)
109
        string::iterator sit = (*it).begin(), eit = sit + (*it).length();
111
        string::iterator sit = (*it).begin(), eit = sit + (*it).length();
110
        if ((sit = find_if(sit, eit, p_notlowerascii)) != eit) {
112
        if ((sit = find_if(sit, eit, p_notlowerascii)) != eit) {
111
        ++nostem;
113
        ++nostem;
112
        // LOGDEB(("stemskipped: [%s], because of 0x%x\n", 
114
        // LOGDEB(("stemskipped: [%s], because of 0x%x\n", 
113
        // (*it).c_str(), *sit));
115
        // (*it).c_str(), *sit));