Switch to unified view

a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp
1
#ifndef lint
1
#ifndef lint
2
static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.17 2007-06-22 06:14:04 dockes Exp $ (C) 2006 J.F.Dockes";
2
static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.18 2007-09-20 08:43:12 dockes Exp $ (C) 2006 J.F.Dockes";
3
#endif
3
#endif
4
/*
4
/*
5
 *   This program is free software; you can redistribute it and/or modify
5
 *   This program is free software; you can redistribute it and/or modify
6
 *   it under the terms of the GNU General Public License as published by
6
 *   it under the terms of the GNU General Public License as published by
7
 *   the Free Software Foundation; either version 2 of the License, or
7
 *   the Free Software Foundation; either version 2 of the License, or
...
...
364
    try {
364
    try {
365
    for (list<string>::iterator it = phrases.begin(); 
365
    for (list<string>::iterator it = phrases.begin(); 
366
         it != phrases.end(); it++) {
366
         it != phrases.end(); it++) {
367
        LOGDEB(("strToXapianQ: phrase or word: [%s]\n", it->c_str()));
367
        LOGDEB(("strToXapianQ: phrase or word: [%s]\n", it->c_str()));
368
368
369
      // If there are both spans and single words in this element,
369
      // If there are multiple spans in this element, including
370
      // we need to use a word split, else a phrase query including
370
      // at least one composite, we need to use a word split,
371
      // a span would fail if we didn't adjust the proximity to
371
      // else a phrase query including a span would fail. 
372
      // account for the additional span term which is complicated.
372
      // (other possible solution: adjust slack to account for the
373
      //  additional position increase?)
374
      // Ex: "term0@term01 term1" is onlyspans-split as:
375
      //   0 term0@term01            0   12
376
      //   2 term1                  13   18
377
      // The position of term1 is 2, not 1, so the phrase search would
378
      // fail. We search for "term0 term01 term1" instead, which may 
379
      // have worse performance, but will succeed.
373
        wsQData splitDataS(stops), splitDataW(stops);
380
        wsQData splitDataS(stops), splitDataW(stops);
374
        TextSplit splitterS(&splitDataS, (TextSplit::Flags)
381
        TextSplit splitterS(&splitDataS, 
375
                (TextSplit::TXTS_ONLYSPANS | 
382
                TextSplit::Flags(TextSplit::TXTS_ONLYSPANS | 
376
                 TextSplit::TXTS_KEEPWILD));
383
                      TextSplit::TXTS_KEEPWILD));
377
        splitterS.text_to_words(*it);
384
        splitterS.text_to_words(*it);
378
        TextSplit splitterW(&splitDataW, (TextSplit::Flags)
385
        TextSplit splitterW(&splitDataW, 
379
                (TextSplit::TXTS_NOSPANS | 
386
                TextSplit::Flags(TextSplit::TXTS_NOSPANS | 
380
                 TextSplit::TXTS_KEEPWILD));
387
                      TextSplit::TXTS_KEEPWILD));
381
        splitterW.text_to_words(*it);
388
        splitterW.text_to_words(*it);
382
        wsQData *splitData = &splitDataS;
389
        wsQData *splitData = &splitDataS;
383
        if (splitDataS.terms.size() > 1 && 
390
        if (splitDataS.terms.size() > 1 && 
384
        splitDataS.terms.size() != splitDataW.terms.size())
391
        splitDataS.terms.size() != splitDataW.terms.size())
385
        splitData = &splitDataW;
392
        splitData = &splitDataW;
...
...
387
        LOGDEB(("strToXapianQ: splitter term count: %d\n", 
394
        LOGDEB(("strToXapianQ: splitter term count: %d\n", 
388
             splitData->terms.size()));
395
             splitData->terms.size()));
389
        switch (splitData->terms.size()) {
396
        switch (splitData->terms.size()) {
390
        case 0: continue;// ??
397
        case 0: continue;// ??
391
        case 1: 
398
        case 1: 
392
      // Not a real phrase: one term. Still may be expanded
399
      // Just a term. Still may be expanded (by stem or
393
      // (stem or wildcard)
400
      // wildcard) to an OR list.
394
        {
401
        {
395
            string term = splitData->terms.front();
402
            string term = splitData->terms.front();
396
            list<string> exp;  
403
            list<string> exp;  
397
          string sterm;
404
          string sterm; // dumb version of user term
398
            stripExpandTerm(false, term, exp, sterm);
405
            stripExpandTerm(false, term, exp, sterm);
399
            m_terms.insert(m_terms.end(), exp.begin(), exp.end());
406
            m_terms.insert(m_terms.end(), exp.begin(), exp.end());
400
            // Push either term or OR of stem-expanded set
407
            // Push either term or OR of stem-expanded set
401
            addPrefix(exp, prefix);
408
            addPrefix(exp, prefix);
402
            Xapian::Query xq(Xapian::Query::OP_OR, 
409
            Xapian::Query xq(Xapian::Query::OP_OR, 
...
...
415
            pqueries.push_back(xq);
422
            pqueries.push_back(xq);
416
        }
423
        }
417
        break;
424
        break;
418
425
419
        default:
426
        default:
420
      // Phrase/near: transform into a PHRASE or NEAR xapian
427
      // Element had several terms: transform into a PHRASE
421
      // query, the element of which can themselves be OR
428
      // or NEAR xapian query, the elements of which can
422
      // queries if the terms get expanded by stemming or
429
      // themselves be OR queries if the terms get expanded
423
      // wildcards (we don't do stemming for PHRASE though)
430
      // by stemming or wildcards (we don't do stemming for
431
      // PHRASE though)
424
        Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR : 
432
        Xapian::Query::op op = useNear ? Xapian::Query::OP_NEAR : 
425
        Xapian::Query::OP_PHRASE;
433
        Xapian::Query::OP_PHRASE;
426
        list<Xapian::Query> orqueries;
434
        list<Xapian::Query> orqueries;
427
        bool hadmultiple = false;
435
        bool hadmultiple = false;
428
        vector<vector<string> >groups;
436
        vector<vector<string> >groups;