Switch to unified view

a/src/rcldb/searchdata.cpp b/src/rcldb/searchdata.cpp
...
...
542
    }
542
    }
543
543
544
    bool haswild = term.find_first_of(cstr_minwilds) != string::npos;
544
    bool haswild = term.find_first_of(cstr_minwilds) != string::npos;
545
545
546
    // If there are no wildcards, add term to the list of user-entered terms
546
    // If there are no wildcards, add term to the list of user-entered terms
547
    if (!haswild)
547
    if (!haswild) {
548
    m_hldata.uterms.insert(term);
548
    m_hldata.uterms.insert(term);
549
549
        sterm = term;
550
    }
550
    // No stem expansion if there are wildcards or if prevented by caller
551
    // No stem expansion if there are wildcards or if prevented by caller
551
    bool nostemexp = (mods & SearchDataClause::SDCM_NOSTEMMING) != 0;
552
    bool nostemexp = (mods & SDCM_NOSTEMMING) != 0;
552
    if (haswild || getStemLang().empty()) {
553
    if (haswild || getStemLang().empty()) {
553
    LOGDEB2(("expandTerm: found wildcards or stemlang empty: no exp\n"));
554
    LOGDEB2(("expandTerm: found wildcards or stemlang empty: no exp\n"));
554
    nostemexp = true;
555
    nostemexp = true;
555
    }
556
    }
556
557
557
    // noexpansion can be modified further down by possible case/diac expansion
558
    // noexpansion can be modified further down by possible case/diac expansion
558
    bool noexpansion = nostemexp && !haswild; 
559
    bool noexpansion = nostemexp && !haswild; 
559
560
561
    int termmatchsens = 0;
562
560
#ifndef RCL_INDEX_STRIPCHARS
563
#ifndef RCL_INDEX_STRIPCHARS
561
    bool diac_sensitive = (mods & SearchDataClause::SDCM_DIACSENS) != 0;
564
    bool diac_sensitive = (mods & SDCM_DIACSENS) != 0;
562
    bool case_sensitive = (mods & SearchDataClause::SDCM_CASESENS) != 0;
565
    bool case_sensitive = (mods & SDCM_CASESENS) != 0;
563
566
564
    if (o_index_stripchars) {
567
    if (o_index_stripchars) {
565
    diac_sensitive = case_sensitive = false;
568
    diac_sensitive = case_sensitive = false;
566
    } else {
569
    } else {
567
    // If we are working with a raw index, apply the rules for case and 
570
    // If we are working with a raw index, apply the rules for case and 
...
...
594
    }
597
    }
595
598
596
    if (!case_sensitive || !diac_sensitive)
599
    if (!case_sensitive || !diac_sensitive)
597
        noexpansion = false;
600
        noexpansion = false;
598
    }
601
    }
602
603
    if (case_sensitive)
604
  termmatchsens |= Db::ET_CASESENS;
605
    if (diac_sensitive)
606
  termmatchsens |= Db::ET_DIACSENS;
599
#endif
607
#endif
600
608
601
    if (noexpansion) {
609
    if (noexpansion) {
602
  sterm = term;
603
    oexp.push_back(prefix + term);
610
    oexp.push_back(prefix + term);
604
    m_hldata.terms[term] = m_hldata.uterms.size() - 1;
611
    m_hldata.terms[term] = m_hldata.uterms.size() - 1;
605
    LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
612
    LOGDEB(("ExpandTerm: final: %s\n", stringsToString(oexp).c_str()));
606
    return true;
613
    return true;
607
    } 
614
    } 
608
615
609
#ifndef RCL_INDEX_STRIPCHARS
616
    Db::MatchType mtyp = haswild ? Db::ET_WILD : 
610
    // The case/diac expansion db
617
  nostemexp ? Db::ET_NONE : Db::ET_STEM;
611
    SynTermTransUnac unacfoldtrans(UNACOP_UNACFOLD);
612
    XapComputableSynFamMember synac(db.m_ndb->xrdb, synFamDiCa, "all", 
613
                  &unacfoldtrans);
614
#endif // RCL_INDEX_STRIPCHARS
615
616
    TermMatchResult res;
618
    TermMatchResult res;
617
619
    if (!db.termMatch(mtyp | termmatchsens, getStemLang(), term, res, maxexpand,
618
    if (haswild) {
620
            m_field)) {
619
#ifndef RCL_INDEX_STRIPCHARS
621
  // Let it go through
620
  if (!o_index_stripchars && (!diac_sensitive || !case_sensitive)) {
621
      // Perform case/diac expansion on the exp as appropriate and
622
      // expand the result.
623
      vector<string> exp;
624
      if (diac_sensitive) {
625
      // Expand for diacritics and case, filtering for same diacritics
626
      SynTermTransUnac foldtrans(UNACOP_FOLD);
627
      synac.keyWildExpand(term, exp, &foldtrans);
628
      } else if (case_sensitive) {
629
      // Expand for diacritics and case, filtering for same case
630
      SynTermTransUnac unactrans(UNACOP_UNAC);
631
      synac.keyWildExpand(term, exp, &unactrans);
632
      } else {
633
      // Expand for diacritics and case, no filtering
634
      synac.keyWildExpand(term, exp);
635
      }
622
    }
636
      // There are no wildcards in the result from above but
637
      // calling termMatch gets the result into the right form
638
      for (vector<string>::const_iterator it = exp.begin(); 
639
       it != exp.end(); it++) {
640
      db.termMatch(Rcl::Db::ET_WILD, getStemLang(), *it, res, 
641
               maxexpand, m_field);
642
      }
643
  }
644
#endif // RCL_INDEX_STRIPCHARS
645
646
  // Expand the original wildcard expression even if we did the
647
  // case/diac dance above,
648
  db.termMatch(Rcl::Db::ET_WILD, getStemLang(), term, res, 
649
           maxexpand, m_field);
650
  goto termmatchtoresult;
651
    }
652
653
    sterm = term;
654
655
#ifdef RCL_INDEX_STRIPCHARS
656
657
    db.termMatch(Rcl::Db::ET_STEM, getStemLang(), term, res, 
658
       maxexpand, m_field);
659
660
#else
661
662
    if (o_index_stripchars) {
663
  // If the index is stripped, we can only come here if
664
  // nostemexp is unset and we just need stem expansion.
665
  db.termMatch(Rcl::Db::ET_STEM, getStemLang(), term, res, 
666
           maxexpand, m_field);
667
    } else {
668
  vector<string> lexp;
669
  if (diac_sensitive && case_sensitive) {
670
      // No expansion whatsoever. 
671
      lexp.push_back(term);
672
  } else if (diac_sensitive) {
673
      // Expand for accents and case, filtering for same accents,
674
      SynTermTransUnac foldtrans(UNACOP_FOLD);
675
      synac.synExpand(term, lexp, &foldtrans);
676
  } else if (case_sensitive) {
677
      // Expand for accents and case, filtering for same case
678
      SynTermTransUnac unactrans(UNACOP_UNAC);
679
      synac.synExpand(term, lexp, &unactrans);
680
  } else {
681
      // We are neither accent- nor case- sensitive and may need stem
682
      // expansion or not. Expand for accents and case
683
      synac.synExpand(term, lexp);
684
  }
685
686
  if (!nostemexp) {
687
      // Need stem expansion. Lowercase the result of accent and case
688
      // expansion for input to stemdb.
689
      for (unsigned int i = 0; i < lexp.size(); i++) {
690
      string lower;
691
      unacmaybefold(lexp[i], lower, "UTF-8", UNACOP_FOLD);
692
      lexp[i] = lower;
693
      }
694
      sort(lexp.begin(), lexp.end());
695
      lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
696
      StemDb sdb(db.m_ndb->xrdb);
697
      vector<string> exp1;
698
      for (vector<string>::const_iterator it = lexp.begin(); 
699
       it != lexp.end(); it++) {
700
      sdb.stemExpand(getStemLang(), *it, exp1);
701
      }
702
      LOGDEB(("ExpTerm: stem exp-> %s\n", stringsToString(exp1).c_str()));
703
704
      // Expand the resulting list for case (all stemdb content
705
      // is lowercase)
706
      lexp.clear();
707
      for (vector<string>::const_iterator it = exp1.begin(); 
708
       it != exp1.end(); it++) {
709
      synac.synExpand(*it, lexp);
710
      }
711
      sort(lexp.begin(), lexp.end());
712
      lexp.erase(unique(lexp.begin(), lexp.end()), lexp.end());
713
  }
714
715
  // Bogus wildcard expand to generate the result (possibly add prefixes)
716
  LOGDEB(("ExpandTerm:TM: lexp: %s\n", stringsToString(lexp).c_str()));
717
  for (vector<string>::const_iterator it = lexp.begin();
718
       it != lexp.end(); it++) {
719
      db.termMatch(Rcl::Db::ET_WILD, getStemLang(), *it, res,
720
           maxexpand, m_field);
721
  }
722
    }
723
#endif
724
623
725
    // Term match entries to vector of terms
624
    // Term match entries to vector of terms
726
termmatchtoresult:
727
    if (int(res.entries.size()) >= maxexpand && !maxexpissoft) {
625
    if (int(res.entries.size()) >= maxexpand && !maxexpissoft) {
728
    ermsg = "Maximum term expansion size exceeded."
626
    ermsg = "Maximum term expansion size exceeded."
729
        " Maybe increase maxTermExpand.";
627
        " Maybe increase maxTermExpand.";
730
    return false;
628
    return false;
731
    }
629
    }
732
    for (vector<TermMatchEntry>::const_iterator it = res.entries.begin(); 
630
    for (vector<TermMatchEntry>::const_iterator it = res.entries.begin(); 
733
     it != res.entries.end(); it++) {
631
     it != res.entries.end(); it++) {
734
    oexp.push_back(it->term);
632
    oexp.push_back(it->term);
735
    }
633
    }
736
    // If the term does not exist at all in the db, the return from
634
    // If the term does not exist at all in the db, the return from
737
    // term match is going to be empty, which is not what we want (we
635
    // termMatch() is going to be empty, which is not what we want (we
738
    // would then compute an empty Xapian query)
636
    // would then compute an empty Xapian query)
739
    if (oexp.empty())
637
    if (oexp.empty())
740
    oexp.push_back(prefix + term);
638
    oexp.push_back(prefix + term);
741
639
742
    // Remember the uterm-to-expansion links
640
    // Remember the uterm-to-expansion links