|
a/src/rcldb/searchdata.cpp |
|
b/src/rcldb/searchdata.cpp |
1 |
#ifndef lint
|
1 |
#ifndef lint
|
2 |
static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.29 2008-12-15 14:39:52 dockes Exp $ (C) 2006 J.F.Dockes";
|
2 |
static char rcsid[] = "@(#$Id: searchdata.cpp,v 1.30 2008-12-17 14:26:09 dockes Exp $ (C) 2006 J.F.Dockes";
|
3 |
#endif
|
3 |
#endif
|
4 |
/*
|
4 |
/*
|
5 |
* This program is free software; you can redistribute it and/or modify
|
5 |
* This program is free software; you can redistribute it and/or modify
|
6 |
* it under the terms of the GNU General Public License as published by
|
6 |
* it under the terms of the GNU General Public License as published by
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
7 |
* the Free Software Foundation; either version 2 of the License, or
|
|
... |
|
... |
481 |
// at least one composite, we have to increase the slack
|
481 |
// at least one composite, we have to increase the slack
|
482 |
// else a phrase query including a span would fail.
|
482 |
// else a phrase query including a span would fail.
|
483 |
// Ex: "term0@term1 term2" is onlyspans-split as:
|
483 |
// Ex: "term0@term1 term2" is onlyspans-split as:
|
484 |
// 0 term0@term1 0 12
|
484 |
// 0 term0@term1 0 12
|
485 |
// 2 term2 13 18
|
485 |
// 2 term2 13 18
|
486 |
// The position of term1 is 2, not 1, so a phrase search
|
486 |
// The position of term2 is 2, not 1, so a phrase search
|
487 |
// would fail.
|
487 |
// would fail.
|
488 |
// We used to do word split, searching for
|
488 |
// We used to do word split, searching for
|
489 |
// "term0 term01 term1" instead, which may have worse
|
489 |
// "term0 term1 term2" instead, which may have worse
|
490 |
// performance, but will succeed.
|
490 |
// performance, but will succeed.
|
491 |
// We now adjust the phrase/near slack by the term count
|
491 |
// We now adjust the phrase/near slack by the term count
|
492 |
// difference (this is mainly better for cjk where this is a very
|
492 |
// difference (this is mainly better for cjk where this is a very
|
493 |
// common occurrence because of the ngrams thing).
|
493 |
// common occurrence because of the ngrams thing).
|
494 |
wsQData splitDataS(stops), splitDataW(stops);
|
494 |
wsQData splitDataS(stops), splitDataW(stops);
|