|
a/src/rcldb/rcldb.cpp |
|
b/src/rcldb/rcldb.cpp |
|
... |
|
... |
904 |
LOGDEB2(("Db::add: split file name [%s]\n", fn.c_str()));
|
904 |
LOGDEB2(("Db::add: split file name [%s]\n", fn.c_str()));
|
905 |
if (!splitter.text_to_words(doc.utf8fn))
|
905 |
if (!splitter.text_to_words(doc.utf8fn))
|
906 |
LOGDEB(("Db::addOrUpdate: split failed for file name\n"));
|
906 |
LOGDEB(("Db::addOrUpdate: split failed for file name\n"));
|
907 |
splitter.basepos += splitter.curpos + 100;
|
907 |
splitter.basepos += splitter.curpos + 100;
|
908 |
|
908 |
|
|
|
909 |
// If the ipath is like a path, index the last element. This is
|
|
|
910 |
// for compound documents like zip and chm for which the filter
|
|
|
911 |
// uses the file path as ipath.
|
|
|
912 |
if (!doc.ipath.empty() &&
|
|
|
913 |
doc.ipath.find_first_not_of("0123456789") != string::npos) {
|
|
|
914 |
string utf8ipathlast;
|
|
|
915 |
// There is no way in hell we could have an idea of the
|
|
|
916 |
// charset here, so let's hope it's ascii or utf-8. We call
|
|
|
917 |
// transcode to strip the bad chars and pray
|
|
|
918 |
if (transcode(path_getsimple(doc.ipath), utf8ipathlast,
|
|
|
919 |
"UTF-8", "UTF-8")) {
|
|
|
920 |
splitter.text_to_words(utf8ipathlast);
|
|
|
921 |
splitter.basepos += splitter.curpos + 100;
|
|
|
922 |
}
|
|
|
923 |
}
|
|
|
924 |
|
909 |
// Index textual metadata. These are all indexed as text with
|
925 |
// Index textual metadata. These are all indexed as text with
|
910 |
// positions, as we may want to do phrase searches with them (this
|
926 |
// positions, as we may want to do phrase searches with them (this
|
911 |
// makes no sense for keywords by the way).
|
927 |
// makes no sense for keywords by the way).
|
912 |
//
|
928 |
//
|
913 |
// The order has no importance, and we set a position gap of 100
|
929 |
// The order has no importance, and we set a position gap of 100
|