recoll / Code / Diff of /src/rcldb/rcldb.cpp

Diff of /src/rcldb/rcldb.cpp [061ffd] .. [bb5572]

-a/src/rcldb/rcldb.cpp
+b/src/rcldb/rcldb.cpp
 ...
     LOGDEB2(("Db::add: split file name [%s]\n", fn.c_str()));
     if (!splitter.text_to_words(doc.utf8fn))
         LOGDEB(("Db::addOrUpdate: split failed for file name\n"));
     splitter.basepos += splitter.curpos + 100;
+    // If the ipath looks like a file path (it is not empty and not
+    // purely numeric), also index its last element. This is for
+    // compound documents like zip and chm, for which the filter uses
+    // the path of the member file as ipath.
+    if (!doc.ipath.empty() &&
+  doc.ipath.find_first_not_of("0123456789") != string::npos) {
+  string utf8ipathlast;
+  // There is no way in hell we could have an idea of the
+  // charset here, so let's hope it's ascii or utf-8. We call
+  // transcode to strip the bad chars and pray
+  if (transcode(path_getsimple(doc.ipath), utf8ipathlast,
+            "UTF-8", "UTF-8")) {
+      splitter.text_to_words(utf8ipathlast);
+      splitter.basepos += splitter.curpos + 100;
+  }
+    }
     // Index textual metadata.  These are all indexed as text with
     // positions, as we may want to do phrase searches with them (this
     // makes no sense for keywords by the way).
     //
     // The order has no importance, and we set a position gap of 100