--- a/src/internfile/internfile.h
+++ b/src/internfile/internfile.h
@@ -26,11 +26,10 @@
using std::map;
using std::set;
+#include "Filter.h"
+// The class changes according to RCL_USE_XATTR
+#include "autoconfig.h"
#include "pathut.h"
-#include "Filter.h"
-// Beware: the class changes according to RCL_USE_XATTR, so any file
-// including this needs autoconfig.h
-#include "autoconfig.h"
class RclConfig;
namespace Rcl {
@@ -64,9 +63,12 @@
};
/**
- * A class to convert data from a datastore (file-system, firefox
- * history, etc.) into possibly one or severaldocuments in internal
- * representation, either for indexing or viewing at query time (gui preview).
+ * Convert data from file-serialized form (either an actual File
+ * System file or a memory image) into one or several documents in
+ * internal representation (Rcl::Doc). This can be used for indexing,
+ * or viewing at query time (GUI preview), or extracting an internal
+ * document out of a compound file into a simple one.
+ *
* Things work a little differently when indexing or previewing:
* - When indexing, all data has to come from the datastore, and it is
* normally desired that all found subdocuments be returned (ie:
@@ -76,26 +78,19 @@
* so that the full doc identifier is passed in: high level url
* (ie: file path) and internal identifier: ipath, ie: message and
* attachment number.
+ *
+ * Internfile is the part of the code which knows about ipath structure.
+ *
+ * The class has a number of static helper methods which could just as well not
+ * be members and are in there just for namespace reasons.
+ *
*/
class FileInterner {
public:
- /// Operation modifier flags
+ /** Operation modifier flags */
enum Flags {FIF_none, FIF_forPreview, FIF_doUseInputMimetype};
- /// Return values for internfile()
+ /** Return values for internfile() */
enum Status {FIError, FIDone, FIAgain};
-
- /**
- * Get immediate parent for document.
- *
- * This is not in general the same as the "parent" document used
- * with Rcl::Db::addOrUpdate(). The latter is generally the enclosing file,
- * this would be for exemple the email containing the attachment.
- */
- static bool getEnclosing(const string &url, const string &ipath,
- string &eurl, string &eipath, string& udi);
-
- /** Return last element in ipath, like basename */
- static std::string getLastIpathElt(const std::string& ipath);
/** Constructors take the initial step to preprocess the data object and
* create the top filter */
@@ -106,13 +101,17 @@
* created for previewing a file).
* - Filter output may be different for previewing and indexing.
*
- * @param fn file name
+ * This constructor is now only used for indexing; the form with
+ * an Rcl::Doc parameter to identify the data is always used
+ * at query time.
+ *
+ * @param fn file name.
* @param stp pointer to updated stat struct.
- * @param cnf Recoll configuration
+ * @param cnf Recoll configuration.
* @param td temporary directory to use as working space if
* decompression needed. Must be private and will be wiped clean.
* @param mtype mime type if known. For a compressed file this is the
- * mime type for the uncompressed version.
+ * mime type for the uncompressed version.
*/
FileInterner(const string &fn, const struct stat *stp,
RclConfig *cnf, TempDir &td, int flags,
@@ -121,25 +120,24 @@
/**
* Alternate constructor for the case where the data is in memory.
* This is mainly for data extracted from the web cache. The mime type
- * must be set, input must be uncompressed.
+ * must be set, input must be already uncompressed.
*/
FileInterner(const string &data, RclConfig *cnf, TempDir &td,
int flags, const string& mtype);
/**
- * Alternate constructor for the case where it is not known where
- * the data will come from. We'll use the doc fields and try our
- * best. This is only used at query time, the idoc was built from index
- * data.
+ * Alternate constructor used at query time. We don't know where
+ * the data was stored; this is determined from the Rcl::Doc data.
+ *
+ * @param idoc Rcl::Doc object built from index data. The back-end
+ * storage identifier (rclbes field) is used to build the
+ * appropriate fetcher which uses the rest of the Doc fields (url,
+ * ipath...) to retrieve the file or a file reference, which we
+ * then process normally.
*/
FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, TempDir &td,
int flags);
- /**
- * Build sig for doc coming from rcldb. This is here because we know how
- * to query the right backend */
- static bool makesig(RclConfig *cnf, const Rcl::Doc& idoc, string& sig);
-
~FileInterner();
void setMissingStore(FIMissingStore *st)
@@ -150,8 +148,9 @@
/**
* Turn file or file part into Recoll document.
*
- * For multidocument files (ie: mail folder), this must be called multiple
- * times to retrieve the subdocuments
+ * For multidocument files (ie: mail folder), this must be called
+ * multiple times to retrieve the subdocuments.
+ *
* @param doc output document
* @param ipath internal path. If set by caller, the specified subdoc will
* be returned. Else the next document according to current state will
@@ -169,7 +168,7 @@
/** We normally always return text/plain data. A caller can request
* that we stop conversion at the native document type (ie: extracting
- * an email attachment and starting an external viewer)
+ * an email attachment in its native form for an external viewer)
*/
void setTargetMType(const string& tp) {m_targetMType = tp;}
@@ -182,16 +181,44 @@
we keep it around to save work for our caller, which can get it here */
TempFile get_imgtmp() {return m_imgtmp;}
+ const string& getReason() const // Read-only accessor: hands back m_reason by const reference (no copy).
+ {
+ return m_reason; // NOTE(review): presumably the text describing the last failure - confirm where m_reason is set.
+ }
+ bool ok() const // Read-only accessor for the m_ok flag; const-qualified so it can be called on const objects.
+ {
+ return m_ok; // NOTE(review): presumably set by the constructors to report setup success - confirm in the implementation.
+ }
+
+ /**
+ * Get immediate parent for document.
+ *
+ * This is not in general the same as the "parent" document used
+ * with Rcl::Db::addOrUpdate(). The latter is the enclosing file,
+ * this would be for example the email containing the attachment.
+ */
+ static bool getEnclosing(const string &url, const string &ipath,
+ string &eurl, string &eipath, string& udi);
+
+ /** Return last element in ipath, like basename */
+ static std::string getLastIpathElt(const std::string& ipath);
+
+ /**
+ * Build sig for doc coming from rcldb. This is here because we know how
+ * to query the right backend. Used to check up-to-dateness at query time */
+ static bool makesig(RclConfig *cnf, const Rcl::Doc& idoc, string& sig);
+
/** Extract internal document into temporary file.
* This is used mainly for starting an external viewer for a
- * subdocument (ie: mail attachment).
+ * subdocument (ie: mail attachment). This really would not need to be
+ * a member. It creates a FileInterner object to do the actual work.
* @return true for success.
* @param temp output reference-counted temp file object (goes
* away magically). Only used if tofile.empty()
- * @param tofile output file if not null
+ * @param tofile output file if not empty.
* @param cnf The recoll config
- * @param doc Doc data taken from the index. We use it to access the
- * actual document (ie: use mtype, fn, ipath...).
+ * @param doc Doc data taken from the index. We use it to construct a
+ * FileInterner object.
*/
static bool idocToFile(TempFile& temp, const string& tofile,
RclConfig *cnf, const Rcl::Doc& doc);
@@ -209,12 +236,10 @@
static bool maybeUncompressToTemp(TempFile& temp, const string& fn,
RclConfig *cnf, const Rcl::Doc& doc);
- const string& getReason() const {return m_reason;}
static void getMissingExternal(FIMissingStore *st, string& missing);
static void getMissingDescription(FIMissingStore *st, string& desc);
// Parse "missing" file contents into memory struct
static void getMissingFromDescription(FIMissingStore *st, const string& desc);
- bool ok() {return m_ok;}
private:
static const unsigned int MAXHANDLERS = 20;