Switch to unified view

a/src/internfile/internfile.h b/src/internfile/internfile.h
...
...
24
using std::string;
24
using std::string;
25
using std::vector;
25
using std::vector;
26
using std::map;
26
using std::map;
27
using std::set;
27
using std::set;
28
28
29
#include "Filter.h"
30
// The class changes according to RCL_USE_XATTR
31
#include "autoconfig.h"
29
#include "pathut.h"
32
#include "pathut.h"
30
#include "Filter.h"
31
// Beware: the class changes according to RCL_USE_XATTR, so any file
32
// including this needs autoconfig.h
33
#include "autoconfig.h"
34
33
35
class RclConfig;
34
class RclConfig;
36
namespace Rcl {
35
namespace Rcl {
37
    class Doc;
36
    class Doc;
38
}
37
}
...
...
62
    // Missing external programs
61
    // Missing external programs
63
    map<string, set<string> > m_typesForMissing;
62
    map<string, set<string> > m_typesForMissing;
64
};
63
};
65
64
66
/** 
65
/** 
67
 * A class to convert data from a datastore (file-system, firefox
66
 * Convert data from file-serialized form (either an actual File
68
 * history, etc.)  into possibly one or several documents in internal
67
 * System file or a memory image) into one or several documents in
69
 * representation, either for indexing or viewing at query time (gui preview).
68
 * internal representation (Rcl::Doc). This can be used for indexing,
69
 * or viewing at query time (GUI preview), or extracting an internal
70
 * document out of a compound file into a simple one.
71
 *
70
 * Things work a little differently when indexing or previewing:
72
 * Things work a little differently when indexing or previewing:
71
 *  - When indexing, all data has to come from the datastore, and it is 
73
 *  - When indexing, all data has to come from the datastore, and it is 
72
 *    normally desired that all found subdocuments be returned (ie:
74
 *    normally desired that all found subdocuments be returned (ie:
73
 *    all messages and attachments out of a single file mail folder)
75
 *    all messages and attachments out of a single file mail folder)
74
 *  - When previewing, some data is taken from the index (ie: the mime type 
76
 *  - When previewing, some data is taken from the index (ie: the mime type 
75
 *    is already known, and a single document usually needs to be processed,
77
 *    is already known, and a single document usually needs to be processed,
76
 *    so that the full doc identifier is passed in: high level url 
78
 *    so that the full doc identifier is passed in: high level url 
77
 *    (ie: file path) and internal identifier: ipath, ie: message and 
79
 *    (ie: file path) and internal identifier: ipath, ie: message and 
78
 *    attachment number.
80
 *    attachment number.
81
 *
82
 * Internfile is the part of the code which knows about ipath structure. 
83
 *
84
 * The class has a number of static helper methods which could just as well not
85
 * be members and are in there just for namespace reasons.
86
 * 
79
 */
87
 */
80
class FileInterner {
88
class FileInterner {
81
 public:
89
 public:
82
    /// Operation modifier flags
90
    /** Operation modifier flags */
83
    enum Flags {FIF_none, FIF_forPreview, FIF_doUseInputMimetype};
91
    enum Flags {FIF_none, FIF_forPreview, FIF_doUseInputMimetype};
84
    /// Return values for internfile()
92
    /** Return values for internfile() */
85
    enum Status {FIError, FIDone, FIAgain};
93
    enum Status {FIError, FIDone, FIAgain};
86
87
    /**
88
     * Get immediate parent for document. 
89
     *
90
     * This is not in general the same as the "parent" document used 
91
     * with Rcl::Db::addOrUpdate(). The latter is generally the enclosing file,
92
     * this would be for example the email containing the attachment.
93
     */
94
    static bool getEnclosing(const string &url, const string &ipath,
95
               string &eurl, string &eipath, string& udi);
96
97
    /** Return last element in ipath, like basename */
98
    static std::string getLastIpathElt(const std::string& ipath);
99
94
100
    /** Constructors take the initial step to preprocess the data object and
95
    /** Constructors take the initial step to preprocess the data object and
101
     *  create the top filter */
96
     *  create the top filter */
102
97
103
    /**
98
    /**
104
     * Identify and possibly decompress file, and create the top filter.
99
     * Identify and possibly decompress file, and create the top filter.
105
     * - The mtype parameter is not always set (it is when the object is
100
     * - The mtype parameter is not always set (it is when the object is
106
     *   created for previewing a file). 
101
     *   created for previewing a file). 
107
     * - Filter output may be different for previewing and indexing.
102
     * - Filter output may be different for previewing and indexing.
108
     *
103
     *
104
     * This constructor is now only used for indexing, the form with
105
     * an Rcl::Doc parameter to identify the data is always used
106
     * at query time.
107
     *
109
     * @param fn file name 
108
     * @param fn file name.
110
     * @param stp pointer to updated stat struct.
109
     * @param stp pointer to updated stat struct.
111
     * @param cnf Recoll configuration
110
     * @param cnf Recoll configuration.
112
     * @param td  temporary directory to use as working space if 
111
     * @param td  temporary directory to use as working space if 
113
     *   decompression needed. Must be private and will be wiped clean.
112
     *   decompression needed. Must be private and will be wiped clean.
114
     * @param mtype mime type if known. For a compressed file this is the 
113
     * @param mtype mime type if known. For a compressed file this is the 
115
     *   mime type for the uncompressed version. 
114
     *   mime type for the uncompressed version.
116
     */
115
     */
117
    FileInterner(const string &fn, const struct stat *stp, 
116
    FileInterner(const string &fn, const struct stat *stp, 
118
         RclConfig *cnf, TempDir &td, int flags,
117
         RclConfig *cnf, TempDir &td, int flags,
119
         const string *mtype = 0);
118
         const string *mtype = 0);
120
    
119
    
121
    /** 
120
    /** 
122
     * Alternate constructor for the case where the data is in memory.
121
     * Alternate constructor for the case where the data is in memory.
123
     * This is mainly for data extracted from the web cache. The mime type
122
     * This is mainly for data extracted from the web cache. The mime type
124
     * must be set, input must be uncompressed.
123
     * must be set, input must be already uncompressed.
125
     */
124
     */
126
    FileInterner(const string &data, RclConfig *cnf, TempDir &td, 
125
    FileInterner(const string &data, RclConfig *cnf, TempDir &td, 
127
                 int flags, const string& mtype);
126
                 int flags, const string& mtype);
128
127
129
    /**
128
    /**
130
     * Alternate constructor for the case where it is not known where
129
     * Alternate constructor used at query time. We don't know where
131
     * the data will come from. We'll use the doc fields and try our
130
     * the data was stored, this is determined from the Rcl::Doc data
132
     * best. This is only used at query time, the idoc was built from index 
133
     * data.
131
     * 
132
     * @param idoc Rcl::Doc object built from index data. The back-end
133
     *   storage identifier (rclbes field) is used to build the
134
     *   appropriate fetcher which uses the rest of the Doc fields (url,
135
     *   ipath...) to retrieve the file or a file reference, which we
136
     *   then process normally.
134
     */
137
     */
135
    FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, TempDir &td, 
138
    FileInterner(const Rcl::Doc& idoc, RclConfig *cnf, TempDir &td, 
136
                 int flags);
139
                 int flags);
137
140
138
    /** 
139
     * Build sig for doc coming from rcldb. This is here because we know how
140
     * to query the right backend */
141
    static bool makesig(RclConfig *cnf, const Rcl::Doc& idoc, string& sig);
142
143
    ~FileInterner();
141
    ~FileInterner();
144
142
145
    void setMissingStore(FIMissingStore *st)
143
    void setMissingStore(FIMissingStore *st)
146
    {
144
    {
147
    m_missingdatap = st;
145
    m_missingdatap = st;
148
    }
146
    }
149
147
150
    /** 
148
    /** 
151
     * Turn file or file part into Recoll document.
149
     * Turn file or file part into Recoll document.
152
     * 
150
     * 
153
     * For multidocument files (ie: mail folder), this must be called multiple
151
     * For multidocument files (ie: mail folder), this must be called
154
     * times to retrieve the subdocuments
152
     * multiple times to retrieve the subdocuments.
153
     *
155
     * @param doc output document
154
     * @param doc output document
156
     * @param ipath internal path. If set by caller, the specified subdoc will
155
     * @param ipath internal path. If set by caller, the specified subdoc will
157
     *  be returned. Else the next document according to current state will 
156
     *  be returned. Else the next document according to current state will 
158
     *  be returned, and doc.ipath will be set on output.
157
     *  be returned, and doc.ipath will be set on output.
159
     * @return FIError and FIDone are self-explanatory. If FIAgain is returned,
158
     * @return FIError and FIDone are self-explanatory. If FIAgain is returned,
...
...
167
     */ 
166
     */ 
168
    const string&  getMimetype() {return m_mimetype;}
167
    const string&  getMimetype() {return m_mimetype;}
169
168
170
    /** We normally always return text/plain data. A caller can request
169
    /** We normally always return text/plain data. A caller can request
171
     *  that we stop conversion at the native document type (ie: extracting
170
     *  that we stop conversion at the native document type (ie: extracting
172
     *  an email attachment and starting an external viewer)
171
     *  an email attachment in its native form for an external viewer)
173
     */
172
     */
174
    void setTargetMType(const string& tp) {m_targetMType = tp;}
173
    void setTargetMType(const string& tp) {m_targetMType = tp;}
175
174
176
    /** In case we see an html version while converting, it is set aside 
175
    /** In case we see an html version while converting, it is set aside 
177
     *  and can be recovered 
176
     *  and can be recovered 
...
...
180
179
181
    /** If we happen to be processing an image file and need a temp file,
180
    /** If we happen to be processing an image file and need a temp file,
182
    we keep it around to save work for our caller, which can get it here */
181
    we keep it around to save work for our caller, which can get it here */
183
    TempFile get_imgtmp() {return m_imgtmp;}
182
    TempFile get_imgtmp() {return m_imgtmp;}
184
183
184
    const string& getReason() const 
185
    {
186
  return m_reason;
187
    }
188
    bool ok() const
189
    {
190
  return m_ok;
191
    }
192
193
    /**
194
     * Get immediate parent for document. 
195
     *
196
     * This is not in general the same as the "parent" document used 
197
     * with Rcl::Db::addOrUpdate(). The latter is the enclosing file,
198
     * this would be for example the email containing the attachment.
199
     */
200
    static bool getEnclosing(const string &url, const string &ipath,
201
               string &eurl, string &eipath, string& udi);
202
203
    /** Return last element in ipath, like basename */
204
    static std::string getLastIpathElt(const std::string& ipath);
205
206
    /** 
207
     * Build sig for doc coming from rcldb. This is here because we know how
208
     * to query the right backend. Used to check up-to-dateness at query time */
209
    static bool makesig(RclConfig *cnf, const Rcl::Doc& idoc, string& sig);
210
185
    /** Extract internal document into temporary file. 
211
    /** Extract internal document into temporary file. 
186
     *  This is used mainly for starting an external viewer for a
212
     *  This is used mainly for starting an external viewer for a
187
     *  subdocument (ie: mail attachment).
213
     *  subdocument (ie: mail attachment). This really would not need to be
214
     *  a member. It creates a FileInterner object to do the actual work
188
     * @return true for success.
215
     * @return true for success.
189
     * @param temp output reference-counted temp file object (goes
216
     * @param temp output reference-counted temp file object (goes
190
     *   away magically). Only used if tofile.empty()
217
     *   away magically). Only used if tofile.empty()
191
     * @param tofile output file if not null
218
     * @param tofile output file if not empty.
192
     * @param cnf The recoll config
219
     * @param cnf The recoll config
193
     * @param doc Doc data taken from the index. We use it to access the 
220
     * @param doc Doc data taken from the index. We use it to construct a
194
     *            actual document (ie: use mtype, fn, ipath...).
221
     *    FileInterner object.
195
     */
222
     */
196
    static bool idocToFile(TempFile& temp, const string& tofile, 
223
    static bool idocToFile(TempFile& temp, const string& tofile, 
197
               RclConfig *cnf, const Rcl::Doc& doc);
224
               RclConfig *cnf, const Rcl::Doc& doc);
198
225
199
    /** 
226
    /** 
...
...
207
     *  by the TempFile status (!isNull())
234
     *  by the TempFile status (!isNull())
208
     */
235
     */
209
    static bool maybeUncompressToTemp(TempFile& temp, const string& fn, 
236
    static bool maybeUncompressToTemp(TempFile& temp, const string& fn, 
210
                                      RclConfig *cnf, const Rcl::Doc& doc);
237
                                      RclConfig *cnf, const Rcl::Doc& doc);
211
238
212
    const string& getReason() const {return m_reason;}
213
    static void getMissingExternal(FIMissingStore *st, string& missing);
239
    static void getMissingExternal(FIMissingStore *st, string& missing);
214
    static void getMissingDescription(FIMissingStore *st, string& desc);
240
    static void getMissingDescription(FIMissingStore *st, string& desc);
215
    // Parse "missing" file contents into memory struct
241
    // Parse "missing" file contents into memory struct
216
    static void getMissingFromDescription(FIMissingStore *st, const string& desc);
242
    static void getMissingFromDescription(FIMissingStore *st, const string& desc);
217
    bool ok() {return m_ok;}
218
243
219
 private:
244
 private:
220
    static const unsigned int MAXHANDLERS = 20;
245
    static const unsigned int MAXHANDLERS = 20;
221
    RclConfig             *m_cfg;
246
    RclConfig             *m_cfg;
222
    string                 m_fn;
247
    string                 m_fn;