Switch to unified view

a/src/rcldb/rcldb.cpp b/src/rcldb/rcldb.cpp
...
...
230
{
230
{
231
    LOGDEB2(("Db::dbDataToRclDoc: data:\n%s\n", data.c_str()));
231
    LOGDEB2(("Db::dbDataToRclDoc: data:\n%s\n", data.c_str()));
232
    ConfSimple parms(data);
232
    ConfSimple parms(data);
233
    if (!parms.ok())
233
    if (!parms.ok())
234
    return false;
234
    return false;
235
236
    // Special cases:
235
    parms.get(Doc::keyurl, doc.url);
237
    parms.get(Doc::keyurl, doc.url);
236
    parms.get(Doc::keytp, doc.mimetype);
238
    parms.get(Doc::keytp, doc.mimetype);
237
    parms.get(Doc::keyfmt, doc.fmtime);
239
    parms.get(Doc::keyfmt, doc.fmtime);
238
    parms.get(Doc::keydmt, doc.dmtime);
240
    parms.get(Doc::keydmt, doc.dmtime);
239
    parms.get(Doc::keyoc, doc.origcharset);
241
    parms.get(Doc::keyoc, doc.origcharset);
240
    parms.get(cstr_caption, doc.meta[Doc::keytt]);
242
    parms.get(cstr_caption, doc.meta[Doc::keytt]);
241
    parms.get(Doc::keykw, doc.meta[Doc::keykw]);
243
242
    parms.get(Doc::keyabs, doc.meta[Doc::keyabs]);
244
    parms.get(Doc::keyabs, doc.meta[Doc::keyabs]);
243
    // Possibly remove synthetic abstract indicator (if it's there, we
245
    // Possibly remove synthetic abstract indicator (if it's there, we
244
    // used to index the beginning of the text as abstract).
246
    // used to index the beginning of the text as abstract).
245
    doc.syntabs = false;
247
    doc.syntabs = false;
246
    if (doc.meta[Doc::keyabs].find(cstr_syntAbs) == 0) {
248
    if (doc.meta[Doc::keyabs].find(cstr_syntAbs) == 0) {
249
  doc.meta[Doc::keyabs] = 
247
    doc.meta[Doc::keyabs] = doc.meta[Doc::keyabs].substr(cstr_syntAbs.length());
250
        doc.meta[Doc::keyabs].substr(cstr_syntAbs.length());
248
    doc.syntabs = true;
251
    doc.syntabs = true;
249
    }
252
    }
250
    parms.get(Doc::keyipt, doc.ipath);
253
    parms.get(Doc::keyipt, doc.ipath);
251
    parms.get(Doc::keypcs, doc.pcbytes);
254
    parms.get(Doc::keypcs, doc.pcbytes);
252
    parms.get(Doc::keyfs, doc.fbytes);
255
    parms.get(Doc::keyfs, doc.fbytes);
253
    parms.get(Doc::keyds, doc.dbytes);
256
    parms.get(Doc::keyds, doc.dbytes);
254
    parms.get(Doc::keysig, doc.sig);
257
    parms.get(Doc::keysig, doc.sig);
255
    doc.xdocid = docid;
258
    doc.xdocid = docid;
256
259
257
    // Other, not predefined meta fields:
260
    // Normal key/value pairs:
258
    vector<string> keys = parms.getNames(string());
261
    vector<string> keys = parms.getNames(string());
259
    for (vector<string>::const_iterator it = keys.begin(); 
262
    for (vector<string>::const_iterator it = keys.begin(); 
260
     it != keys.end(); it++) {
263
     it != keys.end(); it++) {
261
    if (doc.meta.find(*it) == doc.meta.end())
264
    if (doc.meta.find(*it) == doc.meta.end())
262
        parms.get(*it, doc.meta[*it]);
265
        parms.get(*it, doc.meta[*it]);
...
...
1071
    RECORD_APPEND(record, cstr_caption, doc.meta[Doc::keytt]);
1074
    RECORD_APPEND(record, cstr_caption, doc.meta[Doc::keytt]);
1072
1075
1073
    trimstring(doc.meta[Doc::keykw], " \t\r\n");
1076
    trimstring(doc.meta[Doc::keykw], " \t\r\n");
1074
    doc.meta[Doc::keykw] = 
1077
    doc.meta[Doc::keykw] = 
1075
    neutchars(truncate_to_word(doc.meta[Doc::keykw], 300), cstr_nc);
1078
    neutchars(truncate_to_word(doc.meta[Doc::keykw], 300), cstr_nc);
1076
    if (!doc.meta[Doc::keykw].empty())
1079
    // No need to explicitly append the keywords, this will be done by 
1077
  RECORD_APPEND(record, Doc::keykw, doc.meta[Doc::keykw]);
1080
    // the "stored" loop
1078
1081
1079
    // If abstract is empty, we make up one with the beginning of the
1082
    // If abstract is empty, we make up one with the beginning of the
1080
    // document. This is then not indexed, but part of the doc data so
1083
    // document. This is then not indexed, but part of the doc data so
1081
    // that we can return it to a query without having to decode the
1084
    // that we can return it to a query without having to decode the
1082
    // original file.
1085
    // original file.
...
...
1092
    } else {
1095
    } else {
1093
    doc.meta[Doc::keyabs] = 
1096
    doc.meta[Doc::keyabs] = 
1094
        neutchars(truncate_to_word(doc.meta[Doc::keyabs], m_idxAbsTruncLen),
1097
        neutchars(truncate_to_word(doc.meta[Doc::keyabs], m_idxAbsTruncLen),
1095
              cstr_nc);
1098
              cstr_nc);
1096
    }
1099
    }
1097
    if (!doc.meta[Doc::keyabs].empty())
1098
  RECORD_APPEND(record, Doc::keyabs, doc.meta[Doc::keyabs]);
1099
1100
1100
    const set<string>& stored = m_config->getStoredFields();
1101
    const set<string>& stored = m_config->getStoredFields();
1101
    for (set<string>::const_iterator it = stored.begin();
1102
    for (set<string>::const_iterator it = stored.begin();
1102
     it != stored.end(); it++) {
1103
     it != stored.end(); it++) {
1103
    string nm = m_config->fieldCanon(*it);
1104
    string nm = m_config->fieldCanon(*it);
1104
    if (!doc.meta[*it].empty()) {
1105
    if (!doc.meta[nm].empty()) {
1105
        string value = 
1106
        string value = 
1106
        neutchars(truncate_to_word(doc.meta[*it], 150), cstr_nc);
1107
        neutchars(truncate_to_word(doc.meta[nm], 150), cstr_nc);
1107
        RECORD_APPEND(record, nm, value);
1108
        RECORD_APPEND(record, nm, value);
1108
    }
1109
    }
1109
    }
1110
    }
1110
1111
1111
    // If empty pages (multiple break at same pos) were recorded, save
1112
    // If empty pages (multiple break at same pos) were recorded, save