|
a/src/rcldb/rcldb.cpp |
|
b/src/rcldb/rcldb.cpp |
|
... |
|
... |
230 |
{
|
230 |
{
|
231 |
LOGDEB2(("Db::dbDataToRclDoc: data:\n%s\n", data.c_str()));
|
231 |
LOGDEB2(("Db::dbDataToRclDoc: data:\n%s\n", data.c_str()));
|
232 |
ConfSimple parms(data);
|
232 |
ConfSimple parms(data);
|
233 |
if (!parms.ok())
|
233 |
if (!parms.ok())
|
234 |
return false;
|
234 |
return false;
|
|
|
235 |
|
|
|
236 |
// Special cases:
|
235 |
parms.get(Doc::keyurl, doc.url);
|
237 |
parms.get(Doc::keyurl, doc.url);
|
236 |
parms.get(Doc::keytp, doc.mimetype);
|
238 |
parms.get(Doc::keytp, doc.mimetype);
|
237 |
parms.get(Doc::keyfmt, doc.fmtime);
|
239 |
parms.get(Doc::keyfmt, doc.fmtime);
|
238 |
parms.get(Doc::keydmt, doc.dmtime);
|
240 |
parms.get(Doc::keydmt, doc.dmtime);
|
239 |
parms.get(Doc::keyoc, doc.origcharset);
|
241 |
parms.get(Doc::keyoc, doc.origcharset);
|
240 |
parms.get(cstr_caption, doc.meta[Doc::keytt]);
|
242 |
parms.get(cstr_caption, doc.meta[Doc::keytt]);
|
241 |
parms.get(Doc::keykw, doc.meta[Doc::keykw]);
|
243 |
|
242 |
parms.get(Doc::keyabs, doc.meta[Doc::keyabs]);
|
244 |
parms.get(Doc::keyabs, doc.meta[Doc::keyabs]);
|
243 |
// Possibly remove synthetic abstract indicator (if it's there, we
|
245 |
// Possibly remove synthetic abstract indicator (if it's there, we
|
244 |
// used to index the beginning of the text as abstract).
|
246 |
// used to index the beginning of the text as abstract).
|
245 |
doc.syntabs = false;
|
247 |
doc.syntabs = false;
|
246 |
if (doc.meta[Doc::keyabs].find(cstr_syntAbs) == 0) {
|
248 |
if (doc.meta[Doc::keyabs].find(cstr_syntAbs) == 0) {
|
|
|
249 |
doc.meta[Doc::keyabs] =
|
247 |
doc.meta[Doc::keyabs] = doc.meta[Doc::keyabs].substr(cstr_syntAbs.length());
|
250 |
doc.meta[Doc::keyabs].substr(cstr_syntAbs.length());
|
248 |
doc.syntabs = true;
|
251 |
doc.syntabs = true;
|
249 |
}
|
252 |
}
|
250 |
parms.get(Doc::keyipt, doc.ipath);
|
253 |
parms.get(Doc::keyipt, doc.ipath);
|
251 |
parms.get(Doc::keypcs, doc.pcbytes);
|
254 |
parms.get(Doc::keypcs, doc.pcbytes);
|
252 |
parms.get(Doc::keyfs, doc.fbytes);
|
255 |
parms.get(Doc::keyfs, doc.fbytes);
|
253 |
parms.get(Doc::keyds, doc.dbytes);
|
256 |
parms.get(Doc::keyds, doc.dbytes);
|
254 |
parms.get(Doc::keysig, doc.sig);
|
257 |
parms.get(Doc::keysig, doc.sig);
|
255 |
doc.xdocid = docid;
|
258 |
doc.xdocid = docid;
|
256 |
|
259 |
|
257 |
// Other, not predefined meta fields:
|
260 |
// Normal key/value pairs:
|
258 |
vector<string> keys = parms.getNames(string());
|
261 |
vector<string> keys = parms.getNames(string());
|
259 |
for (vector<string>::const_iterator it = keys.begin();
|
262 |
for (vector<string>::const_iterator it = keys.begin();
|
260 |
it != keys.end(); it++) {
|
263 |
it != keys.end(); it++) {
|
261 |
if (doc.meta.find(*it) == doc.meta.end())
|
264 |
if (doc.meta.find(*it) == doc.meta.end())
|
262 |
parms.get(*it, doc.meta[*it]);
|
265 |
parms.get(*it, doc.meta[*it]);
|
|
... |
|
... |
1071 |
RECORD_APPEND(record, cstr_caption, doc.meta[Doc::keytt]);
|
1074 |
RECORD_APPEND(record, cstr_caption, doc.meta[Doc::keytt]);
|
1072 |
|
1075 |
|
1073 |
trimstring(doc.meta[Doc::keykw], " \t\r\n");
|
1076 |
trimstring(doc.meta[Doc::keykw], " \t\r\n");
|
1074 |
doc.meta[Doc::keykw] =
|
1077 |
doc.meta[Doc::keykw] =
|
1075 |
neutchars(truncate_to_word(doc.meta[Doc::keykw], 300), cstr_nc);
|
1078 |
neutchars(truncate_to_word(doc.meta[Doc::keykw], 300), cstr_nc);
|
1076 |
if (!doc.meta[Doc::keykw].empty())
|
1079 |
// No need to explicitly append the keywords, this will be done by
|
1077 |
RECORD_APPEND(record, Doc::keykw, doc.meta[Doc::keykw]);
|
1080 |
// the "stored" loop
|
1078 |
|
1081 |
|
1079 |
// If abstract is empty, we make up one with the beginning of the
|
1082 |
// If abstract is empty, we make up one with the beginning of the
|
1080 |
// document. This is then not indexed, but part of the doc data so
|
1083 |
// document. This is then not indexed, but part of the doc data so
|
1081 |
// that we can return it to a query without having to decode the
|
1084 |
// that we can return it to a query without having to decode the
|
1082 |
// original file.
|
1085 |
// original file.
|
|
... |
|
... |
1092 |
} else {
|
1095 |
} else {
|
1093 |
doc.meta[Doc::keyabs] =
|
1096 |
doc.meta[Doc::keyabs] =
|
1094 |
neutchars(truncate_to_word(doc.meta[Doc::keyabs], m_idxAbsTruncLen),
|
1097 |
neutchars(truncate_to_word(doc.meta[Doc::keyabs], m_idxAbsTruncLen),
|
1095 |
cstr_nc);
|
1098 |
cstr_nc);
|
1096 |
}
|
1099 |
}
|
1097 |
if (!doc.meta[Doc::keyabs].empty())
|
|
|
1098 |
RECORD_APPEND(record, Doc::keyabs, doc.meta[Doc::keyabs]);
|
|
|
1099 |
|
1100 |
|
1100 |
const set<string>& stored = m_config->getStoredFields();
|
1101 |
const set<string>& stored = m_config->getStoredFields();
|
1101 |
for (set<string>::const_iterator it = stored.begin();
|
1102 |
for (set<string>::const_iterator it = stored.begin();
|
1102 |
it != stored.end(); it++) {
|
1103 |
it != stored.end(); it++) {
|
1103 |
string nm = m_config->fieldCanon(*it);
|
1104 |
string nm = m_config->fieldCanon(*it);
|
1104 |
if (!doc.meta[*it].empty()) {
|
1105 |
if (!doc.meta[nm].empty()) {
|
1105 |
string value =
|
1106 |
string value =
|
1106 |
neutchars(truncate_to_word(doc.meta[*it], 150), cstr_nc);
|
1107 |
neutchars(truncate_to_word(doc.meta[nm], 150), cstr_nc);
|
1107 |
RECORD_APPEND(record, nm, value);
|
1108 |
RECORD_APPEND(record, nm, value);
|
1108 |
}
|
1109 |
}
|
1109 |
}
|
1110 |
}
|
1110 |
|
1111 |
|
1111 |
// If empty pages (multiple break at same pos) were recorded, save
|
1112 |
// If empty pages (multiple break at same pos) were recorded, save
|