|
a/src/internfile/internfile.cpp |
|
b/src/internfile/internfile.cpp |
|
... |
|
... |
346 |
|
346 |
|
347 |
FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf,
|
347 |
FileInterner::FileInterner(const Rcl::Doc& idoc, RclConfig *cnf,
|
348 |
const string& td, int flags)
|
348 |
const string& td, int flags)
|
349 |
: m_tdir(td)
|
349 |
: m_tdir(td)
|
350 |
{
|
350 |
{
|
|
|
351 |
LOGDEB(("FileInterner::FileInterner(idoc)\n"));
|
351 |
initcommon(cnf, flags);
|
352 |
initcommon(cnf, flags);
|
352 |
|
353 |
|
353 |
// We do insist on having an url...
|
354 |
// We do insist on having an url...
|
354 |
if (idoc.url.empty()) {
|
355 |
if (idoc.url.empty()) {
|
355 |
LOGERR(("FileInterner::FileInterner:: no url!\n"));
|
356 |
LOGERR(("FileInterner::FileInterner:: no url!\n"));
|
|
... |
|
... |
364 |
map<string, string>::const_iterator it;
|
365 |
map<string, string>::const_iterator it;
|
365 |
if ((it = idoc.meta.find(Rcl::Doc::keybcknd)) != idoc.meta.end())
|
366 |
if ((it = idoc.meta.find(Rcl::Doc::keybcknd)) != idoc.meta.end())
|
366 |
backend = it->second;
|
367 |
backend = it->second;
|
367 |
|
368 |
|
368 |
if (backend.empty() || !backend.compare("FS")) {
|
369 |
if (backend.empty() || !backend.compare("FS")) {
|
|
|
370 |
// Filesystem document. Intern from file.
|
369 |
// The url has to be like file://
|
371 |
// The url has to be like file://
|
370 |
if (idoc.url.find("file://") != 0) {
|
372 |
if (idoc.url.find("file://") != 0) {
|
371 |
LOGERR(("FileInterner: FS backend and non fs url: [%s]\n",
|
373 |
LOGERR(("FileInterner: FS backend and non fs url: [%s]\n",
|
372 |
idoc.url.c_str()));
|
374 |
idoc.url.c_str()));
|
373 |
return;
|
375 |
return;
|
374 |
}
|
376 |
}
|
375 |
string fn = idoc.url.substr(7, string::npos);
|
377 |
string fn = idoc.url.substr(7, string::npos);
|
376 |
struct stat st;
|
378 |
struct stat st;
|
377 |
if (stat(fn.c_str(), &st) < 0) {
|
379 |
if (stat(fn.c_str(), &st) < 0) {
|
378 |
LOGERR(("InternFile: cannot access document file: [%s]\n",
|
380 |
LOGERR(("FileInterner:: cannot access document file: [%s]\n",
|
379 |
fn.c_str()));
|
381 |
fn.c_str()));
|
380 |
return;
|
382 |
return;
|
381 |
}
|
383 |
}
|
382 |
init(fn, &st, cnf, td, flags, &idoc.mimetype);
|
384 |
init(fn, &st, cnf, td, flags, &idoc.mimetype);
|
383 |
} else if (!backend.compare("BGL")) {
|
385 |
} else if (!backend.compare("BGL")) {
|
384 |
// Retrieve from our webcache (beagle data)
|
386 |
// Retrieve from our webcache (beagle data). There should
|
|
|
387 |
// probably be a separate object type for readonly cache
|
|
|
388 |
// access (distinct from the one used for indexing).
|
|
|
389 |
// Anyway, we're not called in the same thread as indexing ops, and
|
|
|
390 |
// even, at worst, this would duplicate the memory used. The beagler
|
|
|
391 |
// object is created at the first call of this routine and deleted
|
|
|
392 |
// when the program exits.
|
385 |
BeagleQueueIndexer beagler(cnf);
|
393 |
static BeagleQueueIndexer beagler(cnf);
|
386 |
string data;
|
394 |
string data;
|
387 |
Rcl::Doc dotdoc;
|
395 |
Rcl::Doc dotdoc;
|
388 |
map<string,string>::const_iterator it =
|
396 |
map<string,string>::const_iterator it =
|
389 |
idoc.meta.find(Rcl::Doc::keyudi);
|
397 |
idoc.meta.find(Rcl::Doc::keyudi);
|
390 |
if (it == idoc.meta.end() || it->second.empty()) {
|
398 |
if (it == idoc.meta.end() || it->second.empty()) {
|
391 |
LOGERR(("Internfile: no udi in idoc\n"));
|
399 |
LOGERR(("FileInterner:: no udi in idoc\n"));
|
392 |
return;
|
400 |
return;
|
393 |
}
|
401 |
}
|
394 |
string udi = it->second;
|
402 |
string udi = it->second;
|
395 |
if (!beagler.getFromCache(udi, dotdoc, data)) {
|
403 |
if (!beagler.getFromCache(udi, dotdoc, data)) {
|
396 |
LOGINFO(("Internfile: failed fetch from Beagle cache for [%s]\n",
|
404 |
LOGINFO(("FileInterner:: failed fetch from Beagle cache for [%s]\n",
|
397 |
udi.c_str()));
|
405 |
udi.c_str()));
|
398 |
return;
|
406 |
return;
|
399 |
}
|
407 |
}
|
400 |
if (dotdoc.mimetype.compare(idoc.mimetype)) {
|
408 |
if (dotdoc.mimetype.compare(idoc.mimetype)) {
|
401 |
LOGINFO(("Internfile: udi [%s], mimetype mismatch: in: [%s], bgl "
|
409 |
LOGINFO(("FileInterner:: udi [%s], mimetp mismatch: in: [%s], bgl "
|
402 |
"[%s]\n", idoc.mimetype.c_str(), dotdoc.mimetype.c_str()));
|
410 |
"[%s]\n", idoc.mimetype.c_str(), dotdoc.mimetype.c_str()));
|
403 |
}
|
411 |
}
|
404 |
init(data, cnf, td, flags, dotdoc.mimetype);
|
412 |
init(data, cnf, td, flags, dotdoc.mimetype);
|
405 |
} else {
|
413 |
} else {
|
406 |
LOGERR(("InternFile: unknown backend: [%s]\n", backend.c_str()));
|
414 |
LOGERR(("FileInterner:: unknown backend: [%s]\n", backend.c_str()));
|
407 |
return;
|
415 |
return;
|
408 |
}
|
416 |
}
|
409 |
}
|
417 |
}
|
410 |
|
418 |
|
411 |
FileInterner::~FileInterner()
|
419 |
FileInterner::~FileInterner()
|