|
a/src/python/recoll/pyrecoll.cpp |
|
b/src/python/recoll/pyrecoll.cpp |
|
... |
|
... |
36 |
#include "wasatorcl.h"
|
36 |
#include "wasatorcl.h"
|
37 |
#include "log.h"
|
37 |
#include "log.h"
|
38 |
#include "pathut.h"
|
38 |
#include "pathut.h"
|
39 |
#include "plaintorich.h"
|
39 |
#include "plaintorich.h"
|
40 |
#include "hldata.h"
|
40 |
#include "hldata.h"
|
|
|
41 |
#include "smallut.h"
|
41 |
|
42 |
|
42 |
#include "pyrecoll.h"
|
43 |
#include "pyrecoll.h"
|
43 |
|
44 |
|
44 |
static set<Rcl::Db *> the_dbs;
|
45 |
static set<Rcl::Db *> the_dbs;
|
45 |
static set<Rcl::Query *> the_queries;
|
46 |
static set<Rcl::Query *> the_queries;
|
|
... |
|
... |
426 |
"UTF-8", "replace"));
|
427 |
"UTF-8", "replace"));
|
427 |
}
|
428 |
}
|
428 |
return pdict;
|
429 |
return pdict;
|
429 |
}
|
430 |
}
|
430 |
|
431 |
|
431 |
static bool docget(recoll_DocObject *self, const string& key, string& value)
|
432 |
static bool idocget(recoll_DocObject *self, const string& key, string& value)
|
432 |
{
|
433 |
{
|
433 |
//
|
434 |
switch (key.at(0)) {
|
|
|
435 |
case 'u':
|
|
|
436 |
if (!key.compare(Rcl::Doc::keyurl)) {
|
|
|
437 |
value = self->doc->url;
|
|
|
438 |
return true;
|
|
|
439 |
}
|
|
|
440 |
break;
|
|
|
441 |
case 'f':
|
|
|
442 |
if (!key.compare(Rcl::Doc::keyfs)) {
|
|
|
443 |
value = self->doc->fbytes;
|
|
|
444 |
return true;
|
|
|
445 |
} else if (!key.compare(Rcl::Doc::keyfmt)) {
|
|
|
446 |
value = self->doc->fmtime;
|
|
|
447 |
return true;
|
|
|
448 |
}
|
|
|
449 |
break;
|
|
|
450 |
case 'd':
|
|
|
451 |
if (!key.compare(Rcl::Doc::keyds)) {
|
|
|
452 |
value = self->doc->dbytes;
|
|
|
453 |
return true;
|
|
|
454 |
} else if (!key.compare(Rcl::Doc::keydmt)) {
|
|
|
455 |
value = self->doc->dmtime;
|
|
|
456 |
return true;
|
|
|
457 |
}
|
|
|
458 |
break;
|
|
|
459 |
case 'i':
|
|
|
460 |
if (!key.compare(Rcl::Doc::keyipt)) {
|
|
|
461 |
value = self->doc->ipath;
|
|
|
462 |
return true;
|
|
|
463 |
}
|
|
|
464 |
break;
|
|
|
465 |
case 'm':
|
|
|
466 |
if (!key.compare(Rcl::Doc::keytp)) {
|
|
|
467 |
value = self->doc->mimetype;
|
|
|
468 |
return true;
|
|
|
469 |
} else if (!key.compare(Rcl::Doc::keymt)) {
|
|
|
470 |
value = self->doc->dmtime.empty() ? self->doc->fmtime :
|
|
|
471 |
self->doc->dmtime;
|
|
|
472 |
return true;
|
|
|
473 |
}
|
|
|
474 |
break;
|
|
|
475 |
case 'o':
|
|
|
476 |
if (!key.compare(Rcl::Doc::keyoc)) {
|
|
|
477 |
value = self->doc->origcharset;
|
|
|
478 |
return true;
|
|
|
479 |
}
|
|
|
480 |
break;
|
|
|
481 |
case 's':
|
|
|
482 |
if (!key.compare(Rcl::Doc::keysig)) {
|
|
|
483 |
value = self->doc->sig;
|
|
|
484 |
return true;
|
|
|
485 |
} else if (!key.compare(Rcl::Doc::keysz)) {
|
|
|
486 |
value = self->doc->dbytes.empty() ? self->doc->fbytes :
|
|
|
487 |
self->doc->dbytes;
|
|
|
488 |
return true;
|
|
|
489 |
}
|
|
|
490 |
break;
|
|
|
491 |
case 't':
|
|
|
492 |
if (!key.compare("text")) {
|
|
|
493 |
value = self->doc->text;
|
|
|
494 |
return true;
|
|
|
495 |
}
|
|
|
496 |
break;
|
|
|
497 |
case 'x':
|
434 |
if (!key.compare("xdocid")) {
|
498 |
if (!key.compare("xdocid")) {
|
435 |
char cid[30];
|
499 |
ulltodecstr(self->doc->xdocid, value);
|
436 |
sprintf(cid, "%lu", (unsigned long)self->doc->xdocid);
|
|
|
437 |
value = cid;
|
|
|
438 |
return true;
|
|
|
439 |
} else {
|
|
|
440 |
if (self->doc->getmeta(key, 0)) {
|
|
|
441 |
value = self->doc->meta[key];
|
|
|
442 |
return true;
|
500 |
return true;
|
443 |
}
|
501 |
}
|
|
|
502 |
break;
|
|
|
503 |
}
|
|
|
504 |
|
|
|
505 |
if (self->doc->getmeta(key, 0)) {
|
|
|
506 |
value = self->doc->meta[key];
|
|
|
507 |
return true;
|
444 |
}
|
508 |
}
|
445 |
return false;
|
509 |
return false;
|
446 |
}
|
510 |
}
|
447 |
|
511 |
|
448 |
PyDoc_STRVAR(doc_Doc_get,
|
512 |
PyDoc_STRVAR(doc_Doc_get,
|
|
... |
|
... |
450 |
"Retrieve the named doc attribute\n"
|
514 |
"Retrieve the named doc attribute\n"
|
451 |
);
|
515 |
);
|
452 |
static PyObject *
|
516 |
static PyObject *
|
453 |
Doc_get(recoll_DocObject *self, PyObject *args)
|
517 |
Doc_get(recoll_DocObject *self, PyObject *args)
|
454 |
{
|
518 |
{
|
455 |
LOGDEB0("Doc_get\n" );
|
519 |
LOGDEB1("Doc_get\n" );
|
456 |
if (self->doc == 0 || the_docs.find(self->doc) == the_docs.end()) {
|
520 |
if (self->doc == 0 || the_docs.find(self->doc) == the_docs.end()) {
|
457 |
PyErr_SetString(PyExc_AttributeError, "doc??");
|
521 |
PyErr_SetString(PyExc_AttributeError, "doc??");
|
458 |
return 0;
|
522 |
return 0;
|
459 |
}
|
523 |
}
|
460 |
char *sutf8 = 0; // needs freeing
|
524 |
char *sutf8 = 0; // needs freeing
|
|
... |
|
... |
463 |
}
|
527 |
}
|
464 |
string key(sutf8);
|
528 |
string key(sutf8);
|
465 |
PyMem_Free(sutf8);
|
529 |
PyMem_Free(sutf8);
|
466 |
|
530 |
|
467 |
string value;
|
531 |
string value;
|
468 |
bool found = docget(self, key, value);
|
532 |
if (idocget(self, key, value)) {
|
469 |
|
|
|
470 |
if (found) {
|
|
|
471 |
return PyUnicode_Decode(value.c_str(), value.size(), "UTF-8", "replace");
|
533 |
return PyUnicode_Decode(value.c_str(), value.size(), "UTF-8","replace");
|
472 |
}
|
534 |
}
|
473 |
|
535 |
|
474 |
Py_RETURN_NONE;
|
536 |
Py_RETURN_NONE;
|
475 |
}
|
537 |
}
|
476 |
|
538 |
|
|
... |
|
... |
493 |
if (self->doc == 0 || the_docs.find(self->doc) == the_docs.end()) {
|
555 |
if (self->doc == 0 || the_docs.find(self->doc) == the_docs.end()) {
|
494 |
PyErr_SetString(PyExc_AttributeError, "doc");
|
556 |
PyErr_SetString(PyExc_AttributeError, "doc");
|
495 |
return 0;
|
557 |
return 0;
|
496 |
}
|
558 |
}
|
497 |
|
559 |
|
498 |
bool found = false;
|
560 |
PyObject *meth = PyObject_GenericGetAttr((PyObject*)self, nameobj);
|
499 |
string value;
|
561 |
if (meth) {
|
500 |
string key;
|
562 |
return meth;
|
|
|
563 |
}
|
|
|
564 |
PyErr_Clear();
|
|
|
565 |
|
501 |
char *name = 0;
|
566 |
char *name = 0;
|
502 |
PyObject* utf8o = 0;
|
|
|
503 |
|
|
|
504 |
if (PyUnicode_Check(nameobj)) {
|
567 |
if (PyUnicode_Check(nameobj)) {
|
505 |
utf8o = PyUnicode_AsUTF8String(nameobj);
|
568 |
PyObject* utf8o = PyUnicode_AsUTF8String(nameobj);
|
506 |
if (utf8o == 0) {
|
569 |
if (utf8o == 0) {
|
507 |
LOGERR("Doc_getattro: encoding name to utf8 failed\n" );
|
570 |
LOGERR("Doc_getattro: encoding name to utf8 failed\n" );
|
508 |
PyErr_SetString(PyExc_AttributeError, "name??");
|
571 |
PyErr_SetString(PyExc_AttributeError, "name??");
|
509 |
Py_RETURN_NONE;
|
572 |
Py_RETURN_NONE;
|
510 |
}
|
573 |
}
|
|
... |
|
... |
515 |
} else {
|
578 |
} else {
|
516 |
PyErr_SetString(PyExc_AttributeError, "name not unicode nor string??");
|
579 |
PyErr_SetString(PyExc_AttributeError, "name not unicode nor string??");
|
517 |
Py_RETURN_NONE;
|
580 |
Py_RETURN_NONE;
|
518 |
}
|
581 |
}
|
519 |
|
582 |
|
520 |
key = rclconfig->fieldQCanon(string(name));
|
583 |
string key = rclconfig->fieldQCanon(string(name));
|
521 |
LOGDEB1("Doc_getattro: key: " << key << endl);
|
584 |
string value;
|
522 |
|
585 |
if (idocget(self, key, value)) {
|
523 |
switch (key.at(0)) {
|
|
|
524 |
case 'u':
|
|
|
525 |
if (!key.compare(Rcl::Doc::keyurl)) {
|
|
|
526 |
value = self->doc->url; found = true;
|
|
|
527 |
}
|
|
|
528 |
break;
|
|
|
529 |
case 'f':
|
|
|
530 |
if (!key.compare(Rcl::Doc::keyfs)) {
|
|
|
531 |
value = self->doc->fbytes; found = true;
|
|
|
532 |
} else if (!key.compare(Rcl::Doc::keyfmt)) {
|
|
|
533 |
value = self->doc->fmtime; found = true;
|
|
|
534 |
}
|
|
|
535 |
break;
|
|
|
536 |
case 'd':
|
|
|
537 |
if (!key.compare(Rcl::Doc::keyds)) {
|
|
|
538 |
value = self->doc->dbytes; found = true;
|
|
|
539 |
} else if (!key.compare(Rcl::Doc::keydmt)) {
|
|
|
540 |
value = self->doc->dmtime; found = true;
|
|
|
541 |
}
|
|
|
542 |
break;
|
|
|
543 |
case 'i':
|
|
|
544 |
if (!key.compare(Rcl::Doc::keyipt)) {
|
|
|
545 |
value = self->doc->ipath; found = true;
|
|
|
546 |
}
|
|
|
547 |
break;
|
|
|
548 |
case 'm':
|
|
|
549 |
if (!key.compare(Rcl::Doc::keytp)) {
|
|
|
550 |
value = self->doc->mimetype; found = true;
|
|
|
551 |
} else if (!key.compare(Rcl::Doc::keymt)) {
|
|
|
552 |
value = self->doc->dmtime.empty() ? self->doc->fmtime :
|
|
|
553 |
self->doc->dmtime; found = true;
|
|
|
554 |
}
|
|
|
555 |
break;
|
|
|
556 |
case 'o':
|
|
|
557 |
if (!key.compare(Rcl::Doc::keyoc)) {
|
|
|
558 |
value = self->doc->origcharset; found = true;
|
|
|
559 |
}
|
|
|
560 |
break;
|
|
|
561 |
case 's':
|
|
|
562 |
if (!key.compare(Rcl::Doc::keysig)) {
|
|
|
563 |
value = self->doc->sig; found = true;
|
|
|
564 |
} else if (!key.compare(Rcl::Doc::keysz)) {
|
|
|
565 |
value = self->doc->dbytes.empty() ? self->doc->fbytes :
|
|
|
566 |
self->doc->dbytes; found = true;
|
|
|
567 |
}
|
|
|
568 |
break;
|
|
|
569 |
case 't':
|
|
|
570 |
if (!key.compare("text")) {
|
|
|
571 |
value = self->doc->text; found = true;
|
|
|
572 |
}
|
|
|
573 |
break;
|
|
|
574 |
case 'x':
|
|
|
575 |
if (!key.compare("xdocid")) {
|
|
|
576 |
char cid[30];
|
|
|
577 |
sprintf(cid, "%lu", (unsigned long)self->doc->xdocid);
|
|
|
578 |
value = cid;
|
|
|
579 |
found = true;
|
|
|
580 |
}
|
|
|
581 |
break;
|
|
|
582 |
}
|
|
|
583 |
|
|
|
584 |
if (!found) {
|
|
|
585 |
// This will look up a method name (we have no other standard
|
|
|
586 |
// attributes)
|
|
|
587 |
PyObject *meth = PyObject_GenericGetAttr((PyObject*)self, nameobj);
|
|
|
588 |
if (meth) {
|
|
|
589 |
return meth;
|
|
|
590 |
}
|
|
|
591 |
PyErr_Clear();
|
|
|
592 |
// Else look for another attribute
|
|
|
593 |
if (self->doc->getmeta(key, 0)) {
|
|
|
594 |
value = self->doc->meta[key];
|
|
|
595 |
found = true;
|
|
|
596 |
}
|
|
|
597 |
}
|
|
|
598 |
|
|
|
599 |
if (found) {
|
|
|
600 |
LOGDEB1("Doc_getattro: [" << key << "] -> [" << value << "]\n");
|
586 |
LOGDEB1("Doc_getattro: [" << key << "] -> [" << value << "]\n");
|
601 |
// Return a python unicode object
|
587 |
// Return a python unicode object
|
602 |
return PyUnicode_Decode(value.c_str(), value.size(), "utf-8",
|
588 |
return PyUnicode_Decode(value.c_str(), value.size(), "utf-8","replace");
|
603 |
"replace");
|
|
|
604 |
}
|
589 |
}
|
605 |
|
590 |
|
606 |
Py_RETURN_NONE;
|
591 |
Py_RETURN_NONE;
|
607 |
}
|
592 |
}
|
608 |
|
593 |
|
|
... |
|
... |
737 |
Py_RETURN_NONE;
|
722 |
Py_RETURN_NONE;
|
738 |
}
|
723 |
}
|
739 |
|
724 |
|
740 |
string skey = rclconfig->fieldQCanon(string(name));
|
725 |
string skey = rclconfig->fieldQCanon(string(name));
|
741 |
string value;
|
726 |
string value;
|
742 |
bool found = docget(self, skey, value);
|
727 |
if (idocget(self, skey, value)) {
|
743 |
|
|
|
744 |
if (found) {
|
|
|
745 |
return PyUnicode_Decode(value.c_str(), value.size(), "UTF-8", "replace");
|
728 |
return PyUnicode_Decode(value.c_str(), value.size(), "UTF-8","replace");
|
746 |
}
|
729 |
}
|
747 |
|
730 |
|
748 |
Py_RETURN_NONE;
|
731 |
Py_RETURN_NONE;
|
749 |
}
|
732 |
}
|
750 |
|
733 |
|