|
a/src/python/recoll/pyrecoll.cpp |
|
b/src/python/recoll/pyrecoll.cpp |
|
... |
|
... |
13 |
* along with this program; if not, write to the
|
13 |
* along with this program; if not, write to the
|
14 |
* Free Software Foundation, Inc.,
|
14 |
* Free Software Foundation, Inc.,
|
15 |
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
15 |
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
16 |
*/
|
16 |
*/
|
17 |
|
17 |
|
18 |
|
|
|
19 |
#include <Python.h>
|
18 |
#include <Python.h>
|
20 |
#include <structmember.h>
|
19 |
#include <structmember.h>
|
21 |
#include <bytearrayobject.h>
|
20 |
#include <bytearrayobject.h>
|
22 |
|
21 |
|
23 |
#include <strings.h>
|
22 |
#include <strings.h>
|
|
... |
|
... |
35 |
#include "pathut.h"
|
34 |
#include "pathut.h"
|
36 |
#include "wasastringtoquery.h"
|
35 |
#include "wasastringtoquery.h"
|
37 |
#include "wasatorcl.h"
|
36 |
#include "wasatorcl.h"
|
38 |
#include "debuglog.h"
|
37 |
#include "debuglog.h"
|
39 |
#include "pathut.h"
|
38 |
#include "pathut.h"
|
|
|
39 |
#include "plaintorich.h"
|
|
|
40 |
#include "hldata.h"
|
|
|
41 |
|
|
|
42 |
#include "pyrecoll.h"
|
40 |
|
43 |
|
41 |
static set<Rcl::Db *> the_dbs;
|
44 |
static set<Rcl::Db *> the_dbs;
|
42 |
static set<Rcl::Query *> the_queries;
|
45 |
static set<Rcl::Query *> the_queries;
|
43 |
static set<Rcl::Doc *> the_docs;
|
46 |
static set<Rcl::Doc *> the_docs;
|
44 |
|
47 |
|
45 |
static RclConfig *rclconfig;
|
48 |
static RclConfig *rclconfig;
|
46 |
|
49 |
|
47 |
// This has to exist somewhere in the python api ??
|
50 |
// This has to exist somewhere in the python api ??
|
48 |
PyObject *obj_Create(PyTypeObject *tp, PyObject *args, PyObject *kwargs)
|
51 |
static PyObject *obj_Create(PyTypeObject *tp, PyObject *args, PyObject *kwargs)
|
49 |
{
|
52 |
{
|
50 |
PyObject *result = tp->tp_new(tp, args, kwargs);
|
53 |
PyObject *result = tp->tp_new(tp, args, kwargs);
|
51 |
if (result && tp->tp_init(result, args, kwargs) < 0)
|
54 |
if (result && tp->tp_init(result, args, kwargs) < 0)
|
52 |
return 0;
|
55 |
return 0;
|
53 |
return result;
|
56 |
return result;
|
|
... |
|
... |
250 |
}
|
253 |
}
|
251 |
|
254 |
|
252 |
|
255 |
|
253 |
///////////////////////////////////////////////////////////////////////
|
256 |
///////////////////////////////////////////////////////////////////////
|
254 |
///// DOC Doc code
|
257 |
///// DOC Doc code
|
255 |
typedef struct {
|
|
|
256 |
PyObject_HEAD
|
|
|
257 |
/* Type-specific fields go here. */
|
|
|
258 |
Rcl::Doc *doc;
|
|
|
259 |
} recoll_DocObject;
|
|
|
260 |
|
258 |
|
261 |
static void
|
259 |
static void
|
262 |
Doc_dealloc(recoll_DocObject *self)
|
260 |
Doc_dealloc(recoll_DocObject *self)
|
263 |
{
|
261 |
{
|
264 |
LOGDEB(("Doc_dealloc\n"));
|
262 |
LOGDEB(("Doc_dealloc\n"));
|
|
... |
|
... |
290 |
the_docs.erase(self->doc);
|
288 |
the_docs.erase(self->doc);
|
291 |
delete self->doc;
|
289 |
delete self->doc;
|
292 |
self->doc = new Rcl::Doc;
|
290 |
self->doc = new Rcl::Doc;
|
293 |
if (self->doc == 0)
|
291 |
if (self->doc == 0)
|
294 |
return -1;
|
292 |
return -1;
|
|
|
293 |
self->rclconfig = rclconfig;
|
295 |
the_docs.insert(self->doc);
|
294 |
the_docs.insert(self->doc);
|
296 |
return 0;
|
295 |
return 0;
|
297 |
}
|
296 |
}
|
298 |
|
297 |
|
299 |
PyDoc_STRVAR(doc_getbinurl,
|
298 |
PyDoc_STRVAR(doc_Doc_getbinurl,
|
300 |
"getbinurl(none) -> binary url\n"
|
299 |
"getbinurl(none) -> binary url\n"
|
301 |
"\n"
|
300 |
"\n"
|
302 |
"Returns an URL with a path part which is a as bit for bit copy of the \n"
|
301 |
"Returns an URL with a path part which is a as bit for bit copy of the \n"
|
303 |
"file system path, without encoding\n"
|
302 |
"file system path, without encoding\n"
|
304 |
);
|
303 |
);
|
|
... |
|
... |
314 |
}
|
313 |
}
|
315 |
return PyByteArray_FromStringAndSize(self->doc->url.c_str(),
|
314 |
return PyByteArray_FromStringAndSize(self->doc->url.c_str(),
|
316 |
self->doc->url.size());
|
315 |
self->doc->url.size());
|
317 |
}
|
316 |
}
|
318 |
|
317 |
|
319 |
PyDoc_STRVAR(doc_setbinurl,
|
318 |
PyDoc_STRVAR(doc_Doc_setbinurl,
|
320 |
"setbinurl(url) -> binary url\n"
|
319 |
"setbinurl(url) -> binary url\n"
|
321 |
"\n"
|
320 |
"\n"
|
322 |
"Set the URL from binary path like file://may/contain/unencodable/bytes\n"
|
321 |
"Set the URL from binary path like file://may/contain/unencodable/bytes\n"
|
323 |
);
|
322 |
);
|
324 |
|
323 |
|
|
... |
|
... |
338 |
self->doc->url = string(PyByteArray_AsString(value),
|
337 |
self->doc->url = string(PyByteArray_AsString(value),
|
339 |
PyByteArray_Size(value));
|
338 |
PyByteArray_Size(value));
|
340 |
Py_RETURN_NONE;
|
339 |
Py_RETURN_NONE;
|
341 |
}
|
340 |
}
|
342 |
|
341 |
|
|
|
342 |
PyDoc_STRVAR(doc_Doc_keys,
|
|
|
343 |
"keys() -> list of doc object keys (attribute names)\n"
|
|
|
344 |
);
|
|
|
345 |
static PyObject *
|
|
|
346 |
Doc_keys(recoll_DocObject *self)
|
|
|
347 |
{
|
|
|
348 |
LOGDEB(("Doc_keys\n"));
|
|
|
349 |
if (self->doc == 0 ||
|
|
|
350 |
the_docs.find(self->doc) == the_docs.end()) {
|
|
|
351 |
PyErr_SetString(PyExc_AttributeError, "doc");
|
|
|
352 |
return 0;
|
|
|
353 |
}
|
|
|
354 |
|
|
|
355 |
PyObject *pkeys = PyList_New(0);
|
|
|
356 |
for (map<string,string>::const_iterator it = self->doc->meta.begin();
|
|
|
357 |
it != self->doc->meta.end(); it++) {
|
|
|
358 |
PyList_Append(pkeys, PyUnicode_Decode(it->first.c_str(),
|
|
|
359 |
it->first.size(),
|
|
|
360 |
"UTF-8", "replace"));
|
|
|
361 |
}
|
|
|
362 |
return pkeys;
|
|
|
363 |
}
|
|
|
364 |
|
|
|
365 |
PyDoc_STRVAR(doc_Doc_items,
|
|
|
366 |
"items() -> dictionary of doc object keys/values\n"
|
|
|
367 |
);
|
|
|
368 |
static PyObject *
|
|
|
369 |
Doc_items(recoll_DocObject *self)
|
|
|
370 |
{
|
|
|
371 |
LOGDEB(("Doc_getbinurl\n"));
|
|
|
372 |
if (self->doc == 0 ||
|
|
|
373 |
the_docs.find(self->doc) == the_docs.end()) {
|
|
|
374 |
PyErr_SetString(PyExc_AttributeError, "doc");
|
|
|
375 |
return 0;
|
|
|
376 |
}
|
|
|
377 |
|
|
|
378 |
PyObject *pdict = PyDict_New();
|
|
|
379 |
for (map<string,string>::const_iterator it = self->doc->meta.begin();
|
|
|
380 |
it != self->doc->meta.end(); it++) {
|
|
|
381 |
PyDict_SetItem(pdict,
|
|
|
382 |
PyUnicode_Decode(it->first.c_str(),
|
|
|
383 |
it->first.size(),
|
|
|
384 |
"UTF-8", "replace"),
|
|
|
385 |
PyUnicode_Decode(it->second.c_str(),
|
|
|
386 |
it->second.size(),
|
|
|
387 |
"UTF-8", "replace"));
|
|
|
388 |
}
|
|
|
389 |
return pdict;
|
|
|
390 |
}
|
|
|
391 |
|
|
|
392 |
PyDoc_STRVAR(doc_Doc_get,
|
|
|
393 |
"get(key) -> value\n"
|
|
|
394 |
"Retrieve the named doc attribute\n"
|
|
|
395 |
);
|
|
|
396 |
|
|
|
397 |
static PyObject *
|
|
|
398 |
Doc_get(recoll_DocObject *self, PyObject *args)
|
|
|
399 |
{
|
|
|
400 |
LOGDEB(("Doc_get\n"));
|
|
|
401 |
char *sutf8 = 0; // needs freeing
|
|
|
402 |
if (!PyArg_ParseTuple(args, "es:Doc_get",
|
|
|
403 |
"utf-8", &sutf8)) {
|
|
|
404 |
return 0;
|
|
|
405 |
}
|
|
|
406 |
string key(sutf8);
|
|
|
407 |
PyMem_Free(sutf8);
|
|
|
408 |
|
|
|
409 |
if (self->doc == 0 ||
|
|
|
410 |
the_docs.find(self->doc) == the_docs.end()) {
|
|
|
411 |
PyErr_SetString(PyExc_AttributeError, "doc??");
|
|
|
412 |
return 0;
|
|
|
413 |
}
|
|
|
414 |
string value;
|
|
|
415 |
if (self->doc->getmeta(key, 0)) {
|
|
|
416 |
value = self->doc->meta[key];
|
|
|
417 |
return PyUnicode_Decode(value.c_str(),
|
|
|
418 |
value.size(),
|
|
|
419 |
"UTF-8", "replace");
|
|
|
420 |
}
|
|
|
421 |
Py_RETURN_NONE;
|
|
|
422 |
}
|
343 |
|
423 |
|
344 |
static PyMethodDef Doc_methods[] = {
|
424 |
static PyMethodDef Doc_methods[] = {
|
345 |
{"getbinurl", (PyCFunction)Doc_getbinurl, METH_NOARGS,
|
425 |
{"getbinurl", (PyCFunction)Doc_getbinurl, METH_NOARGS, doc_Doc_getbinurl},
|
346 |
doc_getbinurl},
|
|
|
347 |
{"setbinurl", (PyCFunction)Doc_setbinurl, METH_O,
|
426 |
{"setbinurl", (PyCFunction)Doc_setbinurl, METH_O, doc_Doc_setbinurl},
|
348 |
doc_setbinurl},
|
427 |
{"keys", (PyCFunction)Doc_keys, METH_NOARGS, doc_Doc_keys},
|
|
|
428 |
{"items", (PyCFunction)Doc_items, METH_NOARGS, doc_Doc_items},
|
|
|
429 |
{"get", (PyCFunction)Doc_get, METH_VARARGS, doc_Doc_get},
|
349 |
{NULL} /* Sentinel */
|
430 |
{NULL} /* Sentinel */
|
350 |
};
|
431 |
};
|
351 |
|
432 |
|
352 |
static PyObject *
|
433 |
static PyObject *
|
353 |
Doc_getattr(recoll_DocObject *self, char *name)
|
434 |
Doc_getattr(recoll_DocObject *self, char *name)
|
|
... |
|
... |
378 |
}
|
459 |
}
|
379 |
break;
|
460 |
break;
|
380 |
case 'f':
|
461 |
case 'f':
|
381 |
if (!key.compare(Rcl::Doc::keyfs)) {
|
462 |
if (!key.compare(Rcl::Doc::keyfs)) {
|
382 |
value = self->doc->fbytes; found = true;
|
463 |
value = self->doc->fbytes; found = true;
|
383 |
} else if (!key.compare(Rcl::Doc::keyfs)) {
|
|
|
384 |
value = self->doc->fbytes; found = true;
|
|
|
385 |
} else if (!key.compare(Rcl::Doc::keyfmt)) {
|
464 |
} else if (!key.compare(Rcl::Doc::keyfmt)) {
|
386 |
value = self->doc->fmtime; found = true;
|
465 |
value = self->doc->fmtime; found = true;
|
387 |
}
|
466 |
}
|
388 |
break;
|
467 |
break;
|
389 |
case 'd':
|
468 |
case 'd':
|
|
... |
|
... |
415 |
if (!key.compare(Rcl::Doc::keysig)) {
|
494 |
if (!key.compare(Rcl::Doc::keysig)) {
|
416 |
value = self->doc->sig; found = true;
|
495 |
value = self->doc->sig; found = true;
|
417 |
} else if (!key.compare(Rcl::Doc::keysz)) {
|
496 |
} else if (!key.compare(Rcl::Doc::keysz)) {
|
418 |
value = self->doc->dbytes.empty() ? self->doc->fbytes :
|
497 |
value = self->doc->dbytes.empty() ? self->doc->fbytes :
|
419 |
self->doc->dbytes; found = true;
|
498 |
self->doc->dbytes; found = true;
|
|
|
499 |
}
|
|
|
500 |
break;
|
|
|
501 |
case 't':
|
|
|
502 |
if (!key.compare("text")) {
|
|
|
503 |
value = self->doc->text; found = true;
|
420 |
}
|
504 |
}
|
421 |
break;
|
505 |
break;
|
422 |
}
|
506 |
}
|
423 |
|
507 |
|
424 |
if (!found) {
|
508 |
if (!found) {
|
|
... |
|
... |
430 |
PyErr_Clear();
|
514 |
PyErr_Clear();
|
431 |
}
|
515 |
}
|
432 |
|
516 |
|
433 |
if (self->doc->getmeta(key, 0)) {
|
517 |
if (self->doc->getmeta(key, 0)) {
|
434 |
value = self->doc->meta[key];
|
518 |
value = self->doc->meta[key];
|
|
|
519 |
found = true;
|
435 |
}
|
520 |
}
|
|
|
521 |
}
|
|
|
522 |
|
|
|
523 |
if (!found) {
|
|
|
524 |
LOGDEB(("Doc_getattr: name [%s] key [%s] Not found\n",
|
|
|
525 |
name, key.c_str()));
|
|
|
526 |
Py_RETURN_NONE;
|
436 |
}
|
527 |
}
|
437 |
|
528 |
|
438 |
LOGDEB(("Doc_getattr: [%s] (%s) -> [%s]\n",
|
529 |
LOGDEB(("Doc_getattr: [%s] (%s) -> [%s]\n",
|
439 |
name, key.c_str(), value.c_str()));
|
530 |
name, key.c_str(), value.c_str()));
|
440 |
// Return a python unicode object
|
531 |
// Return a python unicode object
|
|
... |
|
... |
681 |
/*
 * Query.sortby(field, ascending=True): record the sort field and direction
 * on the query object.
 *
 * "ascending" is accepted as any Python object and evaluated for truth, so
 * that True/False, 0/1, etc. all work. It defaults to ascending when omitted.
 * Returns None, or 0 with a Python exception set on bad arguments.
 */
static PyObject *
Query_sortby(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
{
    LOGDEB(("Query_sortby\n"));
    static const char *kwlist[] = {"field", "ascending", NULL};
    PyObject *ascobj = 0;
    // The optional argument is received as an object, so its format unit has
    // to be "O". With "i" an int would be stored into the pointer variable
    // and PyObject_IsTrue() below would dereference garbage.
    // NOTE(review): "s" stores a pointer to the argument's internal buffer;
    // the declaration of sortfield is not visible here - confirm its
    // lifetime requirements.
    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|O", (char**)kwlist,
                                     &self->sortfield,
                                     &ascobj))
        return 0;

    if (ascobj == 0) {
        self->ascending = true;
    } else {
        // PyObject_IsTrue() returns -1 with an exception set on failure.
        int truth = PyObject_IsTrue(ascobj);
        if (truth < 0)
            return 0;
        self->ascending = (truth != 0);
    }

    Py_RETURN_NONE;
}
|
692 |
|
790 |
|
693 |
PyDoc_STRVAR(doc_Query_execute,
|
791 |
PyDoc_STRVAR(doc_Query_execute,
|
694 |
"execute(query_string, stemming=1|0, stemlang=\"stemming language\")\n"
|
792 |
"execute(query_string, stemming=1|0, stemlang=\"stemming language\")\n"
|
|
... |
|
... |
705 |
LOGDEB1(("Query_execute\n"));
|
803 |
LOGDEB1(("Query_execute\n"));
|
706 |
static const char *kwlist[] = {"query_string", "stemming", "stemlang", NULL};
|
804 |
static const char *kwlist[] = {"query_string", "stemming", "stemlang", NULL};
|
707 |
char *sutf8 = 0; // needs freeing
|
805 |
char *sutf8 = 0; // needs freeing
|
708 |
char *sstemlang = 0;
|
806 |
char *sstemlang = 0;
|
709 |
int dostem = 1;
|
807 |
int dostem = 1;
|
|
|
808 |
PyObject *dostemobj = 0;
|
710 |
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "es|ies:Query_execute",
|
809 |
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "es|Oes:Query_execute",
|
711 |
(char**)kwlist, "utf-8", &sutf8,
|
810 |
(char**)kwlist, "utf-8", &sutf8,
|
712 |
&dostem,
|
811 |
&dostemobj,
|
713 |
"utf-8", &sstemlang)) {
|
812 |
"utf-8", &sstemlang)) {
|
714 |
return 0;
|
813 |
return 0;
|
715 |
}
|
814 |
}
|
716 |
|
815 |
if (dostemobj != 0 && !PyObject_IsTrue(dostemobj))
|
|
|
816 |
dostem = 0;
|
717 |
|
817 |
|
718 |
string utf8(sutf8);
|
818 |
string utf8(sutf8);
|
719 |
PyMem_Free(sutf8);
|
819 |
PyMem_Free(sutf8);
|
720 |
string stemlang("english");
|
820 |
string stemlang("english");
|
721 |
if (sstemlang) {
|
821 |
if (sstemlang) {
|
|
... |
|
... |
826 |
doc->meta[Rcl::Doc::keyfs] = doc->fbytes;
|
926 |
doc->meta[Rcl::Doc::keyfs] = doc->fbytes;
|
827 |
doc->meta[Rcl::Doc::keyds] = doc->dbytes;
|
927 |
doc->meta[Rcl::Doc::keyds] = doc->dbytes;
|
828 |
return (PyObject *)result;
|
928 |
return (PyObject *)result;
|
829 |
}
|
929 |
}
|
830 |
|
930 |
|
|
|
931 |
|
|
|
932 |
// Python-visible help text for Query.highlight(). The argument list matches
// the keywords actually accepted by the method (text, ishtml, methods).
PyDoc_STRVAR(doc_Query_highlight,
"highlight(text, ishtml = 0/1, methods = object)\n"
"Will insert <span class=\"rclmatch\"></span> tags around the match areas\n"
"in the input text and return the modified text\n"
"ishtml can be set to indicate that the input text is html and html special\n"
" characters should not be escaped\n"
"methods if set should be an object with methods startMatch(i) and endMatch()\n"
" which will be called for each match and should return a begin and end tag\n"
);
|
|
|
941 |
|
|
|
942 |
class PyPlainToRich: public PlainToRich {
|
|
|
943 |
public:
|
|
|
944 |
PyPlainToRich(PyObject *methods)
|
|
|
945 |
: m_methods(methods)
|
|
|
946 |
{
|
|
|
947 |
}
|
|
|
948 |
virtual ~PyPlainToRich()
|
|
|
949 |
{
|
|
|
950 |
}
|
|
|
951 |
virtual string startMatch(unsigned int idx)
|
|
|
952 |
{
|
|
|
953 |
PyObject *res = 0;
|
|
|
954 |
if (m_methods)
|
|
|
955 |
res = PyObject_CallMethod(m_methods, (char *)"startMatch",
|
|
|
956 |
(char *)"(i)", idx);
|
|
|
957 |
if (res == 0)
|
|
|
958 |
return "<span class=\"rclmatch\">";
|
|
|
959 |
PyObject *res1 = res;
|
|
|
960 |
if (PyUnicode_Check(res))
|
|
|
961 |
res1 = PyUnicode_AsUTF8String(res);
|
|
|
962 |
return PyString_AsString(res1);
|
|
|
963 |
}
|
|
|
964 |
|
|
|
965 |
virtual string endMatch()
|
|
|
966 |
{
|
|
|
967 |
PyObject *res = 0;
|
|
|
968 |
if (m_methods)
|
|
|
969 |
res = PyObject_CallMethod(m_methods, (char *)"endMatch", 0);
|
|
|
970 |
if (res == 0)
|
|
|
971 |
return "</span res is null>";
|
|
|
972 |
PyObject *res1 = res;
|
|
|
973 |
if (PyUnicode_Check(res))
|
|
|
974 |
res1 = PyUnicode_AsUTF8String(res);
|
|
|
975 |
return PyString_AsString(res1);
|
|
|
976 |
}
|
|
|
977 |
|
|
|
978 |
PyObject *m_methods;
|
|
|
979 |
};
|
|
|
980 |
|
|
|
981 |
/*
 * Query.highlight(text, ishtml=False, methods=None): return a copy of text
 * with tags inserted around the areas matching the terms of the current
 * query.
 *
 * text     input text, received UTF-8 encoded.
 * ishtml   any Python value, evaluated for truth and passed on to
 *          set_inputhtml().
 * methods  optional object providing startMatch(idx)/endMatch(), used by
 *          PyPlainToRich to generate the tags.
 *
 * Returns a new unicode object, or 0 with a Python exception set:
 * AttributeError if the query object is not registered, ValueError if the
 * query holds no search data or if plaintorich() produced no output.
 */
static PyObject *
Query_highlight(recoll_QueryObject* self, PyObject *args, PyObject *kwargs)
{
    LOGDEB1(("Query_highlight\n"));
    static const char *kwlist[] = {"text", "ishtml", "methods", NULL};
    char *sutf8 = 0; // needs freeing
    PyObject *ishtmlobj = 0;
    PyObject *methods = 0;
    // Both optional arguments use the "O" format unit and so must be
    // received into PyObject pointers: storing one into an int would
    // overrun it and leave the flag object unset.
    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "es|OO:Query_highlight",
                                     (char**)kwlist,
                                     "utf-8", &sutf8,
                                     &ishtmlobj,
                                     &methods)) {
        return 0;
    }
    string utf8(sutf8);
    PyMem_Free(sutf8);

    int ishtml = 0;
    if (ishtmlobj != 0) {
        // -1 means that the truth test itself raised an exception.
        ishtml = PyObject_IsTrue(ishtmlobj);
        if (ishtml < 0)
            return 0;
    }
    LOGDEB(("Query_highlight: [%s] ishtml %d\n", utf8.c_str(), ishtml));

    if (self->query == 0 ||
        the_queries.find(self->query) == the_queries.end()) {
        PyErr_SetString(PyExc_AttributeError, "query");
        return 0;
    }

    RefCntr<Rcl::SearchData> sd = self->query->getSD();
    if (sd.isNull()) {
        PyErr_SetString(PyExc_ValueError, "Query not initialized");
        return 0;
    }
    HighlightData hldata;
    sd->getTerms(hldata);
    PyPlainToRich hler(methods);
    hler.set_inputhtml(ishtml);
    list<string> out;
    hler.plaintorich(utf8, out, hldata, 5000000);
    if (out.empty()) {
        PyErr_SetString(PyExc_ValueError, "Plaintorich failed");
        return 0;
    }

    // Only the first output chunk is returned. Decoding directly yields the
    // new reference to hand back, with no intermediate object to release.
    const string& rich = out.front();
    return PyUnicode_Decode(rich.c_str(), rich.size(), "UTF-8", "replace");
}
|
|
|
1029 |
|
|
|
1030 |
PyDoc_STRVAR(doc_Query_makedocabstract,
|
|
|
1031 |
"makedocabstract(doc, methods = object))\n"
|
|
|
1032 |
"Will create a snippets abstract for doc by selecting text around the match\n"
|
|
|
1033 |
" terms\n"
|
|
|
1034 |
"If methods is set, will also perform highlighting. See the highlight method\n"
|
|
|
1035 |
);
|
|
|
1036 |
static PyObject *
|
|
|
1037 |
Query_makedocabstract(recoll_QueryObject* self, PyObject *args,PyObject *kwargs)
|
|
|
1038 |
{
|
|
|
1039 |
LOGDEB(("Db_makeDocAbstract\n"));
|
|
|
1040 |
static const char *kwlist[] = {"doc", "methods", NULL};
|
|
|
1041 |
recoll_DocObject *pydoc = 0;
|
|
|
1042 |
PyObject *hlmethods = 0;
|
|
|
1043 |
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!|O:Query_makeDocAbstract",
|
|
|
1044 |
(char **)kwlist,
|
|
|
1045 |
&recoll_DocType, &pydoc,
|
|
|
1046 |
&hlmethods)) {
|
|
|
1047 |
return 0;
|
|
|
1048 |
}
|
|
|
1049 |
|
|
|
1050 |
if (pydoc->doc == 0 || the_docs.find(pydoc->doc) == the_docs.end()) {
|
|
|
1051 |
LOGERR(("Query_makeDocAbstract: doc not found %p\n", pydoc->doc));
|
|
|
1052 |
PyErr_SetString(PyExc_AttributeError, "doc");
|
|
|
1053 |
return 0;
|
|
|
1054 |
}
|
|
|
1055 |
if (the_queries.find(self->query) == the_queries.end()) {
|
|
|
1056 |
LOGERR(("Query_makeDocAbstract: query not found %p\n", self->query));
|
|
|
1057 |
PyErr_SetString(PyExc_AttributeError, "query");
|
|
|
1058 |
return 0;
|
|
|
1059 |
}
|
|
|
1060 |
RefCntr<Rcl::SearchData> sd = self->query->getSD();
|
|
|
1061 |
if (sd.isNull()) {
|
|
|
1062 |
PyErr_SetString(PyExc_ValueError, "Query not initialized");
|
|
|
1063 |
return 0;
|
|
|
1064 |
}
|
|
|
1065 |
string abstract;
|
|
|
1066 |
if (hlmethods == 0) {
|
|
|
1067 |
if (!self->query->makeDocAbstract(*(pydoc->doc), abstract)) {
|
|
|
1068 |
PyErr_SetString(PyExc_EnvironmentError,
|
|
|
1069 |
"rcl makeDocAbstract failed");
|
|
|
1070 |
return 0;
|
|
|
1071 |
}
|
|
|
1072 |
} else {
|
|
|
1073 |
HighlightData hldata;
|
|
|
1074 |
sd->getTerms(hldata);
|
|
|
1075 |
PyPlainToRich hler(hlmethods);
|
|
|
1076 |
hler.set_inputhtml(0);
|
|
|
1077 |
vector<string> vabs;
|
|
|
1078 |
self->query->makeDocAbstract(*pydoc->doc, vabs);
|
|
|
1079 |
for (unsigned int i = 0; i < vabs.size(); i++) {
|
|
|
1080 |
if (vabs[i].empty())
|
|
|
1081 |
continue;
|
|
|
1082 |
list<string> lr;
|
|
|
1083 |
// There may be data like page numbers before the snippet text.
|
|
|
1084 |
// will be in brackets.
|
|
|
1085 |
string::size_type bckt = vabs[i].find("]");
|
|
|
1086 |
if (bckt == string::npos) {
|
|
|
1087 |
hler.plaintorich(vabs[i], lr, hldata);
|
|
|
1088 |
} else {
|
|
|
1089 |
hler.plaintorich(vabs[i].substr(bckt), lr, hldata);
|
|
|
1090 |
lr.front() = vabs[i].substr(0, bckt) + lr.front();
|
|
|
1091 |
}
|
|
|
1092 |
abstract += lr.front();
|
|
|
1093 |
abstract += "...";
|
|
|
1094 |
}
|
|
|
1095 |
}
|
|
|
1096 |
|
|
|
1097 |
// Return a python unicode object
|
|
|
1098 |
return PyUnicode_Decode(abstract.c_str(), abstract.size(),
|
|
|
1099 |
"UTF-8", "replace");
|
|
|
1100 |
}
|
|
|
1101 |
|
|
|
1102 |
|
|
|
1103 |
PyDoc_STRVAR(doc_Query_getxquery,
|
|
|
1104 |
"getxquery(None) -> Unicode string\n"
|
|
|
1105 |
"\n"
|
|
|
1106 |
"Retrieves the Xapian query description as a Unicode string.\n"
|
|
|
1107 |
"Meaningful only after executexx\n"
|
|
|
1108 |
);
|
|
|
1109 |
static PyObject *
|
|
|
1110 |
Query_getxquery(recoll_QueryObject* self, PyObject *, PyObject *)
|
|
|
1111 |
{
|
|
|
1112 |
LOGDEB(("Query_getxquery\n"));
|
|
|
1113 |
|
|
|
1114 |
if (self->query == 0 ||
|
|
|
1115 |
the_queries.find(self->query) == the_queries.end()) {
|
|
|
1116 |
PyErr_SetString(PyExc_AttributeError, "query");
|
|
|
1117 |
return 0;
|
|
|
1118 |
}
|
|
|
1119 |
RefCntr<Rcl::SearchData> sd = self->query->getSD();
|
|
|
1120 |
if (sd.isNull()) {
|
|
|
1121 |
PyErr_SetString(PyExc_ValueError, "Query not initialized");
|
|
|
1122 |
return 0;
|
|
|
1123 |
}
|
|
|
1124 |
string desc = sd->getDescription();
|
|
|
1125 |
return PyUnicode_Decode(desc.c_str(), desc.size(), "UTF-8", "replace");
|
|
|
1126 |
}
|
|
|
1127 |
|
|
|
1128 |
PyDoc_STRVAR(doc_Query_getgroups,
|
|
|
1129 |
"getgroups(None) -> a list of pairs\n"
|
|
|
1130 |
"\n"
|
|
|
1131 |
"Retrieves the expanded query terms. Meaningful only after executexx\n"
|
|
|
1132 |
"In each pair, the first entry is a list of user terms, the second a list of\n"
|
|
|
1133 |
"query terms as derived from the user terms and used in the Xapian Query.\n"
|
|
|
1134 |
"The size of each list is one for simple terms, or more for group and phrase\n"
|
|
|
1135 |
"clauses\n"
|
|
|
1136 |
);
|
|
|
1137 |
static PyObject *
|
|
|
1138 |
Query_getgroups(recoll_QueryObject* self, PyObject *, PyObject *)
|
|
|
1139 |
{
|
|
|
1140 |
LOGDEB(("Query_getxquery\n"));
|
|
|
1141 |
|
|
|
1142 |
if (self->query == 0 ||
|
|
|
1143 |
the_queries.find(self->query) == the_queries.end()) {
|
|
|
1144 |
PyErr_SetString(PyExc_AttributeError, "query");
|
|
|
1145 |
return 0;
|
|
|
1146 |
}
|
|
|
1147 |
RefCntr<Rcl::SearchData> sd = self->query->getSD();
|
|
|
1148 |
if (sd.isNull()) {
|
|
|
1149 |
PyErr_SetString(PyExc_ValueError, "Query not initialized");
|
|
|
1150 |
return 0;
|
|
|
1151 |
}
|
|
|
1152 |
HighlightData hld;
|
|
|
1153 |
sd->getTerms(hld);
|
|
|
1154 |
PyObject *mainlist = PyList_New(0);
|
|
|
1155 |
PyObject *ulist;
|
|
|
1156 |
PyObject *xlist;
|
|
|
1157 |
// We walk the groups vector. For each we retrieve the user group,
|
|
|
1158 |
// make a python list of each, then group those in a pair, and
|
|
|
1159 |
// append this to the main list.
|
|
|
1160 |
for (unsigned int i = 0; i < hld.groups.size(); i++) {
|
|
|
1161 |
unsigned int ugidx = hld.grpsugidx[i];
|
|
|
1162 |
ulist = PyList_New(hld.ugroups[ugidx].size());
|
|
|
1163 |
for (unsigned int j = 0; j < hld.ugroups[ugidx].size(); j++) {
|
|
|
1164 |
PyList_SetItem(ulist, j,
|
|
|
1165 |
PyUnicode_Decode(hld.ugroups[ugidx][j].c_str(),
|
|
|
1166 |
hld.ugroups[ugidx][j].size(),
|
|
|
1167 |
"UTF-8", "replace"));
|
|
|
1168 |
}
|
|
|
1169 |
|
|
|
1170 |
xlist = PyList_New(hld.groups[i].size());
|
|
|
1171 |
for (unsigned int j = 0; j < hld.groups[i].size(); j++) {
|
|
|
1172 |
PyList_SetItem(xlist, j,
|
|
|
1173 |
PyUnicode_Decode(hld.groups[i][j].c_str(),
|
|
|
1174 |
hld.groups[i][j].size(),
|
|
|
1175 |
"UTF-8", "replace"));
|
|
|
1176 |
}
|
|
|
1177 |
PyList_Append(mainlist, Py_BuildValue("(OO)", ulist, xlist));
|
|
|
1178 |
}
|
|
|
1179 |
return mainlist;
|
|
|
1180 |
}
|
|
|
1181 |
|
831 |
static PyMethodDef Query_methods[] = {
|
1182 |
static PyMethodDef Query_methods[] = {
|
832 |
{"execute", (PyCFunction)Query_execute, METH_VARARGS|METH_KEYWORDS,
|
1183 |
{"execute", (PyCFunction)Query_execute, METH_VARARGS|METH_KEYWORDS,
|
833 |
doc_Query_execute},
|
1184 |
doc_Query_execute},
|
834 |
{"executesd", (PyCFunction)Query_executesd, METH_VARARGS|METH_KEYWORDS,
|
1185 |
{"executesd", (PyCFunction)Query_executesd, METH_VARARGS|METH_KEYWORDS,
|
835 |
doc_Query_executesd},
|
1186 |
doc_Query_executesd},
|
836 |
{"fetchone", (PyCFunction)Query_fetchone, METH_VARARGS,
|
1187 |
{"fetchone", (PyCFunction)Query_fetchone, METH_NOARGS,
|
837 |
doc_Query_fetchone},
|
1188 |
doc_Query_fetchone},
|
838 |
{"sortby", (PyCFunction)Query_sortby, METH_VARARGS|METH_KEYWORDS,
|
1189 |
{"sortby", (PyCFunction)Query_sortby, METH_VARARGS|METH_KEYWORDS,
|
839 |
doc_Query_sortby},
|
1190 |
doc_Query_sortby},
|
|
|
1191 |
{"highlight", (PyCFunction)Query_highlight, METH_VARARGS|METH_KEYWORDS,
|
|
|
1192 |
doc_Query_highlight},
|
|
|
1193 |
{"getxquery", (PyCFunction)Query_getxquery, METH_NOARGS,
|
|
|
1194 |
doc_Query_getxquery},
|
|
|
1195 |
{"getgroups", (PyCFunction)Query_getgroups, METH_NOARGS,
|
|
|
1196 |
doc_Query_getgroups},
|
|
|
1197 |
{"makedocabstract", (PyCFunction)Query_makedocabstract,
|
|
|
1198 |
METH_VARARGS|METH_KEYWORDS, doc_Query_makedocabstract},
|
840 |
{NULL} /* Sentinel */
|
1199 |
{NULL} /* Sentinel */
|
841 |
};
|
1200 |
};
|
842 |
|
1201 |
|
843 |
static PyMemberDef Query_members[] = {
|
1202 |
static PyMemberDef Query_members[] = {
|
844 |
{(char*)"next", T_INT, offsetof(recoll_QueryObject, next), 0,
|
1203 |
{(char*)"next", T_INT, offsetof(recoll_QueryObject, next), 0,
|
|
... |
|
... |
1035 |
if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
|
1394 |
if (self->db == 0 || the_dbs.find(self->db) == the_dbs.end()) {
|
1036 |
LOGERR(("Db_query: db not found %p\n", self->db));
|
1395 |
LOGERR(("Db_query: db not found %p\n", self->db));
|
1037 |
PyErr_SetString(PyExc_AttributeError, "db id not found");
|
1396 |
PyErr_SetString(PyExc_AttributeError, "db id not found");
|
1038 |
return 0;
|
1397 |
return 0;
|
1039 |
}
|
1398 |
}
|
|
|
1399 |
LOGDEB(("Db_setAbstractParams: mxchrs %d, ctxwrds %d\n", maxchars, ctxwords));
|
1040 |
self->db->setAbstractParams(-1, maxchars, ctxwords);
|
1400 |
self->db->setAbstractParams(-1, maxchars, ctxwords);
|
1041 |
Py_RETURN_NONE;
|
1401 |
Py_RETURN_NONE;
|
1042 |
}
|
1402 |
}
|
1043 |
|
1403 |
|
1044 |
static PyObject *
|
1404 |
static PyObject *
|
1045 |
Db_makeDocAbstract(recoll_DbObject* self, PyObject *args, PyObject *)
|
1405 |
Db_makeDocAbstract(recoll_DbObject* self, PyObject *args)
|
1046 |
{
|
1406 |
{
|
1047 |
LOGDEB(("Db_makeDocAbstract\n"));
|
1407 |
LOGDEB(("Db_makeDocAbstract\n"));
|
1048 |
recoll_DocObject *pydoc = 0;
|
1408 |
recoll_DocObject *pydoc = 0;
|
1049 |
recoll_QueryObject *pyquery = 0;
|
1409 |
recoll_QueryObject *pyquery = 0;
|
1050 |
if (!PyArg_ParseTuple(args, "O!O!:Db_makeDocAbstract",
|
1410 |
if (!PyArg_ParseTuple(args, "O!O!:Db_makeDocAbstract",
|
|
... |
|
... |
1339 |
Py_INCREF(&recoll_SearchDataType);
|
1699 |
Py_INCREF(&recoll_SearchDataType);
|
1340 |
PyModule_AddObject(m, "SearchData", (PyObject *)&recoll_SearchDataType);
|
1700 |
PyModule_AddObject(m, "SearchData", (PyObject *)&recoll_SearchDataType);
|
1341 |
PyModule_AddStringConstant(m, "__doc__",
|
1701 |
PyModule_AddStringConstant(m, "__doc__",
|
1342 |
pyrecoll_doc_string);
|
1702 |
pyrecoll_doc_string);
|
1343 |
|
1703 |
|
|
|
1704 |
|
|
|
1705 |
PyObject* doctypecapsule =
|
|
|
1706 |
PyCapsule_New(&recoll_DocType, "recoll.doctype", 0);
|
|
|
1707 |
PyModule_AddObject(m, "doctype", doctypecapsule);
|
1344 |
}
|
1708 |
}
|