Download this file

xesam-recoll-service    232 lines (186 with data), 8.2 kB

#!/usr/bin/env python
"""
Recoll implementation of a xesam server. 
Based on the example in the xesam-tools package by:
 Mikkel Kamstrup Erlandsen <mikkel.kamstrup@gmail.com>

Run it like

xesam-recoll-service 

And launch a search on it via

  xesam-tool search hello

"""

# Sets up path to uninstalled xesam module
import demo

import xesam
import xesam.query
import xesam.server
import gobject
import sys

import datetime
import recoll

def timestampToIso8601(ts):
    """Return the ISO 8601 string for a Unix timestamp, in local time.

    ts may be a number or a string holding an integer number of seconds
    since the epoch. A value that cannot be converted to an integer raises
    ValueError (or TypeError for unsupported types).
    """
    # int() promotes to an arbitrary-precision integer when needed, so it
    # covers everything long() did while also existing on Python 3.
    return datetime.datetime.fromtimestamp(int(ts)).isoformat()

class RecollServer (xesam.server.Searcher):
    """Xesam searcher whose sessions and searches are Recoll-backed.

    The client factory is configured to create RecollSession and
    RecollSearch objects, and a Recoll database connection is kept on the
    instance as ``rcldb``.
    """

    def __init__(self):
        """Wire up the xesam factories and open the Recoll connection."""
        handle_factory = xesam.server.HandleFactory()
        client_factory = xesam.server.ClientFactory(
            self, handle_factory, RecollSession, RecollSearch)
        xesam.server.Searcher.__init__(self, handle_factory, client_factory)
        self.set_echo_queries(True)
        # RecollSearch reaches this connection through its searcher.
        self.rcldb = recoll.connect()

    def start(self):
        """Export this searcher through a SearchServerStub and start it."""
        stub = xesam.server.SearchServerStub(self)
        stub.start()

    def GetProperty(self, shandle, name):
        """Delegate to the base class, log the request, return its result."""
        result = xesam.server.Searcher.GetProperty(self, shandle, name)
        message = "Got property request for '%s' on session '%s', returning %s"
        xesam.debug(message % (name, shandle, result))
        return result

    def SetProperty(self, shandle, name, value):
        """Delegate to the base class, log the request, return its result."""
        result = xesam.server.Searcher.SetProperty(self, shandle, name, value)
        message = ("Set property request for '%s=%s', on session '%s', "
                   "returning %s")
        xesam.debug(message % (name, value, shandle, result))
        return result

class RecollSession (xesam.server.Session):
    """Xesam session created by RecollServer's client factory.

    Beyond the base class setup it only records one session property.
    """

    def __init__(self, searcher, session_handle):
        """Initialise the base session, then set the Recoll property."""
        xesam.server.Session.__init__(self, searcher, session_handle)
        # NOTE(review): "recoll.org" is used as the property *name* here;
        # confirm against the xesam session property list that this is the
        # intended key.
        prop_name = "recoll.org"
        prop_value = "xesam-recoll-service"
        self.set_property(prop_name, prop_value)
        
class RecollSearch (xesam.server.Search):
    """Xesam search that runs its query through Recoll.

    Documents are pulled from the Recoll query at most SLICE at a time and
    published as hits carrying the session's hit fields.
    """

    # Upper bound on the number of documents turned into hits by a single
    # start() or get_hits() call.
    SLICE = 10

    def __init__(self, searcher, session, search_handle,
                 query=None, xml=None):
        """Read the hit fields from the session and prepare a Recoll query.

        Our parent class does the possible xml-to-query parsing, so once
        its constructor has run we end up with a query.
        """
        xesam.server.Search.__init__(self, searcher, session, search_handle,
                                     query=query, xml=xml)

        self._hit_fields = session.get_property(xesam.SESSION_HIT_FIELDS)
        if self._hit_fields is None:
            xesam.error("Got property hit.fields as None."
                        " Setting default xesam:url")
            self._hit_fields = ["xesam:url"]
        # The placeholder is required: without it the field list never
        # makes it into the log message.
        xesam.debug("RecollSearch: fields: %s" % (self._hit_fields,))
        xesam.debug("Created %s with handle %s and query:\n%s" %
                    (self.__class__, self.get_handle(), self.get_query()))

        # Instantiate a recoll query
        self.rclquery = self._searcher.rcldb.query()

        # In the latest version (>0.95), primary/secondary is replaced by
        # a field list.
        sortfield = session.get_property(xesam.SESSION_SORT_PRIMARY)
        order = session.get_property(xesam.SESSION_SORT_ORDER)

        # xesam-tool does not know how to set these for now, so the session
        # values read above are deliberately overridden with fixed values.
        # TEST
        sortfield = "contentModified"
        order = "descending"
        xesam.debug("Session sort primary %s order %s" % (sortfield, order))
        # END TEST

        if sortfield:
            self.rclquery.sortby(sortfield, 1 if order == "ascending" else 0)

    def doc_to_hit(self, doc):
        """Return the list of values for doc, one per requested hit field.

        Field names are lower-cased and stripped of the "xesam:" prefix.
        "snippet" yields the abstract built by Recoll, "contentmodified"
        yields the document mtime as an ISO 8601 string, and any other
        field is read as an attribute of doc (empty string when missing).
        """
        data = []
        for fld in self._hit_fields:
            # TODO: need to handle ContentCategory and SourceCategory
            fld = fld.lower().replace("xesam:", "")
            xesam.debug("Adding data for fld %s" % (fld))
            if fld == "snippet":
                value = self._searcher.rcldb.makeDocAbstract(doc,
                                                             self.rclquery)
            elif fld == "contentmodified":
                value = timestampToIso8601(doc.mtime)
            else:
                value = getattr(doc, fld, "")
            data.append(value)
        return data

    def _publish_hits(self, limit):
        """Fetch documents from Recoll, add them as hits and emit signals.

        Fetching stops right after the hit count reaches limit; at least
        one document is fetched whenever one is available. "hits-added" is
        emitted if anything was added. When the loop finds no document
        left to fetch, "done" is emitted and the search is stopped.
        """
        hits = 0
        done = False
        while 0 <= self.rclquery.next < self.nres:
            doc = self.rclquery.fetchone()
            self.add_new_hit(self._hit_fields, self.doc_to_hit(doc))
            hits += 1
            if hits >= limit:
                break
        else:
            # Only reached when the loop condition failed, i.e. the result
            # set is exhausted; a break above leaves done unset even if
            # the last document was just consumed.
            done = True

        if hits > 0:
            self.emit("hits-added", hits)
            xesam.debug("Search '%s' emitted 'hits-added' %d" %
                        (self.get_handle(), hits))
        if done:
            self.emit("done")
            xesam.debug("Search '%s' emitted 'done'" % self.get_handle())
            self.stop()

    def start(self):
        """Execute the query in Recoll and publish the first slice of hits.

        Raises Exception when the query is neither a UserQuery nor a
        CompositeQuery.
        """
        xesam.debug("RecollSearch")

        query = self.get_query()
        if isinstance(query, xesam.query.UserQuery):
            self.nres = self.rclquery.execute(query.get_string())
        elif isinstance(query, xesam.query.CompositeQuery):
            self.build_search(None)
            self.nres = self.rclquery.executesd(self.searchdata)
        else:
            raise Exception("Neither UserQuery nor composite query ??")

        self._publish_hits(self.SLICE)

    def get_hits(self, num_hits):
        """Publish up to min(SLICE, num_hits) more hits, then defer to the
        parent class to return the requested hits.

        Once the search is stopped nothing more is fetched from Recoll.
        """
        xesam.debug("RecollSearch get_hits")

        if not self._stopped:
            self._publish_hits(min(self.SLICE, num_hits))

        return xesam.server.Search.get_hits(self, num_hits)

    # Build Recoll searchData tree out of Xesam CompositeQuery
    # This is recursive, parent is the current SearchData into which we are
    # adding clauses/subsearches.
    def build_search(self, parent=None):
        """Translate the query's clause into Recoll search data.

        parent is the SearchData receiving the clauses; when it is None a
        new one is created and stored as self.searchdata. Raises Exception
        for a clause that is neither a selector nor a collector.
        """
        # Work on this stopped after the hackfest because apparently big
        # changes are coming in this area.
        #
        # Lots of things are missing, such as handling the selector
        # "names" (see xesam/query.py: SELECTORS), and handling the
        # different value types and their attributes (phrase, slack,
        # ordered, etc.; cf. xesam/query.py: StringValue).
        xq = self.get_query()
        cl = xq.get_clause()
        if isinstance(cl, xesam.query.SelectorClause):
            xesam.debug("Selector clause")
            if parent is None:
                # Top clause is a selector. Needs to have a parent
                self.searchdata = recoll.SearchData()
                parent = self.searchdata

            # Fields. If there are several we must turn this into an OR
            # combination of searches on the different fields
            fields = cl.get_fields()
            nfields = len(fields)
            if nfields > 1:
                orsearch = recoll.SearchData(type="or")
                for fld in fields:
                    self.simple_selec_addclause(orsearch, cl, fld)
                parent.addclause(type="sub", subSearch=orsearch)
            elif nfields == 1:
                self.simple_selec_addclause(parent, cl, fields[0])
            else:
                self.simple_selec_addclause(parent, cl)
        elif isinstance(cl, xesam.query.CollectorClause):
            # TODO: collector clauses are not translated yet; nothing is
            # added to (or created as) the search data in this branch.
            xesam.debug("Collector clause")
        else:
            raise Exception("Non Collector or Selector Clause found")

    # Add Recoll clause for xesam selector clause, dealing with at most
    # one field.
    def simple_selec_addclause(self, parent, cl, fld=""):
        """Add an "and" clause on field fld to parent, using cl's value."""
        parent.addclause(type="and", field=fld,
                         qstring=cl.get_value().get())

if __name__ == "__main__":
    # Script entry point: build the Recoll-backed searcher and export it.
    server = RecollServer()
    server.start()