Child: [265cb3] (diff)

Download this file

search.py    120 lines (105 with data), 3.7 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import re
import cPickle as pickle
from logging import getLogger
from pprint import pformat
from itertools import islice, chain
import markdown
from pylons import c,g
import pysolr
from . import helpers as h
from .markdown_extensions import ForgeExtension
# from allura.tasks.search import AddArtifacts, DelArtifacts
# re_SHORTLINK = re.compile(ForgeExtension.core_artifact_link)
# Pulls the error text out of the ``<pre>...</pre>`` element of a SOLR error
# message, skipping an optional leading ``org.apache.lucene...: `` prefix; the
# remainder is captured in the named group ``text`` (used by search_artifact).
re_SOLR_ERROR = re.compile(r'<pre>(org.apache.lucene[^:]+: )?(?P<text>[^<]+)</pre>')
# Module-level logger, named after this module.
log = getLogger(__name__)
def try_solr(func):
    """Decorate *func* so that errors it raises are logged, not propagated.

    The returned wrapper calls ``func`` with the arguments it receives and
    returns its result.  If ``func`` raises, the traceback is logged through
    the module logger and the wrapper returns ``None``.

    Only ``Exception`` subclasses are swallowed; ``KeyboardInterrupt`` and
    ``SystemExit`` still propagate so the process can be stopped normally.
    """
    # Local import keeps this block self-contained.
    from functools import wraps

    @wraps(func)  # keep func's __name__/__doc__ visible on the wrapper
    def inner(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception:  # pragma no cover
            log.exception('Error in solr indexing')
    return inner
def solarize(obj):
    """Build the index document for *obj*.

    Calls ``obj.index()`` and rewrites the document's ``text`` entry so that
    it is the original text followed by a pretty-printed dump of all the
    other values in the document.

    Returns ``None`` when *obj* is ``None`` or when ``obj.index()`` returns
    ``None``; otherwise returns the (mutated) document.
    """
    doc = obj.index() if obj is not None else None
    if doc is None:
        return None
    body = doc.pop('text', '')
    try:
        doc['text'] = body + pformat(doc.values())
    except TypeError:
        # The stored text could not be concatenated with a string (e.g. it
        # is None); index the dumped values on their own.
        doc['text'] = pformat(doc.values())
    return doc
@try_solr
def add_artifacts(obj_iter):
    """Log and publish references for the artifacts in *obj_iter*.

    Every object is converted with ``dump_ref()``.  The references are
    consumed in batches of at most 1000; each reference is logged as an
    'upsert artifact' action and each non-empty batch is handed to
    ``g.publish('react', 'artifacts_altered', ...)`` pickled.
    """
    refs = (o.dump_ref() for o in obj_iter)
    # iter(callable, sentinel): keep pulling batches until an empty one.
    for batch in iter(lambda: list(islice(refs, 1000)), []):
        for ref in batch:
            type_name = pickle.loads(ref.artifact_type).__name__
            h.log_action(log, 'upsert artifact').info(
                'upsert artifact %s', type_name,
                meta=dict(type=type_name, id=ref.artifact_id))
        g.publish(
            'react', 'artifacts_altered',
            dict(artifacts=batch),
            serializer='pickle')
@try_solr
def remove_artifacts(obj_iter):
    """Log and publish removal of the artifacts in *obj_iter*.

    Every object is converted with ``dump_ref()``.  The references are
    consumed in batches of at most 1000; each reference is logged as a
    'delete artifact' action and each non-empty batch is handed to
    ``g.publish('react', 'artifacts_removed', ...)`` pickled.
    """
    refs = (o.dump_ref() for o in obj_iter)
    # iter(callable, sentinel): keep pulling batches until an empty one.
    for batch in iter(lambda: list(islice(refs, 1000)), []):
        for ref in batch:
            type_name = pickle.loads(ref.artifact_type).__name__
            h.log_action(log, 'delete artifact').info(
                'delete artifact %s', type_name,
                meta=dict(type=type_name, id=ref.artifact_id))
        g.publish(
            'react', 'artifacts_removed',
            dict(artifacts=batch),
            serializer='pickle')
@try_solr
def search(q, **kw):
    """Run query *q* against ``g.solr`` and return its result.

    Extra keyword arguments are passed straight through to
    ``g.solr.search``.  Because of ``try_solr``, a failing search is logged
    and yields ``None`` instead of raising.
    """
    solr = g.solr
    return solr.search(q, **kw)
def search_artifact(atype, q, history=False, rows=10, **kw):
    """Perform a SOLR search restricted to artifacts of type *atype*.

    Field names in *q* may be written without their one-letter type suffix:
    every ``name:`` is rewritten to the suffixed field that *atype* indexes
    (e.g. ``type:`` becomes ``type_s:``).  Filter queries limit the results
    to the artifact's ``type_s``, to ``c.project`` and to the mount point of
    ``c.app``.

    :param atype: artifact class; one instance is loaded through
        ``atype.query.find().first()`` to learn the indexed field names.
    :param q: the SOLR query string.
    :param history: unless true, the filter ``is_history_b:False`` is added.
    :param rows: number of rows requested from SOLR.
    :param kw: passed through to ``g.solr.search``.
    :returns: the result of ``g.solr.search``, or ``None`` when no instance
        of *atype* exists.
    :raises ValueError: if SOLR returns an error.
    """
    # first, grab an artifact and get the fields that it indexes
    a = atype.query.find().first()
    if a is None:
        return  # no instances of atype, so nothing can match
    fields = a.index()
    # Translate unsuffixed field references (``fld:``) into the indexed,
    # suffixed names (``fld_X:``).  The length check skips names too short
    # to carry a suffix: a one-character name would raise IndexError on
    # f[-2], and a bare ``_X`` has an empty base, which would rewrite every
    # ':' in the query.
    for f in fields:
        if len(f) > 2 and f[-2] == '_':
            q = q.replace(f[:-2] + ':', f + ':')
    fq = [
        'type_s:%s' % fields['type_s'],
        'project_id_s:%s' % c.project._id,
        'mount_point_s:%s' % c.app.config.options.mount_point]
    if not history:
        fq.append('is_history_b:False')
    try:
        return g.solr.search(q, fq=fq, rows=rows, **kw)
    except pysolr.SolrError as e:
        log.info("Solr error: %s", e)
        # ``message`` is not guaranteed on every exception object; fall back
        # to str(e) so a missing attribute cannot mask the SOLR error.
        m = re_SOLR_ERROR.search(getattr(e, 'message', '') or str(e))
        text = m.group('text') if m else "syntax error?"
        raise ValueError(text)
def find_shortlinks(text):
    """Return the links collected while converting *text* as markdown.

    *text* is run through a ``markdown.Markdown`` instance configured with
    the 'codehilite', ``ForgeExtension`` and 'tables' extensions.  The result
    is the list of non-``None`` values found afterwards in the ``alinks``
    mapping reachable from the 'forge' postprocessor.
    """
    converter = markdown.Markdown(
        extensions=['codehilite', ForgeExtension(), 'tables'],
        output_format='html4')
    # The rendered HTML is discarded; presumably the conversion is what
    # fills ``alinks`` -- confirm against ForgeExtension.
    converter.convert(text)
    forge_processor = converter.postprocessors['forge']
    found = []
    for link in forge_processor.parent.alinks.itervalues():
        if link is not None:
            found.append(link)
    return found