Parent: [7eb795] (diff)

Child: [80ee1b] (diff)

Download this file

taskd.py    185 lines (159 with data), 6.9 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
import os
import time
import Queue
from datetime import datetime, timedelta
import signal
import sys
import faulthandler
import pylons
from paste.deploy import loadapp
from paste.deploy.converters import asint
from webob import Request
import base
# Turn on faulthandler at import time so that a low-level crash of the
# interpreter (e.g. a segfault) dumps the Python traceback instead of dying
# silently; see the faulthandler documentation for the exact signals covered.
faulthandler.enable()
class TaskdCommand(base.Command):
    '''Long-running command that fetches MonQ tasks and runs them one by one.

    Each task is dispatched through the ``task`` WSGI app from the given ini
    file.  The process reacts to three signals:

    * SIGHUP  -- finish the current work, then re-exec itself
    * SIGTERM -- finish the current work, then exit the worker loop
    * SIGUSR1 -- log which task (if any) is being handled right now
    '''
    summary = 'Task server'
    parser = base.Command.standard_parser(verbose=True)
    parser.add_option('--only', dest='only', type='string', default=None,
                      help='only handle tasks of the given name(s) (can be comma-separated list)')

    def command(self):
        '''Set up the environment and signal handlers, then run the worker.'''
        self.basic_setup()
        self.keep_running = True
        self.restart_when_done = False
        base.log.info('Starting taskd, pid %s' % os.getpid())
        signal.signal(signal.SIGHUP, self.graceful_restart)
        signal.signal(signal.SIGTERM, self.graceful_stop)
        signal.signal(signal.SIGUSR1, self.log_current_task)
        # restore default behavior of not interrupting system calls
        # see http://docs.python.org/library/signal.html#signal.siginterrupt
        # and http://linux.die.net/man/3/siginterrupt
        signal.siginterrupt(signal.SIGHUP, False)
        signal.siginterrupt(signal.SIGTERM, False)
        signal.siginterrupt(signal.SIGUSR1, False)
        self.worker()

    def graceful_restart(self, signum, frame):
        '''Signal handler: ask the worker loop to stop and then re-exec.'''
        base.log.info('taskd pid %s recieved signal %s preparing to do a graceful restart' % (os.getpid(), signum))
        self.keep_running = False
        self.restart_when_done = True

    def graceful_stop(self, signum, frame):
        '''Signal handler: ask the worker loop to stop without restarting.'''
        base.log.info('taskd pid %s recieved signal %s preparing to do a graceful stop' % (os.getpid(), signum))
        self.keep_running = False

    def log_current_task(self, signum, frame):
        '''Signal handler: log the task currently held in ``self.task``.'''
        # getattr() because the signal may arrive before worker() has ever
        # assigned self.task.
        base.log.info('taskd pid %s is currently handling task %s' % (os.getpid(), getattr(self, 'task', None)))

    def worker(self):
        '''Poll for tasks and run them until ``self.keep_running`` is cleared.

        Reads the poll interval from ``monq.poll_interval`` (default 10) and
        the WSGI error log path from ``taskd.wsgi_log`` (default /dev/null).
        When the loop ends and a restart was requested, the process replaces
        itself via ``os.execv`` using the original ``sys.argv``.
        '''
        from allura import model as M
        name = '%s pid %s' % (os.uname()[1], os.getpid())
        wsgi_app = loadapp('config:%s#task' % self.args[0], relative_to=os.getcwd())
        poll_interval = asint(pylons.config.get('monq.poll_interval', 10))
        only = self.options.only
        if only:
            only = only.split(',')

        def start_response(status, headers, exc_info=None):
            # The response status/headers of the fake request are not used.
            pass

        def waitfunc_amqp():
            # Block on the AMQP queue for at most one poll interval.
            try:
                return pylons.g.amq_conn.queue.get(timeout=poll_interval)
            except Queue.Empty:
                return None

        def waitfunc_noq():
            # No queue configured: plain sleep between polls.
            time.sleep(poll_interval)

        def check_running(func):
            # Wrap a wait function so it returns immediately (None) once a
            # stop/restart has been requested.
            def waitfunc_checks_running():
                if self.keep_running:
                    return func()
                else:
                    return None
            return waitfunc_checks_running

        if pylons.g.amq_conn:
            waitfunc = waitfunc_amqp
        else:
            waitfunc = waitfunc_noq
        waitfunc = check_running(waitfunc)

        # errors get logged via regular logging and also recorded into the mongo task record
        # so this is generally not needed, and only present to avoid errors within
        # weberror's ErrorMiddleware if the default error stream (stderr?) doesn't work
        #
        # The log is opened in a ``with`` block so it is closed before the
        # process stops or re-execs: os.execv keeps open descriptors that are
        # not marked close-on-exec, so leaving it open would leak one
        # descriptor per graceful restart.
        with open(pylons.config.get('taskd.wsgi_log', '/dev/null'), 'a') as wsgi_error_log:
            while self.keep_running:
                if pylons.g.amq_conn:
                    pylons.g.amq_conn.reset()
                try:
                    while self.keep_running:
                        self.task = M.MonQTask.get(
                            process=name,
                            waitfunc=waitfunc,
                            only=only)
                        if self.task:
                            # Build the (fake) request
                            r = Request.blank('/--%s--/' % self.task.task_name,
                                              {'task': self.task,
                                               'wsgi.errors': wsgi_error_log,  # ErrorMiddleware records error details here
                                               })
                            list(wsgi_app(r.environ, start_response))
                            self.task = None
                except Exception as e:
                    # The task that raised is no longer being handled; clear it
                    # so log_current_task does not report it during the pause
                    # or while waiting for the next task.
                    self.task = None
                    if self.keep_running:
                        base.log.exception('taskd error %s; pausing for 10s before taking more tasks' % e)
                        time.sleep(10)
                    else:
                        base.log.exception('taskd error %s' % e)
                finally:
                    wsgi_error_log.flush()

        base.log.info('taskd pid %s stopping gracefully.' % os.getpid())
        if self.restart_when_done:
            base.log.info('taskd pid %s restarting itself' % os.getpid())
            os.execv(sys.argv[0], sys.argv)
class TaskCommand(base.Command):
    '''One-shot maintenance command for MonQ tasks.

    The second positional argument selects the action to perform; the
    accepted actions are listed in ``usage``.
    '''
    summary = 'Task command'
    parser = base.Command.standard_parser(verbose=True)
    parser.add_option(
        '-s', '--state',
        dest='state',
        default='ready',
        help='state of processes to examine')
    parser.add_option(
        '-t', '--timeout',
        dest='timeout',
        type=int,
        default=60,
        help='timeout (in seconds) for busy tasks')
    min_args = 2
    max_args = None
    usage = '<ini file> [list|retry|purge|timeout|commit]'

    def command(self):
        '''Dispatch to the handler named by the second positional argument.

        An unrecognised action name raises KeyError from the table lookup.
        '''
        self.basic_setup()
        action = self.args[1]
        handlers = {
            'list': self._list,
            'retry': self._retry,
            'purge': self._purge,
            'timeout': self._timeout,
            'commit': self._commit,
        }
        handler = handlers[action]
        handler()

    def _list(self):
        '''Print every task in the state given by --state ('*' = any state).'''
        from allura import model as M
        base.log.info('Listing tasks of state %s', self.options.state)
        wanted_state = self.options.state
        # '*' is the wildcard: an empty query matches tasks in every state.
        query = {} if wanted_state == '*' else {'state': wanted_state}
        for task in M.MonQTask.query.find(query):
            print(task)

    def _retry(self):
        '''Move all tasks from the 'error' state back to 'ready'.'''
        from allura import model as M
        base.log.info('Retry tasks in error state')
        in_error = {'state': 'error'}
        mark_ready = {'$set': {'state': 'ready'}}
        M.MonQTask.query.update(in_error, mark_ready, multi=True)

    def _purge(self):
        '''Remove tasks that are complete and whose result_type is 'forget'.'''
        from allura import model as M
        base.log.info('Purge complete/forget tasks')
        finished = {'state': 'complete', 'result_type': 'forget'}
        M.MonQTask.query.remove(finished)

    def _timeout(self):
        '''Hand MonQTask.timeout_tasks a cutoff of now minus --timeout seconds.'''
        from allura import model as M
        base.log.info('Reset tasks stuck for %ss or more', self.options.timeout)
        max_age = timedelta(seconds=self.options.timeout)
        M.MonQTask.timeout_tasks(datetime.utcnow() - max_age)

    def _commit(self):
        '''Post the SOLR commit task.'''
        from allura.tasks import index_tasks
        base.log.info('Commit to solr')
        index_tasks.commit.post()