Parent: [ccf63e] (diff)

Child: [42ea67] (diff)

Download this file

graph.py    243 lines (202 with data), 8.8 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
from itertools import product
from collections import defaultdict, namedtuple
class MigrationGraph(object):
    '''Graph of migration states connected by migration steps.

    Each node holds one combination of per-module versions; each edge is a
    MigrateStep (one migration applied in one direction) leading from one
    such combination to another.
    '''

    def __init__(self, migrations):
        '''Build the graph from migrations, a mapping whose keys are
        (module, version) pairs and whose values are the migration objects
        handed to MigrateStep.'''
        self._build_graph(migrations)

    def _build_graph(self, migrations):
        '''Build a graph where the nodes are possible migration states and the
        edges are transitions between states allowed by migrations.
        '''
        # Collect every version referenced per module.  Each module also gets
        # the version -1, which shortest_path uses as the default starting
        # version for modules the caller does not mention.
        versions = defaultdict(lambda: [-1])
        for mod, version in migrations:
            versions[mod].append(version)
        # Freeze the module order once so that the State fields and the
        # columns fed to product() are guaranteed to line up.
        self._modules = list(versions)
        self._State = namedtuple('State', self._modules)
        self._nodes = [
            Node(self._State(*combo))
            for combo in product(*(versions[m] for m in self._modules))]
        self.node_by_state = dict((n.state, n) for n in self._nodes)
        # Index the nodes by (mod,version)
        self._index = defaultdict(list)
        for n in self._nodes:
            for m in self._modules:
                self._index[m, getattr(n.state, m)].append(n)
        # Add edges for all the migrations, one step per direction
        for migration in migrations.values():
            for direction in 'up', 'down':
                ms = MigrateStep(self, migration, direction)
                for prev, succ in ms.transitions():
                    prev.succs.append((succ, ms))

    def nodes_with(self, requirements):
        '''Return the set of nodes that match every requirement listed in
        requirements, which is either a dict or list of (mod,version) pairs.

        An empty requirements collection is satisfied by every node, so the
        full node set is returned in that case.  A (mod,version) pair unknown
        to the graph matches no node.
        '''
        if isinstance(requirements, dict):
            requirements = requirements.items()
        nodes = None
        for mod, ver in requirements:
            # .get() avoids growing the defaultdict index on unknown pairs
            matching = self._index.get((mod, ver), ())
            if nodes is None:
                nodes = set(matching)
            else:
                nodes.intersection_update(matching)
        if nodes is None:
            # No requirement was given at all: nothing rules any node out.
            return set(self._nodes)
        return nodes

    def shortest_path(self, start_requirements, end_requirements):
        '''Dijkstra's algorithm for shortest path from the start Node to any
        end Node.

        start_requirements gives the current version per module (dict or
        (mod,version) pairs); modules not mentioned default to -1 and modules
        unknown to the graph are ignored.  end_requirements is passed to
        nodes_with() to select the acceptable end nodes.

        Returns the list of MigrateStep objects to apply, in order.  Raises
        ValueError when no end node is reachable from the start node, and
        KeyError when the start state is not a node of the graph.
        '''
        # Find the start node
        start = dict((m, -1) for m in self._modules)
        start.update(start_requirements)
        start = dict(
            (str(k), v) for k, v in start.items()
            if k in self._State._fields)
        start_state = self._State(**start)
        start = self.node_by_state[start_state]
        # Find the end node(s)
        end = self.nodes_with(end_requirements)
        # The search state lives on the shared Node objects, so wipe whatever
        # a previous call left behind before starting a new search.
        unreachable = float('inf')
        for node in self._nodes:
            node.visited = False
            node.distance = unreachable
            node.pred = None
        # Run the algorithm
        start.distance = 0
        nodes = priority_dict(
            (node, node.distance)
            for node in self._nodes)
        while nodes:
            cur = nodes.pop_smallest()
            if cur.distance == unreachable:
                # Everything still queued is unreachable from start.
                break
            if cur in end:
                return list(cur.path())
            cur.visit(nodes)
        raise ValueError('No migration path exists from %s to %s' % (
            start, end))

    def as_dot(self): # pragma no cover
        '''Yield the lines of a Graphviz "dot" description of the graph.'''
        yield 'digraph G {'
        for n in self._nodes:
            yield 'node_%d[label="%r"];' % (id(n), n.state)
        for n in self._nodes:
            for (succ, ms) in n.succs:
                yield 'node_%d->node_%d[label="%r"];' % (id(n), id(succ), ms)
        yield '}'
class MigrateStep(object):
    '''Object representing a single migration step in a single direction
    (either up or down).'''

    _DIRECTIONS = ('up', 'down')

    def __init__(self, graph, migration, direction):
        '''Bind migration to graph for the given direction.

        Raises ValueError unless direction is 'up' or 'down'; without this
        check a misspelt direction would silently be treated as 'down'.
        '''
        if direction not in self._DIRECTIONS:
            raise ValueError(
                "direction must be 'up' or 'down', not %r" % (direction,))
        self.graph = graph
        self.migration = migration
        self.direction = direction

    def transitions(self):
        '''Returns all node->node transitions made possible by this
        migratestep, as (previous node, next node) pairs.'''
        if self.direction == 'up':
            reqs = self.migration.up_requires()
            postcondition = self.migration.up_postcondition()
        else:
            reqs = self.migration.down_requires()
            postcondition = self.migration.down_postcondition()
        for prev in self.graph.nodes_with(reqs):
            # The target is the source state with the postcondition's
            # versions substituted in.
            succ_state = prev.state._replace(**postcondition)
            yield prev, self.graph.node_by_state[succ_state]

    def apply(self, state):
        '''Actually run the migration, updating the state passed in'''
        if self.direction == 'up':
            self.migration.up()
            state.update(self.migration.up_postcondition())
        else:
            self.migration.down()
            state.update(self.migration.down_postcondition())

    def __repr__(self): # pragma no cover
        return '<%s.%s %s>' % (
            self.migration.module,
            self.migration.version,
            self.direction)
class Node(object):
    '''One vertex of the migration graph, together with the bookkeeping
    fields used while searching for a shortest path.'''

    def __init__(self, state):
        self.state = state
        self.visited = False
        self.distance = 1e9 # effectively inf
        self.pred = None # (node, migrationstep) we were reached through
        self.succs = [] # outgoing edges as (node, migrationstep) pairs

    def visit(self, nodes):
        '''Relax every outgoing edge of this node (the 'visit' step of
        Dijkstra's shortest-path algorithm), recording improved distances
        both on the neighbour and in the nodes mapping.'''
        self.visited = True
        candidate = self.distance + 1
        for neighbour, step in self.succs:
            # Settled neighbours and non-improvements are left untouched
            if neighbour.visited or not candidate < neighbour.distance:
                continue
            neighbour.distance = candidate
            neighbour.pred = (self, step)
            nodes[neighbour] = candidate

    def path(self):
        '''Yield, first step first, the migration steps stored along the
        'predecessor' chain that ends at this node.'''
        steps = []
        link = self.pred
        # Walk the chain backwards, then replay it in forward order
        while link:
            steps.append(link[1])
            link = link[0].pred
        for step in reversed(steps):
            yield step

    def __repr__(self): # pragma no cover
        return '<Node %r (%s)>' % (self.state,self.distance)
# priority dictionary recipe copied from
# http://code.activestate.com/recipes/522995-priority-dict-a-priority-queue-with-updatable-prio/
# We use this rather than the raw heap because the priority_dict allows us to
# update the priority of a node, which heapq does not (natively) allow without
# re-running heapify() each time a priority changes. (And priorities change
# often in Dijkstra's algorithm.)
from heapq import heapify, heappush, heappop
class priority_dict(dict):
    """Dictionary that can be used as a priority queue.

    Keys of the dictionary are items to be put into the queue, and values
    are their respective priorities. All dictionary methods work as expected.
    The advantage over a standard heapq-based priority queue is
    that priorities of items can be updated cheaply
    using code as 'thedict[item] = new_priority.'

    The 'smallest' method can be used to return the object with lowest
    priority, and 'pop_smallest' also removes it.

    The 'sorted_iter' method provides a destructive sorted iterator.

    Heap entries are (priority, sequence number, key) triples. The sequence
    number settles ties between equal priorities, so keys themselves are
    never compared and do not have to be orderable.
    """

    def __init__(self, *args, **kwargs):
        super(priority_dict, self).__init__(*args, **kwargs)
        self._rebuild_heap()

    def _rebuild_heap(self):
        """Recreate the heap from the current dictionary contents, dropping
        every stale entry."""
        self._heap = [(v, i, k) for i, (k, v) in enumerate(self.items())]
        # Next tie-breaker to hand out; always larger than any one in use.
        self._seq = len(self._heap)
        heapify(self._heap)

    def smallest(self):
        """Return the item with the lowest priority.

        Raises IndexError if the object is empty.
        """
        heap = self._heap
        v, _, k = heap[0]
        # Entries whose key was deleted or re-prioritised are stale; discard
        # them until the top of the heap reflects the dictionary again.
        while k not in self or self[k] != v:
            heappop(heap)
            v, _, k = heap[0]
        return k

    def pop_smallest(self):
        """Return the item with the lowest priority and remove it.

        Raises IndexError if the object is empty.
        """
        heap = self._heap
        v, _, k = heappop(heap)
        while k not in self or self[k] != v:
            v, _, k = heappop(heap)
        del self[k]
        return k

    def __setitem__(self, key, val):
        # We are not going to remove the previous value from the heap,
        # since this would have a cost O(n).
        super(priority_dict, self).__setitem__(key, val)
        if len(self._heap) < 2 * len(self):
            heappush(self._heap, (val, self._seq, key))
            self._seq += 1
        else:
            # When the heap grows larger than 2 * len(self), we rebuild it
            # from scratch to avoid wasting too much memory.
            self._rebuild_heap()

    def setdefault(self, key, val=None):
        """Return self[key], first storing val (default None, as for
        dict.setdefault) under key when it is missing."""
        if key not in self:
            self[key] = val
            return val
        return self[key]

    def update(self, *args, **kwargs):
        # Reimplementing dict.update is tricky -- see e.g.
        # http://mail.python.org/pipermail/python-ideas/2007-May/000744.html
        # We just rebuild the heap from scratch after passing to super.
        super(priority_dict, self).update(*args, **kwargs)
        self._rebuild_heap()

    def sorted_iter(self):
        """Sorted iterator of the priority dictionary items.

        Beware: this will destroy elements as they are returned.
        """
        while self:
            yield self.pop_smallest()
# End recipe