Switch to unified view

a/Allura/test-light.py b/Allura/test-light.py
...
...
3
from collections import defaultdict
3
from collections import defaultdict
4
from itertools import chain, izip
4
from itertools import chain, izip
5
from datetime import datetime
5
from datetime import datetime
6
6
7
from pylons import c
7
from pylons import c
8
from pymongo.errors import DuplicateKeyError
8
9
9
from ming.base import Object
10
from ming.base import Object
10
11
11
from allura import model as M
12
from allura import model as M
12
from allura.lib import helpers as h
13
from allura.lib import helpers as h
...
...
55
    for i, oid in enumerate(commit_ids):
56
    for i, oid in enumerate(commit_ids):
56
        ci = repo.rev_parse(oid)
57
        ci = repo.rev_parse(oid)
57
        refresh_commit_info(ci, seen)
58
        refresh_commit_info(ci, seen)
58
        if (i+1) % 100 == 0:
59
        if (i+1) % 100 == 0:
59
            log.info('Refresh commit info %d: %s', (i+1), oid)
60
            log.info('Refresh commit info %d: %s', (i+1), oid)
61
62
    #############################################
63
    # Everything below here is repo-agnostic
64
    #############################################
65
66
    refresh_repo(commit_ids, c.app.repo._id)
60
67
61
    # Refresh child references
68
    # Refresh child references
62
    seen = set()
69
    seen = set()
63
    parents = set()
70
    parents = set()
64
71
...
...
77
            log.info('Refresh child (b) info %d: %s', (i + j + 1), ci._id)
84
            log.info('Refresh child (b) info %d: %s', (i + j + 1), ci._id)
78
85
79
    # Refresh basic blocks
86
    # Refresh basic blocks
80
    bbb = BasicBlockBuilder(commit_ids)
87
    bbb = BasicBlockBuilder(commit_ids)
81
    bbb.run()
88
    bbb.run()
89
    bbb.cleanup()
82
90
83
    # Verify the log
91
    # Verify the log
84
    log.info('Logging via basic blocks')
92
    log.info('Logging via basic blocks')
85
    with open('log.txt', 'w') as fp:
86
        for i, ci in enumerate(commitlog(commit_ids[0])):
93
    for i, ci in enumerate(commitlog(commit_ids[0], skip=2000, limit=50)):
87
            print >> fp, repr(ci)
94
        pass
88
            log.info('%r', ci)
89
    log.info('... done (%d commits from %s)', i, commit_ids[0])
95
    log.info('... done (%d commits from %s)', i+1, commit_ids[0])
90
96
91
    # Refresh trees
97
    # Refresh trees
92
    cache = {}
98
    cache = {}
93
    for i, oid in enumerate(commit_ids):
99
    for i, oid in enumerate(commit_ids):
94
        ci = M.repo.Commit.m.get(_id=oid)
100
        ci = M.repo.Commit.m.get(_id=oid)
...
...
113
        (oid, cache[oid])
119
        (oid, cache[oid])
114
        for oid in trees_doc.tree_ids)
120
        for oid in trees_doc.tree_ids)
115
    return new_cache
121
    return new_cache
116
122
117
def refresh_commit_info(ci, seen):
    '''Insert a Commit document for the GitPython commit `ci`, if absent.

    Returns True when a new document was inserted; returns None when the
    commit already exists, whether found by the up-front lookup or via a
    duplicate-key error raised by a concurrent insert.  On success, also
    refreshes the commit's tree through refresh_tree().
    '''
    if M.repo.Commit.m.find(dict(_id=ci.hexsha)).count() != 0:
        return
    try:
        committed = Object(
            name=h.really_unicode(ci.committer.name),
            email=h.really_unicode(ci.committer.email),
            date=datetime.utcfromtimestamp(
                ci.committed_date - ci.committer_tz_offset))
        authored = Object(
            name=h.really_unicode(ci.author.name),
            email=h.really_unicode(ci.author.email),
            date=datetime.utcfromtimestamp(
                ci.authored_date - ci.author_tz_offset))
        doc = M.repo.Commit(dict(
            _id=ci.hexsha,
            tree_id=ci.tree.hexsha,
            committed=committed,
            authored=authored,
            message=h.really_unicode(ci.message or ''),
            child_ids=[],
            parent_ids=[p.hexsha for p in ci.parents]))
        doc.m.insert(safe=True)
    except DuplicateKeyError:
        # Lost a race with another refresh inserting the same commit.
        return
    refresh_tree(ci.tree, seen)
    return True
148
def refresh_repo(commit_ids, repo_id):
    '''Tag every commit in `commit_ids` as belonging to repo `repo_id`.

    Commits are processed in chunks of QSIZE.  Only documents not already
    tagged with repo_id are matched, and $addToSet keeps repo_ids free of
    duplicates either way.
    '''
    for chunk in utils.chunked_iter(commit_ids, QSIZE):
        chunk = list(chunk)
        spec = dict(
            _id={'$in': chunk},
            repo_ids={'$ne': repo_id})
        update = {'$addToSet': dict(repo_ids=repo_id)}
        M.repo.Commit.m.update_partial(spec, update, multi=True)
137
157
138
def refresh_children(ci):
    '''Register commit `ci` as a child on each of its parent commit docs.'''
    spec = dict(_id={'$in': ci.parent_ids})
    update = {'$addToSet': dict(child_ids=ci._id)}
    M.repo.Commit.m.update_partial(spec, update, multi=True)
146
163
147
class BasicBlockBuilder(object):
164
class BasicBlockBuilder(object):
148
165
149
    def __init__(self, commit_ids):
166
    def __init__(self, commit_ids):
150
        self.commit_ids = commit_ids
167
        self.commit_ids = commit_ids
...
...
170
            log.info('%32s: %r', self.reasons.get(bid, 'none'), bb)
187
            log.info('%32s: %r', self.reasons.get(bid, 'none'), bb)
171
        for bb in self.blocks.itervalues():
188
        for bb in self.blocks.itervalues():
172
            bb.score = len(bb.commit_ids)
189
            bb.score = len(bb.commit_ids)
173
            bb.m.save()
190
            bb.m.save()
174
        return self.blocks
191
        return self.blocks
192
193
    def _all_blocks(self):
        '''Yield each basic block reachable from self.commit_ids exactly once.

        Seeds the walk with every block containing one of self.commit_ids
        (queried in chunks of QSIZE), then follows parent_commit_ids links
        to pull in further blocks, de-duplicating by block _id.
        '''
        found = {}
        for chunk in utils.chunked_iter(self.commit_ids, QSIZE):
            chunk = list(chunk)
            for block in M.repo.BasicBlock.m.find(
                    dict(commit_ids={'$in': chunk})):
                found[block._id] = block
        visited = set()
        pending = list(found.values())
        while pending:
            block = pending.pop()
            if block._id in visited:
                continue
            visited.add(block._id)
            yield block
            # Enqueue any blocks that contain this block's parent commits.
            parents = M.repo.BasicBlock.m.find(
                dict(commit_ids={'$in': block.parent_commit_ids}))
            pending.extend(parents)
209
210
    def cleanup(self):
        '''Delete non-maximal basic blocks.

        For each reachable block bb1, any other block whose commit_ids
        include bb1's head commit (commit_ids[0]) is subsumed by bb1 and
        is deleted as redundant.
        '''
        for bb1 in self._all_blocks():
            for bb2 in M.repo.BasicBlock.m.find(dict(
                    commit_ids=bb1.commit_ids[0])):
                if bb2._id == bb1._id:
                    continue
                log.info('... delete %r (part of %r)', bb2, bb1)
                # Removed leftover debugging breakpoint
                # (`import pdb; pdb.set_trace()`) that would hang any
                # non-interactive refresh run.
                bb2.m.delete()
175
219
176
    def merge_blocks(self):
220
    def merge_blocks(self):
177
        while True:
221
        while True:
178
            for bid, bb in self.blocks.iteritems():
222
            for bid, bb in self.blocks.iteritems():
179
                if len(bb.parent_commit_ids) != 1:
223
                if len(bb.parent_commit_ids) != 1: