Switch to unified view

a/Allura/test-light.py b/Allura/test-light.py
1
import sys
1
import sys
2
import logging
2
import logging
3
from collections import defaultdict
3
from collections import defaultdict
4
from itertools import chain, izip
4
from itertools import chain, izip
5
from datetime import datetime
5
from datetime import datetime
6
from cPickle import dumps
6
7
8
import bson
7
from pylons import c
9
from pylons import c
8
from pymongo.errors import DuplicateKeyError
10
from pymongo.errors import DuplicateKeyError
9
11
10
from ming.base import Object
12
from ming.base import Object
11
13
12
from allura import model as M
13
from allura.lib import helpers as h
14
from allura.lib import helpers as h
14
from allura.lib import utils
15
from allura.lib import utils
16
from allura.model.repo import CommitDoc, TreeDoc, TreesDoc, DiffInfoDoc
17
from allura.model.repo import LastCommitDoc, CommitRunDoc
18
from allura.model.repo import Commit
19
from allura.model.index import ArtifactReferenceDoc, ShortlinkDoc
15
20
16
log = logging.getLogger(__name__)
21
log = logging.getLogger(__name__)
17
22
18
QSIZE=100
23
QSIZE=100
19
20
def dolog():
    '''Print the repr of every commit that commitlog() yields from the first head.

    Works on the 'code' app of the 'test' project (set via h.set_context).
    '''
    h.set_context('test', 'code')
    repo = c.app.repo._impl._git
    oid = repo.commit(repo.heads[0]).hexsha
    log.info('start')
    for ci in commitlog(oid):
        # Parenthesised single argument prints identically on Python 2 and 3.
        print(repr(ci))
    log.info('done')
28
24
29
def main():
    '''Wipe and rebuild the cached commit documents for the test/code repo.

    When any command-line argument is given, a Git app is first installed
    at mount point 'code' in the 'test' project.  The function then empties
    the commit, tree, trees, diff-info, last-commit and commit-run
    collections and repopulates them step by step, logging progress every
    100 commits.
    '''
    if len(sys.argv) > 1:
        h.set_context('test')
        c.project.install_app('Git', 'code', 'Code', init_from_url='/home/rick446/src/forge')
    h.set_context('test', 'code')
    CommitDoc.m.remove({})
    TreeDoc.m.remove({})
    TreesDoc.m.remove({})
    DiffInfoDoc.m.remove({})
    LastCommitDoc.m.remove({})
    CommitRunDoc.m.remove({})

    # Get all commits (repo-specific)
    all_commit_ids = list(c.app.repo.all_commit_ids())

    # Skip commits that are already in the DB (repo-agnostic)
    commit_ids = unknown_commit_ids(all_commit_ids)
    # commit_ids = commit_ids[:500]
    log.info('Refreshing %d commits', len(commit_ids))

    # Refresh commits (repo-specific)
    seen = set()
    for i, oid in enumerate(commit_ids):
        c.app.repo.refresh_commit_info(oid, seen)
        if (i+1) % 100 == 0:
            log.info('Refresh commit info %d: %s', (i+1), oid)

    #############################################
    # Everything below here is repo-agnostic
    #############################################

    refresh_repo(commit_ids, c.app.repo)

    # Refresh child references
    seen = set()
    parents = set()

    for i, oid in enumerate(commit_ids):
        ci = CommitDoc.m.find(dict(_id=oid), validate=False).next()
        refresh_children(ci)
        seen.add(ci._id)
        parents.update(ci.parent_ids)
        if (i+1) % 100 == 0:
            log.info('Refresh child (a) info %d: %s', (i+1), ci._id)
    # Parents that were not part of this refresh still need their child_ids
    # updated; parents with no CommitDoc are skipped.  `i` carries over from
    # the loop above so the progress counter keeps increasing.
    for j, oid in enumerate(parents-seen):
        try:
            ci = CommitDoc.m.find(dict(_id=oid), validate=False).next()
        except StopIteration:
            continue
        refresh_children(ci)
        if (i + j + 1) % 100 == 0:
            log.info('Refresh child (b) info %d: %s', (i + j + 1), ci._id)

    # Refresh commit runs
    rb = CommitRunBuilder(commit_ids)
    rb.run()
    rb.cleanup()

    # Refresh trees
    cache = {}
    for i, oid in enumerate(commit_ids):
        ci = CommitDoc.m.find(dict(_id=oid), validate=False).next()
        cache = refresh_commit_trees(ci, cache)
        if (i+1) % 100 == 0:
            log.info('Refresh commit trees %d: %s', (i+1), ci._id)

    # Compute diffs
    cache = {}
    for i, oid in enumerate(commit_ids):
        ci = CommitDoc.m.find(dict(_id=oid), validate=False).next()
        compute_diffs(c.app.repo._id, cache, ci)
        if (i+1) % 100 == 0:
            log.info('Compute diffs %d: %s', (i+1), ci._id)
115
98
116
def refresh_commit_trees(ci, cache):
    '''Save a TreesDoc listing every tree ID reachable from ci's root tree.

    cache maps tree ID -> list of child tree IDs; trees() fills it in as it
    walks.  Returns a new cache restricted to the trees of this commit, for
    the caller to pass along with the next commit.
    '''
    trees_doc = TreesDoc(dict(
            _id=ci._id,
            tree_ids = list(trees(ci.tree_id, cache))))
    trees_doc.m.save(safe=False)
    new_cache = dict(
        (oid, cache[oid])
        for oid in trees_doc.tree_ids)
    return new_cache
125
108
126
def refresh_commit_info(ci, seen):
109
def refresh_commit_info(ci, seen):
127
    if M.repo.Commit.m.find(dict(_id=ci.hexsha)).count() != 0:
110
    if CommitDoc.m.find(dict(_id=ci.hexsha)).count() != 0:
128
        return False
111
        return False
129
    try:
112
    try:
130
        ci_doc = M.repo.Commit(dict(
113
        ci_doc = CommitDoc(dict(
131
                _id=ci.hexsha,
114
                _id=ci.hexsha,
132
                tree_id=ci.tree.hexsha,
115
                tree_id=ci.tree.hexsha,
133
                committed = Object(
116
                committed = Object(
134
                    name=h.really_unicode(ci.committer.name),
117
                    name=h.really_unicode(ci.committer.name),
135
                    email=h.really_unicode(ci.committer.email),
118
                    email=h.really_unicode(ci.committer.email),
...
...
147
    except DuplicateKeyError:
130
    except DuplicateKeyError:
148
        return False
131
        return False
149
    refresh_tree(ci.tree, seen)
132
    refresh_tree(ci.tree, seen)
150
    return True
133
    return True
151
134
152
def refresh_repo(commit_ids, repo):
    '''Index the given commits and record that they belong to repo.

    Working in chunks of QSIZE commit IDs, this saves one
    ArtifactReferenceDoc and one ShortlinkDoc per commit, then adds
    repo._id to the repo_ids of each CommitDoc that does not list it yet.
    '''
    for oids in utils.chunked_iter(commit_ids, QSIZE):
        oids = list(oids)
        # Create shortlinks and artifactrefs
        for oid in oids:
            index_id = 'allura.model.repo.Commit#' + oid
            ref = ArtifactReferenceDoc(dict(
                    _id=index_id,
                    artifact_reference=dict(
                        cls=dumps(Commit),
                        project_id=repo.app.config.project_id,
                        app_config_id=repo.app.config._id,
                        artifact_id=oid),
                    references=[]))
            link = ShortlinkDoc(dict(
                    _id=bson.ObjectId(),
                    ref_id=index_id,
                    project_id=repo.app.config.project_id,
                    app_config_id=repo.app.config._id,
                    link=repo.shorthand_for_commit(oid),
                    url=repo.url() + 'ci/' + oid + '/'))
            # Both saves are unacknowledged and skip schema validation.
            ref.m.save(safe=False, validate=False)
            link.m.save(safe=False, validate=False)
        CommitDoc.m.update_partial(
            dict(
                _id={'$in': oids},
                repo_ids={'$ne': repo._id}),
            {'$addToSet': dict(repo_ids=repo._id)},
            multi=True)
161
164
162
def refresh_children(ci):
    '''Add ci._id to the child_ids of every CommitDoc listed in ci.parent_ids.'''
    CommitDoc.m.update_partial(
        dict(_id={'$in': ci.parent_ids}),
        {'$addToSet': dict(child_ids=ci._id)},
        multi=True)
167
170
168
class CommitRunBuilder(object):
    '''Group commits into runs, one CommitRunDoc per run.

    Each commit starts as its own single-commit run.  merge_runs() then
    appends a run's sole parent run onto it for as long as that is possible.
    '''

    def __init__(self, commit_ids):
        self.commit_ids = commit_ids
        self.run_index = {}  # by commit ID
        self.runs = {}       # by run ID
        self.reasons = {}    # reasons to stop merging runs

    def run(self):
        '''Build the runs for self.commit_ids, save them, and return them.

        Returns the dict of run ID -> CommitRunDoc left after merging.
        '''
        for oids in utils.chunked_iter(self.commit_ids, QSIZE):
            oids = list(oids)
            commits = list(CommitDoc.m.find(dict(_id={'$in':oids})))
            for ci in commits:
                if ci._id in self.run_index: continue
                self.run_index[ci._id] = ci._id
                self.runs[ci._id] = CommitRunDoc(dict(
                        _id=ci._id,
                        parent_commit_ids=ci.parent_ids,
                        commit_ids=[ci._id],
                        commit_times=[ci.authored.date]))
            self.merge_runs()
        log.info('%d runs', len(self.runs))
        for rid in sorted(self.runs):
            log.info('%32s: %r', self.reasons.get(rid, 'none'), self.runs[rid]._id)
        for run in self.runs.values():
            run.m.save()
        return self.runs

    def _all_runs(self):
        '''Yield each stored run once: those containing any of
        self.commit_ids, then runs reached by following parent_commit_ids.
        '''
        runs = {}
        for oids in utils.chunked_iter(self.commit_ids, QSIZE):
            oids = list(oids)
            for run in CommitRunDoc.m.find(dict(commit_ids={'$in': oids})):
                runs[run._id] = run
        seen_run_ids = set()
        # A real list is needed for pop()/append() below.
        runs = list(runs.values())
        while runs:
            run = runs.pop()
            if run._id in seen_run_ids: continue
            seen_run_ids.add(run._id)
            yield run
            for run in CommitRunDoc.m.find(
                dict(commit_ids={'$in':run.parent_commit_ids})):
                runs.append(run)

    def cleanup(self):
        '''Delete non-maximal runs'''
        for run1 in self._all_runs():
            # Any other stored run that contains run1's first commit is
            # deleted.
            for run2 in CommitRunDoc.m.find(dict(
                    commit_ids=run1.commit_ids[0])):
                if run1._id == run2._id: continue
                log.info('... delete %r (part of %r)', run2, run1)
                run2.m.delete()

    def merge_runs(self):
        '''Repeatedly append a run's only parent run onto it until no merge applies.

        A run is left alone, with the reason recorded in self.reasons, when
        it does not have exactly one parent commit, when the parent commit
        or its run is unknown, or when the parent run does not begin with
        that parent commit.
        '''
        while True:
            # The loop only mutates run objects and breaks before the dict is
            # changed, so iterating the live items is safe.
            for run_id, run in self.runs.items():
                if len(run.parent_commit_ids) != 1:
                    self.reasons[run_id] = '%d parents' % len(run.parent_commit_ids)
                    continue
                p_oid = run.parent_commit_ids[0]
                p_run_id = self.run_index.get(p_oid)
                if p_run_id is None:
                    self.reasons[run_id] = 'parent commit not found'
                    continue
                p_run = self.runs.get(p_run_id)
                if p_run is None:
                    self.reasons[run_id] = 'parent run not found'
                    continue
                if p_run.commit_ids[0] != p_oid:
                    self.reasons[run_id] = 'parent does not start with parent commit'
                    continue
                run.commit_ids += p_run.commit_ids
                run.commit_times += p_run.commit_times
                run.parent_commit_ids = p_run.parent_commit_ids
                for oid in p_run.commit_ids:
                    self.run_index[oid] = run_id
                break
            else:
                # A full pass found nothing to merge.
                break
            # Reached only after a merge: drop the absorbed parent run.
            del self.runs[p_run_id]
250
252
251
def refresh_tree(t, seen):
253
def refresh_tree(t, seen):
252
    if t.binsha in seen: return
254
    if t.binsha in seen: return
253
    seen.add(t.binsha)
255
    seen.add(t.binsha)
254
    doc = M.repo.Tree(dict(
256
    doc = TreeDoc(dict(
255
            _id=t.hexsha,
257
            _id=t.hexsha,
256
            tree_ids=[],
258
            tree_ids=[],
257
            blob_ids=[],
259
            blob_ids=[],
258
            other_ids=[]))
260
            other_ids=[]))
259
    for o in t:
261
    for o in t:
...
...
272
274
273
def trees(id, cache):
    '''Yield id, then recursively the IDs of all its subtrees, depth-first.

    cache maps tree ID -> list of child tree IDs.  On a miss the TreeDoc is
    loaded and its child IDs are stored in cache, so the caller's dict is
    filled in as a side effect.
    '''
    yield id
    entries = cache.get(id, None)
    if entries is None:
        t = TreeDoc.m.get(_id=id)
        entries = [ o.id for o in t.tree_ids ]
        cache[id] = entries
    for i in entries:
        for x in trees(i, cache):
            yield x
283
285
284
def unknown_commit_ids(all_commit_ids):
    '''Return the IDs in all_commit_ids that have no CommitDoc yet.

    Input order is preserved.  The lookup is done in chunks of QSIZE IDs.
    '''
    result = []
    for chunk in utils.chunked_iter(all_commit_ids, QSIZE):
        # The chunk is used twice (query, then filter), so materialize it
        # first, as every other chunked_iter caller in this file does.
        chunk = list(chunk)
        # Pass the spec as a dict, like the other CommitDoc.m.find calls here.
        q = CommitDoc.m.find(dict(_id={'$in': chunk}))
        known_commit_ids = set(ci._id for ci in q)
        result += [ oid for oid in chunk if oid not in known_commit_ids ]
    return result
291
293
292
def compute_diffs(repo_id, tree_cache, rhs_ci):
294
def compute_diffs(repo_id, tree_cache, rhs_ci):
...
...
296
        for x in tree.tree_ids:
298
        for x in tree.tree_ids:
297
            yield x.id
299
            yield x.id
298
            for xx in _walk_tree(tree_index[x.id], tree_index):
300
            for xx in _walk_tree(tree_index[x.id], tree_index):
299
                yield xx
301
                yield xx
300
302
301
    rhs_tree_ids = M.repo.Trees.m.get(_id=rhs_ci._id).tree_ids
303
    rhs_tree_ids = TreesDoc.m.get(_id=rhs_ci._id).tree_ids
302
    if rhs_ci.parent_ids:
304
    if rhs_ci.parent_ids:
303
        lhs_ci = M.repo.Commit.m.get(_id=rhs_ci.parent_ids[0])
305
        lhs_ci = CommitDoc.m.get(_id=rhs_ci.parent_ids[0])
304
    else:
306
    else:
305
        lhs_ci = None
307
        lhs_ci = None
306
    if lhs_ci is not None:
308
    if lhs_ci is not None:
307
        lhs_tree_ids = M.repo.Trees.m.get(_id=lhs_ci._id).tree_ids
309
        lhs_tree_ids = TreesDoc.m.get(_id=lhs_ci._id).tree_ids
308
    else:
310
    else:
309
        lhs_tree_ids = []
311
        lhs_tree_ids = []
310
    new_tree_ids = [
312
    new_tree_ids = [
311
        tid for tid in chain(lhs_tree_ids, rhs_tree_ids)
313
        tid for tid in chain(lhs_tree_ids, rhs_tree_ids)
312
        if tid not in tree_cache ]
314
        if tid not in tree_cache ]
313
    tree_index = dict(
315
    tree_index = dict(
314
        (t._id, t) for t in M.repo.Tree.m.find(dict(_id={'$in': new_tree_ids}),validate=False))
316
        (t._id, t) for t in TreeDoc.m.find(dict(_id={'$in': new_tree_ids}),validate=False))
315
    tree_index.update(tree_cache)
317
    tree_index.update(tree_cache)
316
    rhs_tree_ids_set = set(rhs_tree_ids)
318
    rhs_tree_ids_set = set(rhs_tree_ids)
317
    tree_cache.clear()
319
    tree_cache.clear()
318
    tree_cache.update(
320
    tree_cache.update(
319
        (id, t) for id,t in tree_index.iteritems() if id in rhs_tree_ids_set)
321
        (id, t) for id,t in tree_index.iteritems() if id in rhs_tree_ids_set)
...
...
326
    for name, lhs_id, rhs_id in _diff_trees(lhs_tree, rhs_tree, tree_index):
328
    for name, lhs_id, rhs_id in _diff_trees(lhs_tree, rhs_tree, tree_index):
327
        differences.append(
329
        differences.append(
328
            dict(name=name, lhs_id=lhs_id, rhs_id=rhs_id))
330
            dict(name=name, lhs_id=lhs_id, rhs_id=rhs_id))
329
        # Set last commit info
331
        # Set last commit info
330
        if rhs_id is not None:
332
        if rhs_id is not None:
331
            M.repo.LastCommit.set_last_commit(repo_id, rhs_id, rhs_ci)
333
            _set_last_commit(repo_id, rhs_id, rhs_ci)
332
        rhs_tree = tree_index.get(rhs_id, None)
334
        rhs_tree = tree_index.get(rhs_id, None)
333
        if rhs_tree is not None:
335
        if rhs_tree is not None:
334
            for oid in _walk_tree(rhs_tree, tree_index):
336
            for oid in _walk_tree(rhs_tree, tree_index):
335
                M.repo.LastCommit.set_last_commit(repo_id, oid, rhs_ci)
337
                _set_last_commit(repo_id, oid, rhs_ci)
336
    di = M.repo.DiffInfo(dict(
338
    di = DiffInfoDoc(dict(
337
            _id=rhs_ci._id,
339
            _id=rhs_ci._id,
338
            differences=differences))
340
            differences=differences))
339
    di.m.save()
341
    di.m.save()
340
    return tree_cache
342
    return tree_cache
341
342
def commitlog(commit_id, skip=0, limit=sys.maxint):
    '''Yield commit documents reachable from commit_id, children before parents.

    The commit graph is rebuilt from the stored basic blocks rather than from
    the commits themselves.  The first `skip` commits in traversal order are
    dropped and at most `limit` commits are yielded.
    '''

    seen = set()
    def _visit(commit_id):
        # Record time and parents for commit_id and everything after it in
        # its basic block, then recurse into the block's parents.
        if commit_id in seen: return
        bb = M.repo.BasicBlock.m.get(commit_ids=commit_id)
        if bb is None: return
        index = False
        for pos, (oid, time) in enumerate(izip(bb.commit_ids, bb.commit_times)):
            # Skip the part of the block that precedes commit_id.
            if oid == commit_id: index = True
            elif not index: continue
            seen.add(oid)
            ci_times[oid] = time
            if pos+1 < len(bb.commit_ids):
                # Inside a block, the parent is simply the next entry.
                ci_parents[oid] = [ bb.commit_ids[pos+1] ]
            else:
                ci_parents[oid] = bb.parent_commit_ids
        for oid in bb.parent_commit_ids:
            _visit(oid)

    def _gen_ids(commit_id, skip, limit):
        # Traverse the graph in topo order, yielding commit IDs
        commits = set([commit_id])
        new_parent = None
        while commits and limit:
            # next commit is latest commit that's valid to log
            if new_parent in commits:
                ci = new_parent
            else:
                ci = max(commits, key=lambda ci:ci_times[ci])
            commits.remove(ci)
            if skip:
                skip -= 1
                continue
            else:
                limit -= 1
            yield ci
            # remove this commit from its parents children and add any childless
            # parents to the 'ready set'
            new_parent = None
            for oid in ci_parents[ci]:
                children = ci_children[oid]
                children.discard(ci)
                if not children:
                    commits.add(oid)
                    new_parent = oid

    # Load all the blocks to build a commit graph
    ci_times = {}
    ci_parents = {}
    ci_children = defaultdict(set)
    log.info('Build commit graph')
    _visit(commit_id)
    for oid, parents in ci_parents.items():
        for ci_parent in parents:
            ci_children[ci_parent].add(oid)

    # Convert oids to commit objects
    log.info('Traverse commit graph')
    for oids in utils.chunked_iter(_gen_ids(commit_id, skip, limit), QSIZE):
        oids = list(oids)
        index = dict(
            (ci._id, ci) for ci in M.repo.Commit.m.find(dict(_id={'$in': oids})))
        # The query result is unordered; re-emit in traversal order.
        for oid in oids:
            yield index[oid]
407
343
408
def _diff_trees(lhs, rhs, index, *path):
344
def _diff_trees(lhs, rhs, index, *path):
409
    def _fq(name):
345
    def _fq(name):
410
        return '/'.join(reversed(
346
        return '/'.join(reversed(
411
                (name,) + path))
347
                (name,) + path))
...
...
439
        else:
375
        else:
440
            yield (_fq(o.name), o.id, rhs_id)
376
            yield (_fq(o.name), o.id, rhs_id)
441
    for name, id in rhs_blob_ids.items():
377
    for name, id in rhs_blob_ids.items():
442
        yield (_fq(name), None, id)
378
        yield (_fq(name), None, id)
443
379
380
def _set_last_commit(repo_id, oid, commit):
    '''Save and return a LastCommitDoc linking object oid in repo_id to commit.

    The document _id is '<repo_id>:<oid>'.  Only the commit's id and its
    author name, email and date are stored.
    '''
    lc = LastCommitDoc(dict(
            _id='%s:%s' % (repo_id, oid),
            repo_id=repo_id,
            object_id=oid,
            commit_info=dict(
                id=commit._id,
                author=commit.authored.name,
                author_email=commit.authored.email,
                date=commit.authored.date,
                # author_url=commit.author_url,
                # href=commit.url(),
                # shortlink=commit.shorthand_id(),
                # summary=commit.summary
                )))
    lc.m.save(safe=False)
    return lc
397
444
# Script entry point: run the full refresh.
if __name__ == '__main__':
    main()
    # dolog()