--- a/Allura/test-light.py
+++ b/Allura/test-light.py
 import sys
 import logging
 from collections import defaultdict
 from itertools import chain, izip
 from datetime import datetime
+from cPickle import dumps
 
+import bson
 from pylons import c
 from pymongo.errors import DuplicateKeyError
 
 from ming.base import Object
 
-from allura import model as M
 from allura.lib import helpers as h
 from allura.lib import utils
+from allura.model.repo import CommitDoc, TreeDoc, TreesDoc, DiffInfoDoc
+from allura.model.repo import LastCommitDoc, CommitRunDoc
+from allura.model.repo import Commit
+from allura.model.index import ArtifactReferenceDoc, ShortlinkDoc
 
 log = logging.getLogger(__name__)
 
 QSIZE=100
-
-def dolog():
-    h.set_context('test', 'code')
-    repo = c.app.repo._impl._git
-    oid = repo.commit(repo.heads[0]).hexsha
-    log.info('start')
-    for i, ci in enumerate(commitlog(oid)):
-        print repr(ci)
-    log.info('done')
 
 def main():
     if len(sys.argv) > 1:
         h.set_context('test')
         c.project.install_app('Git', 'code', 'Code', init_from_url='/home/rick446/src/forge')
     h.set_context('test', 'code')
-    M.repo.Commit.m.remove({})
-    M.repo.Tree.m.remove({})
-    M.repo.Trees.m.remove({})
-    M.repo.DiffInfo.m.remove({})
-    M.repo.LastCommit.m.remove({})
-    M.repo.BasicBlock.m.remove({})
-    repo = c.app.repo._impl._git
+    CommitDoc.m.remove({})
+    TreeDoc.m.remove({})
+    TreesDoc.m.remove({})
+    DiffInfoDoc.m.remove({})
+    LastCommitDoc.m.remove({})
+    CommitRunDoc.m.remove({})
 
-    # Get all commits
-    seen = set()
-    all_commit_ids = []
-    for head in repo.heads:
-        for ci in repo.iter_commits(head, topo_order=True):
-            if ci.binsha in seen: continue
-            seen.add(ci.binsha)
-            all_commit_ids.append(ci.hexsha)
+    # Get all commits (repo-specific)
+    all_commit_ids = list(c.app.repo.all_commit_ids())
 
-    # Skip commits that are already in the DB
+    # Skip commits that are already in the DB (repo-agnostic)
     commit_ids = unknown_commit_ids(all_commit_ids)
     # commit_ids = commit_ids[:500]
     log.info('Refreshing %d commits', len(commit_ids))
 
-    # Refresh commits
+    # Refresh commits (repo-specific)
+    seen = set()
     for i, oid in enumerate(commit_ids):
-        ci = repo.rev_parse(oid)
-        refresh_commit_info(ci, seen)
+        c.app.repo.refresh_commit_info(oid, seen)
         if (i+1) % 100 == 0:
             log.info('Refresh commit info %d: %s', (i+1), oid)
 
     #############################################
     # Everything below here is repo-agnostic
     #############################################
 
-    refresh_repo(commit_ids, c.app.repo._id)
+    refresh_repo(commit_ids, c.app.repo)
 
     # Refresh child references
     seen = set()
     parents = set()
 
     for i, oid in enumerate(commit_ids):
-        ci = M.repo.Commit.m.find(dict(_id=oid), validate=False).next()
+        ci = CommitDoc.m.find(dict(_id=oid), validate=False).next()
         refresh_children(ci)
         seen.add(ci._id)
         parents.update(ci.parent_ids)
         if (i+1) % 100 == 0:
             log.info('Refresh child (a) info %d: %s', (i+1), ci._id)
     for j, oid in enumerate(parents-seen):
         try:
-            ci = M.repo.Commit.m.find(dict(_id=oid), validate=False).next()
+            ci = CommitDoc.m.find(dict(_id=oid), validate=False).next()
         except StopIteration:
             continue
         refresh_children(ci)
         if (i + j + 1) % 100 == 0:
             log.info('Refresh child (b) info %d: %s', (i + j + 1), ci._id)
 
-    # Refresh basic blocks
-    bbb = BasicBlockBuilder(commit_ids)
-    bbb.run()
-    bbb.cleanup()
-
-    # Verify the log
-    log.info('Logging via basic blocks')
-    for i, ci in enumerate(commitlog(commit_ids[0])):
-        pass
-    log.info('... done (%d commits from %s)', i+1, commit_ids[0])
+    # Refresh commit runs
+    rb = CommitRunBuilder(commit_ids)
+    rb.run()
+    rb.cleanup()
 
     # Refresh trees
     cache = {}
     for i, oid in enumerate(commit_ids):
-        ci = M.repo.Commit.m.find(dict(_id=oid), validate=False).next()
+        ci = CommitDoc.m.find(dict(_id=oid), validate=False).next()
         cache = refresh_commit_trees(ci, cache)
         if (i+1) % 100 == 0:
             log.info('Refresh commit trees %d: %s', (i+1), ci._id)
 
     # Compute diffs
     cache = {}
     for i, oid in enumerate(commit_ids):
-        ci = M.repo.Commit.m.find(dict(_id=oid), validate=False).next()
+        ci = CommitDoc.m.find(dict(_id=oid), validate=False).next()
         compute_diffs(c.app.repo._id, cache, ci)
         if (i+1) % 100 == 0:
             log.info('Compute diffs %d: %s', (i+1), ci._id)
 
 def refresh_commit_trees(ci, cache):
-    trees_doc = M.repo.Trees(dict(
+    trees_doc = TreesDoc(dict(
         _id=ci._id,
         tree_ids = list(trees(ci.tree_id, cache))))
     trees_doc.m.save(safe=False)
     new_cache = dict(
         (oid, cache[oid])
         for oid in trees_doc.tree_ids)
     return new_cache
 
 def refresh_commit_info(ci, seen):
-    if M.repo.Commit.m.find(dict(_id=ci.hexsha)).count() != 0:
+    if CommitDoc.m.find(dict(_id=ci.hexsha)).count() != 0:
         return False
     try:
-        ci_doc = M.repo.Commit(dict(
+        ci_doc = CommitDoc(dict(
             _id=ci.hexsha,
             tree_id=ci.tree.hexsha,
             committed = Object(
                 name=h.really_unicode(ci.committer.name),
                 email=h.really_unicode(ci.committer.email),
...
     except DuplicateKeyError:
         return False
     refresh_tree(ci.tree, seen)
     return True
 
-def refresh_repo(commit_ids, repo_id):
+def refresh_repo(commit_ids, repo):
     for oids in utils.chunked_iter(commit_ids, QSIZE):
         oids = list(oids)
-        M.repo.Commit.m.update_partial(
+        # Create shortlinks and artifactrefs
+        for oid in oids:
+            index_id = 'allura.model.repo.Commit#' + oid
+            ref = ArtifactReferenceDoc(dict(
+                _id=index_id,
+                artifact_reference=dict(
+                    cls=dumps(Commit),
+                    project_id=repo.app.config.project_id,
+                    app_config_id=repo.app.config._id,
+                    artifact_id=oid),
+                references=[]))
+            link = ShortlinkDoc(dict(
+                _id=bson.ObjectId(),
+                ref_id=index_id,
+                project_id=repo.app.config.project_id,
+                app_config_id=repo.app.config._id,
+                link=repo.shorthand_for_commit(oid),
+                url=repo.url() + 'ci/' + oid + '/'))
+            ref.m.save(safe=False, validate=False)
+            link.m.save(safe=False, validate=False)
+        CommitDoc.m.update_partial(
             dict(
                 _id={'$in': oids},
-                repo_ids={'$ne': repo_id}),
-            {'$addToSet': dict(repo_ids=repo_id)},
+                repo_ids={'$ne': repo._id}),
+            {'$addToSet': dict(repo_ids=repo._id)},
             multi=True)
 
 def refresh_children(ci):
-    M.repo.Commit.m.update_partial(
+    CommitDoc.m.update_partial(
         dict(_id={'$in': ci.parent_ids}),
         {'$addToSet': dict(child_ids=ci._id)},
         multi=True)
 
-class BasicBlockBuilder(object):
+class CommitRunBuilder(object):
 
     def __init__(self, commit_ids):
         self.commit_ids = commit_ids
-        self.block_index = {} # by commit ID
-        self.blocks = {} # by block ID
-        self.reasons = {} # reasons to stop merging blocks
+        self.run_index = {} # by commit ID
+        self.runs = {} # by run ID
+        self.reasons = {} # reasons to stop merging runs
 
     def run(self):
         for oids in utils.chunked_iter(self.commit_ids, QSIZE):
             oids = list(oids)
-            commits = list(M.repo.Commit.m.find(dict(_id={'$in':oids})))
+            commits = list(CommitDoc.m.find(dict(_id={'$in':oids})))
             for ci in commits:
-                if ci._id in self.block_index: continue
-                self.block_index[ci._id] = ci._id
-                self.blocks[ci._id] = M.repo.BasicBlock(dict(
+                if ci._id in self.run_index: continue
+                self.run_index[ci._id] = ci._id
+                self.runs[ci._id] = CommitRunDoc(dict(
                     _id=ci._id,
                     parent_commit_ids=ci.parent_ids,
                     commit_ids=[ci._id],
                     commit_times=[ci.authored.date]))
-            self.merge_blocks()
-        log.info('%d basic blocks', len(self.blocks))
-        for bid, bb in sorted(self.blocks.items()):
-            log.info('%32s: %r', self.reasons.get(bid, 'none'), bb)
-        for bb in self.blocks.itervalues():
-            bb.m.save()
-        return self.blocks
+            self.merge_runs()
+        log.info('%d runs', len(self.runs))
+        for rid, run in sorted(self.runs.items()):
+            log.info('%32s: %r', self.reasons.get(rid, 'none'), run._id)
+        for run in self.runs.itervalues():
+            run.m.save()
+        return self.runs
 
-    def _all_blocks(self):
-        blocks = {}
+    def _all_runs(self):
+        runs = {}
         for oids in utils.chunked_iter(self.commit_ids, QSIZE):
             oids = list(oids)
-            for bb in M.repo.BasicBlock.m.find(dict(commit_ids={'$in': oids})):
-                blocks[bb._id] = bb
-        seen_bids = set()
-        blocks = blocks.values()
-        while blocks:
-            bb = blocks.pop()
-            if bb._id in seen_bids: continue
-            seen_bids.add(bb._id)
-            yield bb
-            for bb in M.repo.BasicBlock.m.find(
-                    dict(commit_ids={'$in':bb.parent_commit_ids})):
-                blocks.append(bb)
+            for run in CommitRunDoc.m.find(dict(commit_ids={'$in': oids})):
+                runs[run._id] = run
+        seen_run_ids = set()
+        runs = runs.values()
+        while runs:
+            run = runs.pop()
+            if run._id in seen_run_ids: continue
+            seen_run_ids.add(run._id)
+            yield run
+            for run in CommitRunDoc.m.find(
+                    dict(commit_ids={'$in':run.parent_commit_ids})):
+                runs.append(run)
 
     def cleanup(self):
-        '''Delete non-maximal basic blocks'''
-        for bb1 in self._all_blocks():
-            for bb2 in M.repo.BasicBlock.m.find(dict(
-                    commit_ids=bb1.commit_ids[0])):
-                if bb2._id == bb1._id: continue
-                log.info('... delete %r (part of %r)', bb2, bb1)
-                import pdb; pdb.set_trace()
-                bb2.m.delete()
+        '''Delete non-maximal runs'''
+        for run1 in self._all_runs():
+            for run2 in CommitRunDoc.m.find(dict(
+                    commit_ids=run1.commit_ids[0])):
+                if run1._id == run2._id: continue
+                log.info('... delete %r (part of %r)', run2, run1)
+                run2.m.delete()
 
-    def merge_blocks(self):
+    def merge_runs(self):
         while True:
-            for bid, bb in self.blocks.iteritems():
-                if len(bb.parent_commit_ids) != 1:
-                    self.reasons[bid] = '%d parents' % len(bb.parent_commit_ids)
+            for run_id, run in self.runs.iteritems():
+                if len(run.parent_commit_ids) != 1:
+                    self.reasons[run_id] = '%d parents' % len(run.parent_commit_ids)
                     continue
-                p_oid = bb.parent_commit_ids[0]
-                p_bid = self.block_index.get(p_oid)
-                if p_bid is None:
-                    self.reasons[bid] = 'parent commit not found'
+                p_oid = run.parent_commit_ids[0]
+                p_run_id = self.run_index.get(p_oid)
+                if p_run_id is None:
+                    self.reasons[run_id] = 'parent commit not found'
                     continue
-                p_bb = self.blocks.get(p_bid)
-                if p_bb is None:
-                    self.reasons[bid] = 'parent block not found'
+                p_run = self.runs.get(p_run_id)
+                if p_run is None:
+                    self.reasons[run_id] = 'parent run not found'
                     continue
-                if p_bb.commit_ids[0] != p_oid:
-                    self.reasons[bid] = 'parent does not start with parent commit'
+                if p_run.commit_ids[0] != p_oid:
+                    self.reasons[run_id] = 'parent does not start with parent commit'
                     continue
-                bb.commit_ids += p_bb.commit_ids
-                bb.commit_times += p_bb.commit_times
-                bb.parent_commit_ids = p_bb.parent_commit_ids
-                for oid in p_bb.commit_ids:
-                    self.block_index[oid] = bid
+                run.commit_ids += p_run.commit_ids
+                run.commit_times += p_run.commit_times
+                run.parent_commit_ids = p_run.parent_commit_ids
+                for oid in p_run.commit_ids:
+                    self.run_index[oid] = run_id
                 break
             else:
                 break
-            del self.blocks[p_bid]
+            del self.runs[p_run_id]
 
 def refresh_tree(t, seen):
     if t.binsha in seen: return
     seen.add(t.binsha)
-    doc = M.repo.Tree(dict(
+    doc = TreeDoc(dict(
         _id=t.hexsha,
         tree_ids=[],
         blob_ids=[],
         other_ids=[]))
     for o in t:
...
 
 def trees(id, cache):
     yield id
     entries = cache.get(id, None)
     if entries is None:
-        t = M.repo.Tree.m.get(_id=id)
+        t = TreeDoc.m.get(_id=id)
         entries = [ o.id for o in t.tree_ids ]
         cache[id] = entries
     for i in entries:
         for x in trees(i, cache):
             yield x
 
 def unknown_commit_ids(all_commit_ids):
     result = []
     for chunk in utils.chunked_iter(all_commit_ids, QSIZE):
-        q = M.repo.Commit.m.find(_id={'$in':chunk})
+        q = CommitDoc.m.find(_id={'$in':chunk})
         known_commit_ids = set(ci._id for ci in q)
         result += [ oid for oid in chunk if oid not in known_commit_ids ]
     return result
 
 def compute_diffs(repo_id, tree_cache, rhs_ci):
...
         for x in tree.tree_ids:
             yield x.id
             for xx in _walk_tree(tree_index[x.id], tree_index):
                 yield xx
 
-    rhs_tree_ids = M.repo.Trees.m.get(_id=rhs_ci._id).tree_ids
+    rhs_tree_ids = TreesDoc.m.get(_id=rhs_ci._id).tree_ids
     if rhs_ci.parent_ids:
-        lhs_ci = M.repo.Commit.m.get(_id=rhs_ci.parent_ids[0])
+        lhs_ci = CommitDoc.m.get(_id=rhs_ci.parent_ids[0])
     else:
         lhs_ci = None
     if lhs_ci is not None:
-        lhs_tree_ids = M.repo.Trees.m.get(_id=lhs_ci._id).tree_ids
+        lhs_tree_ids = TreesDoc.m.get(_id=lhs_ci._id).tree_ids
     else:
         lhs_tree_ids = []
     new_tree_ids = [
         tid for tid in chain(lhs_tree_ids, rhs_tree_ids)
         if tid not in tree_cache ]
     tree_index = dict(
-        (t._id, t) for t in M.repo.Tree.m.find(dict(_id={'$in': new_tree_ids}),validate=False))
+        (t._id, t) for t in TreeDoc.m.find(dict(_id={'$in': new_tree_ids}),validate=False))
     tree_index.update(tree_cache)
     rhs_tree_ids_set = set(rhs_tree_ids)
     tree_cache.clear()
     tree_cache.update(
         (id, t) for id,t in tree_index.iteritems() if id in rhs_tree_ids_set)
...
     for name, lhs_id, rhs_id in _diff_trees(lhs_tree, rhs_tree, tree_index):
         differences.append(
             dict(name=name, lhs_id=lhs_id, rhs_id=rhs_id))
         # Set last commit info
         if rhs_id is not None:
-            M.repo.LastCommit.set_last_commit(repo_id, rhs_id, rhs_ci)
+            _set_last_commit(repo_id, rhs_id, rhs_ci)
         rhs_tree = tree_index.get(rhs_id, None)
         if rhs_tree is not None:
             for oid in _walk_tree(rhs_tree, tree_index):
-                M.repo.LastCommit.set_last_commit(repo_id, oid, rhs_ci)
-    di = M.repo.DiffInfo(dict(
+                _set_last_commit(repo_id, oid, rhs_ci)
+    di = DiffInfoDoc(dict(
         _id=rhs_ci._id,
         differences=differences))
     di.m.save()
     return tree_cache
-
-def commitlog(commit_id, skip=0, limit=sys.maxint):
-
-    seen = set()
-    def _visit(commit_id):
-        if commit_id in seen: return
-        bb = M.repo.BasicBlock.m.get(commit_ids=commit_id)
-        if bb is None: return
-        index = False
-        for pos, (oid, time) in enumerate(izip(bb.commit_ids, bb.commit_times)):
-            if oid == commit_id: index = True
-            elif not index: continue
-            seen.add(oid)
-            ci_times[oid] = time
-            if pos+1 < len(bb.commit_ids):
-                ci_parents[oid] = [ bb.commit_ids[pos+1] ]
-            else:
-                ci_parents[oid] = bb.parent_commit_ids
-        for oid in bb.parent_commit_ids:
-            _visit(oid)
-
-    def _gen_ids(commit_id, skip, limit):
-        # Traverse the graph in topo order, yielding commit IDs
-        commits = set([commit_id])
-        new_parent = None
-        while commits and limit:
-            # next commit is latest commit that's valid to log
-            if new_parent in commits:
-                ci = new_parent
-            else:
-                ci = max(commits, key=lambda ci:ci_times[ci])
-            commits.remove(ci)
-            if skip:
-                skip -= 1
-                continue
-            else:
-                limit -= 1
-                yield ci
-            # remove this commit from its parents children and add any childless
-            # parents to the 'ready set'
-            new_parent = None
-            for oid in ci_parents[ci]:
-                children = ci_children[oid]
-                children.discard(ci)
-                if not children:
-                    commits.add(oid)
-                    new_parent = oid
-
-    # Load all the blocks to build a commit graph
-    ci_times = {}
-    ci_parents = {}
-    ci_children = defaultdict(set)
-    log.info('Build commit graph')
-    _visit(commit_id)
-    for oid, parents in ci_parents.iteritems():
-        for ci_parent in parents:
-            ci_children[ci_parent].add(oid)
-
-    # Convert oids to commit objects
-    log.info('Traverse commit graph')
-    for oids in utils.chunked_iter(_gen_ids(commit_id, skip, limit), QSIZE):
-        oids = list(oids)
-        index = dict(
-            (ci._id, ci) for ci in M.repo.Commit.m.find(dict(_id={'$in': oids})))
-        for oid in oids:
-            yield index[oid]
 
 def _diff_trees(lhs, rhs, index, *path):
     def _fq(name):
         return '/'.join(reversed(
             (name,) + path))
...
         else:
             yield (_fq(o.name), o.id, rhs_id)
     for name, id in rhs_blob_ids.items():
         yield (_fq(name), None, id)
 
+def _set_last_commit(repo_id, oid, commit):
+    lc = LastCommitDoc(dict(
+        _id='%s:%s' % (repo_id, oid),
+        repo_id=repo_id,
+        object_id=oid,
+        commit_info=dict(
+            id=commit._id,
+            author=commit.authored.name,
+            author_email=commit.authored.email,
+            date=commit.authored.date,
+            # author_url=commit.author_url,
+            # href=commit.url(),
+            # shortlink=commit.shorthand_id(),
+            # summary=commit.summary
+            )))
+    lc.m.save(safe=False)
+    return lc
+
 if __name__ == '__main__':
     main()
     # dolog()