|
a/Allura/allura/model/repo.py |
|
b/Allura/allura/model/repo.py |
|
|
1 |
import re
|
|
|
2 |
import sys
|
|
|
3 |
import logging
|
|
|
4 |
from hashlib import sha1
|
|
|
5 |
from itertools import izip, chain
|
1 |
from datetime import datetime
|
6 |
from datetime import datetime
|
|
|
7 |
from collections import defaultdict
|
2 |
|
8 |
|
3 |
from ming import Document, Field
|
9 |
from pylons import g
|
|
|
10 |
|
|
|
11 |
from ming import Field, Index, collection
|
4 |
from ming import schema as S
|
12 |
from ming import schema as S
|
|
|
13 |
from ming.utils import LazyProperty
|
|
|
14 |
from ming.orm import mapper
|
5 |
|
15 |
|
|
|
16 |
from allura.lib import utils
|
|
|
17 |
from allura.lib import helpers as h
|
|
|
18 |
|
|
|
19 |
from .auth import User
|
6 |
from .session import main_doc_session, project_doc_session
|
20 |
from .session import main_doc_session, project_doc_session
|
|
|
21 |
from .session import repository_orm_session
|
7 |
|
22 |
|
8 |
class Commit(Document):
|
23 |
log = logging.getLogger(__name__)
|
9 |
class __mongometa__:
|
24 |
|
10 |
name = 'repo_ci'
|
|
|
11 |
session = main_doc_session
|
|
|
12 |
indexes = [
|
|
|
13 |
('parent_ids',),
|
|
|
14 |
('child_ids',),
|
|
|
15 |
('repo_ids',)]
|
|
|
16 |
User = dict(name=str, email=str, date=datetime)
|
25 |
SUser = dict(name=str, email=str, date=datetime)
|
17 |
|
|
|
18 |
_id = Field(str)
|
|
|
19 |
tree_id = Field(str)
|
|
|
20 |
committed = Field(User)
|
|
|
21 |
authored = Field(User)
|
|
|
22 |
message = Field(str)
|
|
|
23 |
parent_ids = Field([str])
|
|
|
24 |
child_ids = Field([str])
|
|
|
25 |
repo_ids = Field([S.ObjectId()])
|
|
|
26 |
|
|
|
27 |
def __repr__(self):
|
|
|
28 |
return '%s %s' % (
|
|
|
29 |
self._id[:7], self.summary)
|
|
|
30 |
|
|
|
31 |
@property
|
|
|
32 |
def summary(self):
|
|
|
33 |
if self.message:
|
|
|
34 |
summary = []
|
|
|
35 |
for line in self.message.splitlines():
|
|
|
36 |
line = line.rstrip()
|
|
|
37 |
if line: summary.append(line)
|
|
|
38 |
else: return ' '.join(summary)
|
|
|
39 |
return ' '.join(summary)
|
|
|
40 |
return ''
|
|
|
41 |
|
|
|
42 |
def url(self):
|
|
|
43 |
return ''
|
|
|
44 |
|
|
|
45 |
def shorthand_id(self):
|
|
|
46 |
return ''
|
|
|
47 |
|
|
|
48 |
@property
|
|
|
49 |
def author_url(self):
|
|
|
50 |
return ''
|
|
|
51 |
|
|
|
52 |
class Tree(Document):
|
|
|
53 |
class __mongometa__:
|
|
|
54 |
name = 'repo_tree'
|
|
|
55 |
session = main_doc_session
|
|
|
56 |
ObjType=S.OneOf('blob', 'tree', 'submodule')
|
26 |
SObjType=S.OneOf('blob', 'tree', 'submodule')
|
|
|
27 |
QSIZE = 100
|
|
|
28 |
README_RE = re.compile('^README(\.[^.]*)?$', re.IGNORECASE)
|
57 |
|
29 |
|
58 |
_id = Field(str)
|
30 |
# Basic commit information
|
|
|
31 |
CommitDoc = collection(
|
|
|
32 |
'repo_ci', main_doc_session,
|
|
|
33 |
Field('_id', str),
|
|
|
34 |
Field('tree_id', str),
|
|
|
35 |
Field('committed', SUser),
|
|
|
36 |
Field('authored', SUser),
|
|
|
37 |
Field('message', str),
|
|
|
38 |
Field('parent_ids', [str], index=True),
|
|
|
39 |
Field('child_ids', [str], index=True),
|
|
|
40 |
Field('repo_ids', [ S.ObjectId() ], index=True))
|
|
|
41 |
|
|
|
42 |
# Basic tree information
|
|
|
43 |
TreeDoc = collection(
|
|
|
44 |
'repo_tree', main_doc_session,
|
|
|
45 |
Field('_id', str),
|
59 |
tree_ids = Field([dict(name=str, id=str)])
|
46 |
Field('tree_ids', [dict(name=str, id=str)]),
|
60 |
blob_ids = Field([dict(name=str, id=str)])
|
47 |
Field('blob_ids', [dict(name=str, id=str)]),
|
61 |
other_ids = Field([dict(name=str, id=str, type=ObjType)])
|
48 |
Field('other_ids', [dict(name=str, id=str, type=SObjType)]))
|
62 |
|
49 |
|
63 |
class LastCommit(Document):
|
50 |
# Information about the last commit to touch a tree/blob
|
64 |
class __mongometa__:
|
51 |
LastCommitDoc = collection(
|
65 |
name = 'repo_last_commit'
|
52 |
'repo_last_commit', project_doc_session,
|
66 |
session = project_doc_session
|
53 |
Field('_id', str),
|
67 |
indexes = [
|
54 |
Field('repo_id', S.ObjectId()),
|
68 |
( 'repo_id', 'object_id'),
|
55 |
Field('object_id', str),
|
69 |
]
|
56 |
Field('commit_info', dict(
|
70 |
|
|
|
71 |
_id = Field(str)
|
|
|
72 |
repo_id=Field(S.ObjectId())
|
|
|
73 |
object_id=Field(str)
|
|
|
74 |
commit_info = Field(dict(
|
|
|
75 |
id=str,
|
57 |
id=str,
|
76 |
date=datetime,
|
58 |
date=datetime,
|
77 |
author=str,
|
59 |
author=str,
|
78 |
author_email=str,
|
60 |
author_email=str,
|
79 |
author_url=str,
|
61 |
author_url=str,
|
80 |
href=str,
|
62 |
href=str,
|
81 |
shortlink=str,
|
63 |
shortlink=str,
|
82 |
summary=str))
|
64 |
summary=str)),
|
|
|
65 |
Index('repo_id', 'object_id'))
|
83 |
|
66 |
|
84 |
@classmethod
|
67 |
# List of all trees contained within a commit
|
85 |
def set_last_commit(cls, repo_id, oid, commit):
|
68 |
TreesDoc = collection(
|
86 |
lc = cls(dict(
|
69 |
'repo_trees', main_doc_session,
|
87 |
_id='%s:%s' % (repo_id, oid),
|
70 |
Field('_id', str),
|
88 |
repo_id=repo_id,
|
71 |
Field('tree_ids', [str]))
|
89 |
object_id=oid,
|
|
|
90 |
commit_info=dict(
|
|
|
91 |
id=commit._id,
|
|
|
92 |
author=commit.authored.name,
|
|
|
93 |
author_email=commit.authored.email,
|
|
|
94 |
author_url=commit.author_url,
|
|
|
95 |
date=commit.authored.date,
|
|
|
96 |
href=commit.url(),
|
|
|
97 |
shortlink=commit.shorthand_id(),
|
|
|
98 |
summary=commit.summary)))
|
|
|
99 |
lc.m.save(safe=False)
|
|
|
100 |
return lc
|
|
|
101 |
|
72 |
|
102 |
class Trees(Document):
|
73 |
# Information about which things were added/removed in commit
|
103 |
class __mongometa__:
|
74 |
DiffInfoDoc = collection(
|
104 |
name = 'repo_trees'
|
75 |
'repo_diffinfo', main_doc_session,
|
105 |
session = main_doc_session
|
76 |
Field('_id', str),
|
106 |
|
77 |
Field(
|
107 |
_id = Field(str) # commit ID
|
78 |
'differences',
|
108 |
tree_ids = Field([str]) # tree IDs
|
|
|
109 |
|
|
|
110 |
class DiffInfo(Document):
|
|
|
111 |
class __mongometa__:
|
|
|
112 |
name = 'repo_diffinfo'
|
|
|
113 |
session = main_doc_session
|
|
|
114 |
|
|
|
115 |
_id = Field(str)
|
|
|
116 |
differences = Field([dict(name=str, lhs_id=str, rhs_id=str)])
|
79 |
[ dict(name=str, lhs_id=str, rhs_id=str)]))
|
117 |
|
80 |
|
118 |
class BasicBlock(Document):
|
81 |
# List of commit runs (a run is a linear series of single-parent commits)
|
119 |
class __mongometa__:
|
82 |
CommitRunDoc = collection(
|
120 |
name = 'repo_basic_block'
|
83 |
'repo_commitrun', main_doc_session,
|
121 |
session = main_doc_session
|
84 |
Field('_id', str),
|
122 |
indexes = [
|
85 |
Field('parent_commit_ids', [str]),
|
123 |
('commit_ids',) ]
|
86 |
Field('commit_ids', [str], index=True),
|
|
|
87 |
Field('commit_times', [datetime]))
|
124 |
|
88 |
|
125 |
_id = Field(str)
|
89 |
class RepoObject(object):
|
126 |
parent_commit_ids = Field([str])
|
|
|
127 |
commit_ids = Field([str])
|
|
|
128 |
commit_times = Field([datetime])
|
|
|
129 |
|
90 |
|
130 |
def __repr__(self):
|
91 |
def __repr__(self):
|
131 |
return '%s: (P %s, T %s..%s (%d commits))' % (
|
92 |
return '<%s %s>' % (
|
|
|
93 |
self.__class__.__name__, self._id)
|
|
|
94 |
|
|
|
95 |
def primary(self):
|
|
|
96 |
return self
|
|
|
97 |
|
|
|
98 |
def index_id(self):
|
|
|
99 |
'''Globally unique artifact identifier. Used for
|
|
|
100 |
SOLR ID, shortlinks, and maybe elsewhere
|
|
|
101 |
'''
|
|
|
102 |
id = '%s.%s#%s' % (
|
|
|
103 |
self.__class__.__module__,
|
|
|
104 |
self.__class__.__name__,
|
132 |
self._id[:6],
|
105 |
self._id)
|
133 |
[ oid[:6] for oid in self.parent_commit_ids ],
|
106 |
return id.replace('.', '/')
|
134 |
self.commit_ids[0][:6],
|
107 |
|
135 |
self.commit_ids[-1][:6],
|
108 |
class Commit(RepoObject):
|
136 |
len(self.commit_ids))
|
109 |
# Ephemeral attrs
|
|
|
110 |
repo=None
|
|
|
111 |
|
|
|
112 |
@LazyProperty
def author_url(self):
    '''URL of the user registered under the author's email address.

    Returns None (implicitly, as before) when User.by_email_address
    finds no match.
    '''
    author = User.by_email_address(self.authored.email)
    if not author:
        return None
    return author.url()
|
|
|
116 |
|
|
|
117 |
@LazyProperty
def committer_url(self):
    '''URL of the user registered under the committer's email address.

    Returns None when User.by_email_address finds no match.
    '''
    committer = User.by_email_address(self.committed.email)
    if not committer:
        return None
    return committer.url()
|
|
|
121 |
|
|
|
122 |
@LazyProperty
def tree(self):
    '''Return the root Tree of this commit, or None if it cannot be built.

    If ``tree_id`` is unset, or no tree document exists for it yet, the
    repo is asked to (re)compute the tree via ``repo.compute_tree(self)``.
    A tree that is found gets this commit set as its context.
    '''
    if self.tree_id is None:
        self.tree_id = self.repo.compute_tree(self)
    if self.tree_id is None:
        return None
    # Tree documents are keyed by ``_id``; the repo_tree collection has no
    # ``object_id`` field, so querying on it could never match.
    found = Tree.query.get(_id=self.tree_id)
    if found is None:
        # The tree document is missing: have the repo rebuild it, then retry.
        self.tree_id = self.repo.compute_tree(self)
        found = Tree.query.get(_id=self.tree_id)
    if found is not None:
        found.set_context(self)
    return found
|
|
|
134 |
|
|
|
135 |
@LazyProperty
def summary(self):
    '''First line of the commit message, passed through
    h.text.truncate with a limit of 50.
    '''
    text = h.really_unicode(self.message)
    # Everything before the first newline (the whole text if there is none).
    headline = text.partition('\n')[0]
    return h.text.truncate(headline, 50)
|
|
|
140 |
|
|
|
141 |
def get_path(self, path):
    '''Look up the blob at ``path`` through this commit's tree.

    A single leading '/' is dropped; the last path component is the blob
    name and the preceding components are handed over as the directory
    parts.
    '''
    relative = path[1:] if path.startswith('/') else path
    components = relative.split('/')
    blob_name = components[-1]
    dir_parts = components[:-1]
    return self.tree.get_blob(blob_name, dir_parts)
|
|
|
146 |
|
|
|
147 |
def shorthand_id(self):
    '''Return whatever shorthand the owning repo assigns to this commit.'''
    repo = self.repo
    return repo.shorthand_for_commit(self)
|
|
|
149 |
|
|
|
150 |
@LazyProperty
def symbolic_ids(self):
    '''Result of ``repo.symbolics_for_commit`` for this commit.'''
    repo = self.repo
    return repo.symbolics_for_commit(self)
|
|
|
153 |
|
|
|
154 |
def url(self):
    '''Return the URL the owning repo builds for this commit.'''
    repo = self.repo
    return repo.url_for_commit(self)
|
|
|
156 |
|
|
|
157 |
def log_iter(self, skip, count):
    '''Yield Commit objects from this commit's log, in commitlog() order.

    :param skip: number of leading log entries to leave out
    :param count: maximum number of commits to yield; None means no limit

    Commit ids are fetched from commitlog() in chunks of QSIZE so that each
    chunk is loaded with a single ``$in`` query.  Previously ``skip`` and
    ``count`` were accepted but ignored, so the whole history was always
    yielded.
    '''
    to_skip = skip or 0
    remaining = count
    if remaining is not None and remaining <= 0:
        return
    for oids in utils.chunked_iter(commitlog(self._id), QSIZE):
        oids = list(oids)
        if to_skip >= len(oids):
            # The whole chunk falls inside the skipped prefix: no query needed.
            to_skip -= len(oids)
            continue
        oids = oids[to_skip:]
        to_skip = 0
        if remaining is not None:
            oids = oids[:remaining]
            remaining -= len(oids)
        commits = dict(
            (ci._id, ci) for ci in self.query.find(dict(
                _id={'$in': oids})))
        for oid in oids:
            ci = commits[oid]
            # NOTE(review): no set_context is visible on Commit/RepoObject in
            # this file section -- confirm it is defined elsewhere.
            ci.set_context(self.repo)
            yield ci
        if remaining is not None and remaining <= 0:
            return
|
|
|
167 |
|
|
|
168 |
def log(self, skip, count):
    '''Eagerly collect everything log_iter(skip, count) yields into a list.'''
    return [entry for entry in self.log_iter(skip, count)]
|
|
|
170 |
|
|
|
171 |
def count_revisions(self):
    '''Return the number of commit ids produced by commitlog() for this commit.'''
    # commitlog() expects a commit id (it is matched against the
    # ``commit_ids`` of commit runs), not the Commit object itself.
    return sum(1 for _ in commitlog(self._id))
|
|
|
175 |
|
|
|
176 |
def context(self):
    '''Return ``dict(prev=..., next=...)`` for navigating around this commit.

    ``prev`` is looked up from the first parent id and ``next`` from the
    first child id; either is None when the corresponding id list is empty.
    '''
    def _first(ids):
        # Only the first id is followed; an empty list gives None.
        if not ids:
            return None
        return self.query.get(_id=ids[0])

    neighbours = {}
    neighbours['prev'] = _first(self.parent_ids)
    neighbours['next'] = _first(self.child_ids)
    return neighbours
|
|
|
183 |
|
|
|
184 |
class Tree(RepoObject):
    '''One directory listing of a repository.

    The entry lists ``tree_ids``, ``blob_ids`` and ``other_ids`` are read
    from the instance; each entry carries ``name`` and ``id`` (entries of
    ``other_ids`` also carry ``type``).  The attributes below are not
    persisted; they are filled in by set_context().
    '''
    # Ephemeral attrs
    repo=None
    commit=None
    parent=None
    name=None

    def compute_hash(self):
        '''Compute a hash based on the contents of the tree.  Note that this
        hash does not necessarily correspond to any actual DVCS hash.
        '''
        lines = (
            [ 'tree' + x.name + x.id for x in self.tree_ids ]
            + [ 'blob' + x.name + x.id for x in self.blob_ids ]
            + [ x.type + x.name + x.id for x in self.other_ids ])
        sha_obj = sha1()
        # Sorting makes the hash independent of the stored entry order.
        for line in sorted(lines):
            sha_obj.update(line)
        return sha_obj.hexdigest()

    def set_context(self, commit_or_tree, name=None):
        '''Attach this tree to its surroundings.

        With ``name`` given, ``commit_or_tree`` is the parent tree and this
        tree is its child of that name; without it, ``commit_or_tree`` is
        the commit whose root tree this is.
        '''
        assert commit_or_tree is not self
        self.repo = commit_or_tree.repo
        if name:
            self.commit = commit_or_tree.commit
            self.parent = commit_or_tree
            self.name = name
        else:
            self.commit = commit_or_tree

    def readme(self):
        '''Return ``(name, html)`` for the first blob matching README_RE.

        When no blob matches (or the matching blob is empty) ``html`` is an
        "Empty File" placeholder; ``name`` is None if nothing matched.
        '''
        name = None
        text = ''
        for x in self.blob_ids:
            if README_RE.match(x.name):
                name = x.name
                text = h.really_unicode(self.repo.blob_text(x.id))
                break
        if text == '':
            text = '<p><em>Empty File</em></p>'
        else:
            renderer = g.pypeline_markup.renderer(name)
            if renderer[1]:
                text = g.pypeline_markup.render(name,text)
            else:
                text = '<pre>%s</pre>' % text
        return (name, text)

    def ls(self):
        '''List the entries of this tree with their last-commit info.

        Returns a list of dicts with ``kind``, ``name``, ``href`` and
        ``last_commit`` keys: directories first, then files, then other
        entries, each group sorted by name.  Entries without a stored
        last-commit record get a dict whose values are all None.
        '''
        # Load last commit info.  The blob entries live in ``blob_ids``
        # (there is no ``object_ids`` list), and the last-commit payload is
        # stored under ``commit_info``.
        oids = [ x.id for x in chain(self.tree_ids, self.blob_ids, self.other_ids) ]
        lc_index = dict(
            (lc.object_id, lc.commit_info)
            for lc in LastCommitDoc.m.find(dict(
                    repo_id=self.repo._id,
                    object_id={'$in': oids})))
        results = []
        def _get_last_commit(oid):
            lc = lc_index.get(oid)
            if lc is None:
                lc = dict(
                    author=None,
                    author_email=None,
                    author_url=None,
                    date=None,
                    id=None,
                    href=None,
                    shortlink=None,
                    summary=None)
            return lc
        for x in sorted(self.tree_ids, key=lambda x:x.name):
            results.append(dict(
                    kind='DIR',
                    name=x.name,
                    href=x.name + '/',
                    last_commit=_get_last_commit(x.id)))
        for x in sorted(self.blob_ids, key=lambda x:x.name):
            results.append(dict(
                    kind='FILE',
                    name=x.name,
                    # NOTE(review): files get a trailing '/' just like
                    # directories -- confirm this is what the views expect.
                    href=x.name + '/',
                    last_commit=_get_last_commit(x.id)))
        for x in sorted(self.other_ids, key=lambda x:x.name):
            results.append(dict(
                    kind=x.type,
                    name=x.name,
                    href=None,
                    last_commit=_get_last_commit(x.id)))
        return results

    def path(self):
        '''Return the '/'-terminated path of this tree from the root tree.'''
        if self.parent:
            assert self.parent is not self
            return self.parent.path() + self.name + '/'
        else:
            return '/'

    def url(self):
        '''Return the commit URL followed by 'tree' and this tree's path.'''
        return self.commit.url() + 'tree' + self.path()

    @LazyProperty
    def by_name(self):
        '''Map each entry name to its entry, with a ``type`` key on all of them.

        Tree and blob entries are copied into plain dicts tagged 'tree' or
        'blob'; entries from ``other_ids`` are used as stored.
        '''
        d = dict((x.name, x) for x in self.other_ids)
        d.update(
            (x.name, dict(x, type='tree'))
            for x in self.tree_ids)
        d.update(
            (x.name, dict(x, type='blob'))
            for x in self.blob_ids)
        return d

    def is_blob(self, name):
        '''Return True if the entry called ``name`` is a blob.

        Raises KeyError when this tree has no entry of that name.
        '''
        # by_name holds plain dicts for tree/blob entries, so the type must
        # be read by key; attribute access raises AttributeError on a dict.
        return self.by_name[name]['type'] == 'blob'
|
|
|
297 |
|
|
|
298 |
# Register Commit and Tree with the ORM mapper, pairing each class with its
# collection (CommitDoc / TreeDoc) and the repository ORM session.
mapper(Commit, CommitDoc, repository_orm_session)
mapper(Tree, TreeDoc, repository_orm_session)
|
|
|
300 |
|
|
|
301 |
def commitlog(commit_id, skip=0, limit=sys.maxint):
    '''Return a generator of commit ids reachable from ``commit_id``.

    The commit graph is first built eagerly from the commit-run documents
    (CommitRunDoc); the returned generator then walks it so that a commit
    is only produced once all of its children in the graph have been
    produced.

    :param commit_id: id of the commit to start from
    :param skip: number of leading log entries to leave out
    :param limit: maximum number of ids to yield
    '''

    seen = set()
    def _visit(commit_id):
        # Record times and parents for every commit of the run containing
        # commit_id, from commit_id onwards, then recurse into the runs of
        # the run's parents.
        if commit_id in seen: return
        run = CommitRunDoc.m.get(commit_ids=commit_id)
        if run is None: return
        index = False
        for pos, (oid, time) in enumerate(izip(run.commit_ids, run.commit_times)):
            if oid == commit_id: index = True
            elif not index: continue
            seen.add(oid)
            ci_times[oid] = time
            if pos+1 < len(run.commit_ids):
                # Inside a run each commit's only parent is the next entry.
                ci_parents[oid] = [ run.commit_ids[pos+1] ]
            else:
                ci_parents[oid] = run.parent_commit_ids
        for oid in run.parent_commit_ids:
            _visit(oid)

    def _gen_ids(commit_id, skip, limit):
        # Traverse the graph in topo order, yielding commit IDs
        commits = set([commit_id])
        new_parent = None
        while commits and limit:
            # next commit is latest commit that's valid to log
            if new_parent in commits:
                ci = new_parent
            else:
                ci = max(commits, key=lambda ci:ci_times[ci])
            commits.remove(ci)
            # remove this commit from its parents children and add any childless
            # parents to the 'ready set'.  This has to happen for skipped
            # commits too, otherwise the ready set runs dry and the walk stops
            # as soon as skip > 0.
            new_parent = None
            for oid in ci_parents.get(ci, []):
                children = ci_children[oid]
                children.discard(ci)
                if not children:
                    commits.add(oid)
                    new_parent = oid
            if skip:
                skip -= 1
                continue
            limit -= 1
            yield ci

    # Load all the runs to build a commit graph
    ci_times = {}
    ci_parents = {}
    ci_children = defaultdict(set)
    log.info('Build commit graph')
    _visit(commit_id)
    for oid, parents in ci_parents.iteritems():
        for ci_parent in parents:
            ci_children[ci_parent].add(oid)

    return _gen_ids(commit_id, skip, limit)
|