Switch to unified view

a/Allura/allura/model/repo.py b/Allura/allura/model/repo.py
1
import re
2
import sys
3
import logging
4
from hashlib import sha1
5
from itertools import izip, chain
1
from datetime import datetime
6
from datetime import datetime
7
from collections import defaultdict
2
8
3
from ming import Document, Field
9
from pylons import g
10
11
from ming import Field, Index, collection
4
from ming import schema as S
12
from ming import schema as S
13
from ming.utils import LazyProperty
14
from ming.orm import mapper
5
15
16
from allura.lib import utils
17
from allura.lib import helpers as h
18
19
from .auth import User
6
from .session import main_doc_session, project_doc_session
20
from .session import main_doc_session, project_doc_session
21
from .session import repository_orm_session
7
22
8
class Commit(Document):
23
log = logging.getLogger(__name__)
9
    class __mongometa__:
24
10
        name = 'repo_ci'
11
        session = main_doc_session
12
        indexes = [
13
            ('parent_ids',),
14
            ('child_ids',),
15
            ('repo_ids',)]
16
    User = dict(name=str, email=str, date=datetime)
25
# Schema for the author/committer sub-documents stored in the 'committed' and
# 'authored' fields of CommitDoc
SUser = dict(name=str, email=str, date=datetime)
17
18
    _id = Field(str)
19
    tree_id = Field(str)
20
    committed = Field(User)
21
    authored = Field(User)
22
    message = Field(str)
23
    parent_ids = Field([str])
24
    child_ids = Field([str])
25
    repo_ids = Field([S.ObjectId()])
26
27
    def __repr__(self):
28
        return '%s %s' % (
29
            self._id[:7], self.summary)
30
31
    @property
32
    def summary(self):
33
        if self.message:
34
            summary = []
35
            for line in self.message.splitlines():
36
                line = line.rstrip()
37
                if line: summary.append(line)
38
                else: return ' '.join(summary)
39
            return ' '.join(summary)
40
        return ''
41
42
    def url(self):
43
        return ''
44
45
    def shorthand_id(self):
46
        return ''
47
48
    @property
49
    def author_url(self):
50
        return ''
51
52
class Tree(Document):
53
    class __mongometa__:
54
        name = 'repo_tree'
55
        session = main_doc_session
56
    ObjType=S.OneOf('blob', 'tree', 'submodule')
26
# Allowed values for the 'type' key of the entries in TreeDoc's 'other_ids'
SObjType=S.OneOf('blob', 'tree', 'submodule')
27
# Number of commit ids loaded per query batch by Commit.log_iter
QSIZE = 100
28
# Matches blob names that are a README (any letter case) with at most one
# extension, e.g. 'README', 'readme.md'.  Raw string so that '\.' reaches the
# regex engine instead of being treated as a (invalid) string escape.
README_RE = re.compile(r'^README(\.[^.]*)?$', re.IGNORECASE)
57
29
58
    _id = Field(str)
30
# Basic commit information
31
CommitDoc = collection(
32
    'repo_ci', main_doc_session,
33
    Field('_id', str),
34
    Field('tree_id', str),
35
    Field('committed', SUser),
36
    Field('authored', SUser),
37
    Field('message', str),
38
    Field('parent_ids', [str], index=True),
39
    Field('child_ids', [str], index=True),
40
    Field('repo_ids', [ S.ObjectId() ], index=True))
41
42
# Basic tree information
43
TreeDoc = collection(
44
    'repo_tree', main_doc_session,
45
    Field('_id', str),
59
    tree_ids = Field([dict(name=str, id=str)])
46
    Field('tree_ids', [dict(name=str, id=str)]),
60
    blob_ids = Field([dict(name=str, id=str)])
47
    Field('blob_ids', [dict(name=str, id=str)]),
61
    other_ids = Field([dict(name=str, id=str, type=ObjType)])
48
    Field('other_ids', [dict(name=str, id=str, type=SObjType)]))
62
49
63
class LastCommit(Document):
50
# Information about the last commit to touch a tree/blob
64
    class __mongometa__:
51
LastCommitDoc = collection(
65
        name = 'repo_last_commit'
52
    'repo_last_commit', project_doc_session,
66
        session = project_doc_session
53
    Field('_id', str),
67
        indexes = [
54
    Field('repo_id', S.ObjectId()),
68
            ( 'repo_id', 'object_id'),
55
    Field('object_id', str),
69
            ]
56
    Field('commit_info', dict(
70
71
    _id = Field(str)
72
    repo_id=Field(S.ObjectId())
73
    object_id=Field(str)
74
    commit_info = Field(dict(
75
        id=str,
57
        id=str,
76
        date=datetime,
58
        date=datetime,
77
        author=str,
59
        author=str,
78
        author_email=str,
60
        author_email=str,
79
        author_url=str,
61
        author_url=str,
80
        href=str,
62
        href=str,
81
        shortlink=str,
63
        shortlink=str,
82
        summary=str))
64
        summary=str)),
65
    Index('repo_id', 'object_id'))
83
66
84
    @classmethod
67
# List of all trees contained within a commit
85
    def set_last_commit(cls, repo_id, oid, commit):
68
TreesDoc = collection(
86
        lc = cls(dict(
69
    'repo_trees', main_doc_session,
87
                _id='%s:%s' % (repo_id, oid),
70
    Field('_id', str),
88
                repo_id=repo_id,
71
    Field('tree_ids', [str]))
89
                object_id=oid,
90
                commit_info=dict(
91
                    id=commit._id,
92
                    author=commit.authored.name,
93
                    author_email=commit.authored.email,
94
                    author_url=commit.author_url,
95
                    date=commit.authored.date,
96
                    href=commit.url(),
97
                    shortlink=commit.shorthand_id(),
98
                    summary=commit.summary)))
99
        lc.m.save(safe=False)
100
        return lc
101
72
102
class Trees(Document):
73
# Information about which things were added/removed in a commit
103
    class __mongometa__:
74
DiffInfoDoc = collection(
104
        name = 'repo_trees'
75
    'repo_diffinfo', main_doc_session,
105
        session = main_doc_session
76
    Field('_id', str),
106
77
    Field(
107
    _id = Field(str) # commit ID
78
        'differences',
108
    tree_ids = Field([str]) # tree IDs
109
110
class DiffInfo(Document):
111
    class __mongometa__:
112
        name = 'repo_diffinfo'
113
        session = main_doc_session
114
115
    _id = Field(str)
116
    differences = Field([dict(name=str, lhs_id=str, rhs_id=str)])
79
        [ dict(name=str, lhs_id=str, rhs_id=str)]))
117
80
118
class BasicBlock(Document):
81
# List of commit runs (a run is a linear series of single-parent commits)
119
    class __mongometa__:
82
CommitRunDoc = collection(
120
        name = 'repo_basic_block'
83
    'repo_commitrun', main_doc_session,
121
        session = main_doc_session
84
    Field('_id', str),
122
        indexes = [
85
    Field('parent_commit_ids', [str]),
123
            ('commit_ids',) ]
86
    Field('commit_ids', [str], index=True),
87
    Field('commit_times', [datetime]))
124
88
125
    _id = Field(str)
89
class RepoObject(object):
    '''Behaviour shared by the repository model classes.  Everything here
    relies only on the instance having an ``_id`` attribute.
    '''

    def __repr__(self):
        cls_name = self.__class__.__name__
        return '<%s %s>' % (cls_name, self._id)

    def primary(self):
        '''Return the primary object for this one, which is the object itself.'''
        return self

    def index_id(self):
        '''Globally unique artifact identifier.  Used for
        SOLR ID, shortlinks, and maybe elsewhere
        '''
        cls = self.__class__
        dotted = '%s.%s#%s' % (cls.__module__, cls.__name__, self._id)
        # Every '.' (including any inside the module path or the id itself)
        # becomes a '/'
        return '/'.join(dotted.split('.'))
134
            self.commit_ids[0][:6],
107
135
            self.commit_ids[-1][:6],
108
class Commit(RepoObject):
    '''A single commit, mapped onto CommitDoc.

    ``repo`` is not part of the stored document; it has to be attached
    (see set_context) before any method that delegates to the repository
    (tree, shorthand_id, symbolic_ids, url, ...) is used.
    '''
    # Ephemeral attrs
    repo=None

    def set_context(self, repo):
        '''Attach the repository this commit is being viewed through.

        log_iter calls this on every commit it yields.
        '''
        self.repo = repo

    @LazyProperty
    def author_url(self):
        '''URL of the user whose email address matches the author's, or
        None when User.by_email_address finds nobody.
        '''
        u = User.by_email_address(self.authored.email)
        if u: return u.url()
        return None

    @LazyProperty
    def committer_url(self):
        '''URL of the user whose email address matches the committer's, or
        None when User.by_email_address finds nobody.
        '''
        u = User.by_email_address(self.committed.email)
        if u: return u.url()
        return None

    @LazyProperty
    def tree(self):
        '''The Tree for this commit, with this commit set as its context.

        When no tree id is recorded, or the recorded one cannot be loaded,
        the repo is asked to compute the tree (which updates ``tree_id``).
        Returns None if no tree can be found.
        '''
        if self.tree_id is None:
            self.tree_id = self.repo.compute_tree(self)
        if self.tree_id is None:
            return None
        # Trees are keyed by '_id'; TreeDoc declares no 'object_id' field, so
        # querying on it could never match.
        t = Tree.query.get(_id=self.tree_id)
        if t is None:
            self.tree_id = self.repo.compute_tree(self)
            t = Tree.query.get(_id=self.tree_id)
        if t is not None: t.set_context(self)
        return t

    @LazyProperty
    def summary(self):
        '''First line of the commit message, passed through
        h.text.truncate with a length of 50.
        '''
        message = h.really_unicode(self.message)
        first_line = message.split('\n')[0]
        return h.text.truncate(first_line, 50)

    def get_path(self, path):
        '''Return the blob on the given path'''
        if path.startswith('/'): path = path[1:]
        path_parts = path.split('/')
        return self.tree.get_blob(path_parts[-1], path_parts[:-1])

    def shorthand_id(self):
        '''Short identifier for this commit, as produced by the repo.'''
        return self.repo.shorthand_for_commit(self)

    @LazyProperty
    def symbolic_ids(self):
        '''Symbolic ids for this commit, as produced by the repo.'''
        return self.repo.symbolics_for_commit(self)

    def url(self):
        '''URL for this commit, as produced by the repo.'''
        return self.repo.url_for_commit(self)

    def log_iter(self, skip, count):
        '''Yield the commits in this commit's log, in commitlog() order.

        :param skip: number of leading commits to leave out (falsy = none)
        :param count: maximum number of commits to yield; None = no limit

        Commit documents are loaded QSIZE ids at a time.  Each yielded commit
        has this commit's repo attached.  skip/count are applied here rather
        than delegated to commitlog() so that this method does not depend on
        how commitlog() treats them.
        '''
        for oids in utils.chunked_iter(commitlog(self._id), QSIZE):
            if count is not None and count <= 0:
                return
            oids = list(oids)
            if skip and skip >= len(oids):
                # The whole batch is skipped; no need to load it
                skip -= len(oids)
                continue
            commits = dict(
                (ci._id, ci) for ci in self.query.find(dict(
                        _id={'$in': oids})))
            for oid in oids:
                if skip:
                    skip -= 1
                    continue
                if count is not None:
                    if count <= 0:
                        return
                    count -= 1
                ci = commits[oid]
                ci.set_context(self.repo)
                yield ci

    def log(self, skip, count):
        '''List form of log_iter(skip, count).'''
        return list(self.log_iter(skip, count))

    def count_revisions(self):
        '''Number of commit ids produced by commitlog() for this commit.'''
        # commitlog() works on commit ids, not on Commit objects
        return sum(1 for oid in commitlog(self._id))

    def context(self):
        '''Return dict(prev=..., next=...) holding the first parent and the
        first child commit; either is None when there is no such commit.
        '''
        result = dict(prev=None, next=None)
        if self.parent_ids:
            result['prev'] = self.query.get(_id=self.parent_ids[0])
        if self.child_ids:
            result['next'] = self.query.get(_id=self.child_ids[0])
        return result
183
184
class Tree(RepoObject):
    '''A directory tree, mapped onto TreeDoc.

    repo, commit, parent and name are not part of the stored document; they
    are filled in by set_context.
    '''
    # Ephemeral attrs
    repo=None
    commit=None
    parent=None
    name=None

    def compute_hash(self):
        '''Compute a hash based on the contents of the tree.  Note that this
        hash does not necessarily correspond to any actual DVCS hash.
        '''
        lines = (
            [ 'tree' + x.name + x.id for x in self.tree_ids ]
            + [ 'blob' + x.name + x.id for x in self.blob_ids ]
            + [ x.type + x.name + x.id for x in self.other_ids ])
        sha_obj = sha1()
        # Sorted so the digest does not depend on the stored entry order
        for line in sorted(lines):
            sha_obj.update(line)
        return sha_obj.hexdigest()

    def set_context(self, commit_or_tree, name=None):
        '''Attach the ephemeral context.

        With a name, commit_or_tree is the parent tree: its commit is
        inherited and parent/name are recorded.  Without a name,
        commit_or_tree is taken to be the commit itself.
        '''
        assert commit_or_tree is not self
        self.repo = commit_or_tree.repo
        if name:
            self.commit = commit_or_tree.commit
            self.parent = commit_or_tree
            self.name = name
        else:
            self.commit = commit_or_tree

    def readme(self):
        '''Return (name, html) for the first blob whose name matches
        README_RE.

        When no such blob exists, or its text is empty, the html is an
        "Empty File" placeholder (and name is None if nothing matched).
        '''
        name = None
        text = ''
        for x in self.blob_ids:
            if README_RE.match(x.name):
                name = x.name
                text = h.really_unicode(self.repo.blob_text(x.id))
                break
        if text == '':
            text = '<p><em>Empty File</em></p>'
        else:
            renderer = g.pypeline_markup.renderer(name)
            if renderer[1]:
                text = g.pypeline_markup.render(name,text)
            else:
                text = '<pre>%s</pre>' % text
        return (name, text)

    def ls(self):
        '''List the tree's entries as dicts with kind, name, href and
        last_commit keys: sub-trees first, then blobs, then other entries,
        each group sorted by name.
        '''
        # Load last commit info
        oids = [ x.id for x in chain(self.tree_ids, self.blob_ids, self.other_ids) ]
        lc_index = dict(
            (lc.object_id, lc.commit_info)
            for lc in LastCommitDoc.m.find(dict(
                    repo_id=self.repo._id,
                    object_id={'$in': oids})))
        results = []
        def _get_last_commit(oid):
            # Entries without recorded last-commit info get an all-None record
            # with the same keys as LastCommitDoc's commit_info
            lc = lc_index.get(oid)
            if lc is None:
                lc = dict(
                    author=None,
                    author_email=None,
                    author_url=None,
                    date=None,
                    id=None,
                    href=None,
                    shortlink=None,
                    summary=None)
            return lc
        for x in sorted(self.tree_ids, key=lambda x:x.name):
            results.append(dict(
                    kind='DIR',
                    name=x.name,
                    href=x.name + '/',
                    last_commit=_get_last_commit(x.id)))
        # NOTE(review): file hrefs carry a trailing '/' just like directory
        # hrefs -- confirm that this is what the views expect.
        for x in sorted(self.blob_ids, key=lambda x:x.name):
            results.append(dict(
                    kind='FILE',
                    name=x.name,
                    href=x.name + '/',
                    last_commit=_get_last_commit(x.id)))
        for x in sorted(self.other_ids, key=lambda x:x.name):
            results.append(dict(
                    kind=x.type,
                    name=x.name,
                    href=None,
                    last_commit=_get_last_commit(x.id)))
        return results

    def path(self):
        '''Path of this tree below the commit root; always starts and ends
        with '/'.
        '''
        if self.parent:
            assert self.parent is not self
            return self.parent.path() + self.name + '/'
        else:
            return '/'

    def url(self):
        '''URL of this tree: the commit URL followed by 'tree' and path().'''
        return self.commit.url() + 'tree' + self.path()

    @LazyProperty
    def by_name(self):
        '''Mapping of entry name to entry.  Sub-tree and blob entries are
        copied into plain dicts with a 'type' key added; other entries are
        stored as they are.
        '''
        d = dict((x.name, x) for x in self.other_ids)
        d.update(
            (x.name, dict(x, type='tree'))
            for x in self.tree_ids)
        d.update(
            (x.name, dict(x, type='blob'))
            for x in self.blob_ids)
        return d

    def is_blob(self, name):
        '''True if the entry called name is a blob.  Raises KeyError when
        the tree has no entry with that name.
        '''
        # Item access, not attribute access: by_name holds plain dicts for
        # tree and blob entries, and those have no '.type' attribute.
        # Assumes the other_ids entries support item access too -- they come
        # from the same kind of schema as the entries by_name passes to dict().
        return self.by_name[name]['type'] == 'blob'
297
298
mapper(Commit, CommitDoc, repository_orm_session)
299
mapper(Tree, TreeDoc, repository_orm_session)
300
301
def commitlog(commit_id, skip=0, limit=sys.maxint):
    '''Return a generator over the ids of commit_id and its ancestors.

    The commit graph is built eagerly from the CommitRunDoc runs reachable
    from commit_id; the ids are then generated lazily.  A commit becomes
    eligible only after all of its children in that graph have been
    produced.  Among eligible commits, a parent that has just become
    eligible is preferred, otherwise the one with the latest commit time.

    :param commit_id: id of the commit to start from
    :param skip: number of leading ids to leave out
    :param limit: maximum number of ids to generate

    If commit_id is not part of any run, iterating the result raises
    KeyError (unless limit is 0).
    '''
    seen = set()
    # Load all the runs to build a commit graph
    ci_times = {}
    ci_parents = {}
    ci_children = defaultdict(set)

    def _visit(start_id):
        # Iterative walk over the runs; an explicit stack is used so that
        # histories made of many runs cannot exhaust the recursion limit.
        pending = [start_id]
        while pending:
            cid = pending.pop()
            if cid in seen: continue
            run = CommitRunDoc.m.get(commit_ids=cid)
            if run is None: continue
            last_pos = len(run.commit_ids) - 1
            found = False
            for pos, (oid, time) in enumerate(izip(run.commit_ids, run.commit_times)):
                # Only cid and the commits listed after it in the run matter
                if oid == cid: found = True
                elif not found: continue
                seen.add(oid)
                ci_times[oid] = time
                if pos < last_pos:
                    # Inside a run, the parent is the next commit of the run
                    ci_parents[oid] = [ run.commit_ids[pos+1] ]
                else:
                    ci_parents[oid] = run.parent_commit_ids
            pending.extend(run.parent_commit_ids)

    def _gen_ids(commit_id, skip, limit):
        # Traverse the graph in topo order, yielding commit IDs
        commits = set([commit_id])
        new_parent = None
        while commits and limit:
            # next commit is latest commit that's valid to log
            if new_parent in commits:
                ci = new_parent
            else:
                ci = max(commits, key=lambda ci:ci_times[ci])
            commits.remove(ci)
            if skip:
                # A skipped commit is not yielded, but its parents still have
                # to be made eligible below, or the walk would stop here.
                skip -= 1
            else:
                limit -= 1
                yield ci
            # remove this commit from its parents children and add any childless
            # parents to the 'ready set'
            new_parent = None
            for oid in ci_parents[ci]:
                children = ci_children[oid]
                children.discard(ci)
                if not children:
                    commits.add(oid)
                    new_parent = oid

    log.info('Build commit graph')
    _visit(commit_id)
    for oid, parents in ci_parents.iteritems():
        for ci_parent in parents:
            ci_children[ci_parent].add(oid)

    return _gen_ids(commit_id, skip, limit)