Switch to unified view

a/Allura/allura/model/repository.py b/Allura/allura/model/repository.py
...
...
8
from hashlib import sha1
8
from hashlib import sha1
9
from datetime import datetime
9
from datetime import datetime
10
from collections import defaultdict
10
from collections import defaultdict
11
11
12
import tg
12
import tg
13
from paste.deploy.converters import asbool
13
from pylons import c,g
14
from pylons import c,g, request
14
import pymongo.errors
15
import pymongo.errors
15
16
16
from ming import schema as S
17
from ming import schema as S
18
from ming.base import Object
17
from ming.utils import LazyProperty
19
from ming.utils import LazyProperty
18
from ming.orm import FieldProperty, session, Mapper
20
from ming.orm import FieldProperty, session, Mapper
19
from ming.orm.declarative import MappedClass
21
from ming.orm.declarative import MappedClass
20
22
21
22
from allura.lib.patience import SequenceMatcher
23
from allura.lib.patience import SequenceMatcher
23
from allura.lib import helpers as h
24
from allura.lib import helpers as h
24
from allura.lib import utils
25
from allura.lib import utils
25
26
26
from .artifact import Artifact, VersionedArtifact, Feed
27
from .artifact import Artifact, VersionedArtifact, Feed
27
from .auth import User
28
from .auth import User
28
from .session import repository_orm_session, project_orm_session
29
from .session import repository_orm_session, project_orm_session, main_doc_session
29
from .notification import Notification
30
from .notification import Notification
31
from .repo_refresh import refresh_repo
30
32
31
log = logging.getLogger(__name__)
33
log = logging.getLogger(__name__)
32
config = utils.ConfigProxy(
34
config = utils.ConfigProxy(
33
    common_suffix='forgemail.domain',
35
    common_suffix='forgemail.domain',
34
    common_prefix='forgemail.url')
36
    common_prefix='forgemail.url')
35
37
36
README_RE = re.compile('^README(\.[^.]*)?$', re.IGNORECASE)
38
README_RE = re.compile('^README(\.[^.]*)?$', re.IGNORECASE)
37
39
38
39
class RepositoryImplementation(object):
40
class RepositoryImplementation(object):
40
41
41
    # Repository-specific code
42
    # Repository-specific code
42
    def init(self): # pragma no cover
43
    def init(self): # pragma no cover
43
        raise NotImplementedError, 'init'
44
        raise NotImplementedError, 'init'
...
...
46
        raise NotImplementedError, 'clone_from'
47
        raise NotImplementedError, 'clone_from'
47
48
48
    def commit(self, revision): # pragma no cover
49
    def commit(self, revision): # pragma no cover
49
        raise NotImplementedError, 'commit'
50
        raise NotImplementedError, 'commit'
50
51
52
    def all_commit_ids(self): # pragma no cover
53
        raise NotImplementedError, 'all_commit_ids'
54
51
    def new_commits(self, all_commits=False): # pragma no cover
55
    def new_commits(self, all_commits=False): # pragma no cover
52
        '''Return any commit object_ids in the native repo that are not (yet) stored
56
        '''Return a list of native commits in topological order (heads first).
53
        in the database in topological order (parents first)'''
57
58
        "commit" is a repo-native object, NOT a Commit object.
59
        If all_commits is False, only return commits not already indexed.
60
        '''
54
        raise NotImplementedError, 'commit'
61
        raise NotImplementedError, 'new_commits'
62
63
    def commit_parents(self, commit):
64
        '''Return a list of native commits for the parents of the given (native)
65
        commit'''
66
        raise NotImplementedError, 'commit_parents'
55
67
56
    def commit_context(self, object_id): # pragma no cover
68
    def commit_context(self, object_id): # pragma no cover
57
        '''Returns {'prev':Commit, 'next':Commit}'''
69
        '''Returns {'prev':Commit, 'next':Commit}'''
58
        raise NotImplementedError, 'context'
70
        raise NotImplementedError, 'context'
59
71
...
...
62
        raise NotImplementedError, 'refresh_heads'
74
        raise NotImplementedError, 'refresh_heads'
63
75
64
    def refresh_commit(self, ci, seen_object_ids): # pragma no cover
76
    def refresh_commit(self, ci, seen_object_ids): # pragma no cover
65
        '''Refresh the data in the commit object 'ci' with data from the repo'''
77
        '''Refresh the data in the commit object 'ci' with data from the repo'''
66
        raise NotImplementedError, 'refresh_commit'
78
        raise NotImplementedError, 'refresh_commit'
79
80
    def refresh_commit_info(self, oid): # pragma no cover
81
        '''Refresh the data in the commit with id oid'''
82
        raise NotImplementedError, 'refresh_commit_info'
67
83
68
    def _setup_hooks(self): # pragma no cover
84
    def _setup_hooks(self): # pragma no cover
69
        '''Install a hook in the repository that will ping the refresh url for
85
        '''Install a hook in the repository that will ping the refresh url for
70
        the repo'''
86
        the repo'''
71
        raise NotImplementedError, '_setup_hooks'
87
        raise NotImplementedError, '_setup_hooks'
72
88
73
    def log(self, object_id, skip, count): # pragma no cover
89
    def log(self, object_id, skip, count): # pragma no cover
74
        '''Return a list of object_ids beginning at the given commit ID and continuing
90
        '''Return a list of (object_id, ci) beginning at the given commit ID and continuing
75
        to the parent nodes in a breadth-first traversal.  Also return a list of 'next commit' options
91
        to the parent nodes in a breadth-first traversal.  Also return a list of 'next commit' options
76
        (these are candidates for he next commit after 'count' commits have been
92
        (these are candidates for he next commit after 'count' commits have been
77
        exhausted).'''
93
        exhausted).'''
78
        raise NotImplementedError, '_log'
94
        raise NotImplementedError, 'log'
79
95
80
    def compute_tree(self, commit, path='/'):
96
    def compute_tree(self, commit, path='/'):
81
        '''Used in hg and svn to compute a git-like-tree lazily'''
97
        '''Used in hg and svn to compute a git-like-tree lazily'''
82
        raise NotImplementedError, 'compute_tree'
98
        raise NotImplementedError, 'compute_tree'
83
99
100
    def compute_tree_new(self, commit, path='/'):
101
        '''Used in hg and svn to compute a git-like-tree lazily with the new models'''
102
        raise NotImplementedError, 'compute_tree'
103
84
    def open_blob(self, blob): # pragma no cover
104
    def open_blob(self, blob): # pragma no cover
85
        '''Return a file-like object that contains the contents of the blob'''
105
        '''Return a file-like object that contains the contents of the blob'''
86
        raise NotImplementedError, 'open_blob'
106
        raise NotImplementedError, 'open_blob'
87
107
108
    @classmethod
88
    def shorthand_for_commit(self, commit):
109
    def shorthand_for_commit(cls, oid):
89
        return '[%s]' % commit.object_id[:6]
110
        return '[%s]' % oid[:6]
90
111
91
    def symbolics_for_commit(self, commit):
112
    def symbolics_for_commit(self, commit):
92
        '''Return symbolic branch and tag names for a commit.
113
        '''Return symbolic branch and tag names for a commit.
93
        Default generic implementation is provided, subclasses
114
        Default generic implementation is provided, subclasses
94
        may override if they have more efficient means.'''
115
        may override if they have more efficient means.'''
95
        branches = [b.name for b in self._repo.branches if b.object_id == commit.object_id]
116
        branches = [b.name for b in self._repo.branches if b.object_id == commit.object_id]
96
        tags = [t.name for t in self._repo.repo_tags if t.object_id == commit.object_id]
117
        tags = [t.name for t in self._repo.repo_tags if t.object_id == commit.object_id]
97
        return branches, tags
118
        return branches, tags
98
119
99
    def url_for_commit(self, commit):
120
    def url_for_commit(self, commit):
121
        'return an URL, given either a commit or object id'
122
        if isinstance(commit, basestring):
123
            object_id = commit
124
        else:
125
            object_id = commit.object_id
100
        return '%sci/%s/' % (self._repo.url(), commit.object_id)
126
        return '%sci/%s/' % (self._repo.url(), object_id)
101
127
102
    def _setup_paths(self, create_repo_dir=True):
128
    def _setup_paths(self, create_repo_dir=True):
103
        if not self._repo.fs_path.endswith('/'): self._repo.fs_path += '/'
129
        if not self._repo.fs_path.endswith('/'): self._repo.fs_path += '/'
104
        fullname = self._repo.fs_path + self._repo.name
130
        fullname = self._repo.fs_path + self._repo.name
105
        path = fullname if create_repo_dir else self._repo.fs_path
131
        path = fullname if create_repo_dir else self._repo.fs_path
...
...
165
    # Proxy to _impl
191
    # Proxy to _impl
166
    def init(self):
192
    def init(self):
167
        return self._impl.init()
193
        return self._impl.init()
168
    def commit(self, rev):
194
    def commit(self, rev):
169
        return self._impl.commit(rev)
195
        return self._impl.commit(rev)
196
    def all_commit_ids(self):
197
        return self._impl.all_commit_ids()
198
    def refresh_commit_info(self, oid, seen):
199
        return self._impl.refresh_commit_info(oid, seen)
170
    def commit_context(self, commit):
200
    def commit_context(self, commit):
171
        return self._impl.commit_context(commit)
201
        return self._impl.commit_context(commit)
172
    def open_blob(self, blob):
202
    def open_blob(self, blob):
173
        return self._impl.open_blob(blob)
203
        return self._impl.open_blob(blob)
174
    def shorthand_for_commit(self, commit):
204
    def shorthand_for_commit(self, oid):
175
        return self._impl.shorthand_for_commit(commit)
205
        return self._impl.shorthand_for_commit(oid)
176
    def symbolics_for_commit(self, commit):
206
    def symbolics_for_commit(self, commit):
177
        return self._impl.symbolics_for_commit(commit)
207
        return self._impl.symbolics_for_commit(commit)
178
    def url_for_commit(self, commit):
208
    def url_for_commit(self, commit):
179
        return self._impl.url_for_commit(commit)
209
        return self._impl.url_for_commit(commit)
180
    def compute_tree(self, commit, path='/'):
210
    def compute_tree(self, commit, path='/'):
181
        return self._impl.compute_tree(commit, path)
211
        return self._impl.compute_tree(commit, path)
212
    def compute_tree_new(self, commit, path='/'):
213
        return self._impl.compute_tree_new(commit, path)
182
214
183
    def _log(self, rev, skip, max_count):
215
    def _log(self, rev, skip, max_count):
184
        ci = self.commit(rev)
216
        ci = self.commit(rev)
185
        if ci is None: return []
217
        if ci is None: return []
186
        return ci.log(int(skip), int(max_count))
218
        return ci.log(int(skip), int(max_count))
...
...
282
        return content_type, encoding
314
        return content_type, encoding
283
315
284
    def refresh(self, all_commits=False, notify=True):
316
    def refresh(self, all_commits=False, notify=True):
285
        '''Find any new commits in the repository and update'''
317
        '''Find any new commits in the repository and update'''
286
        self._impl.refresh_heads()
318
        self._impl.refresh_heads()
319
        if asbool(tg.config.get('scm.new_refresh')):
320
            refresh_repo(self, all_commits, notify)
321
            notify = False # don't double notify
322
287
        self.status = 'analyzing'
323
        self.status = 'analyzing'
288
        session(self).flush()
324
        session(self).flush()
289
        sess = session(Commit)
325
        sess = session(Commit)
290
        log.info('Refreshing repository %s', self)
326
        log.info('Refreshing repository %s', self)
291
        commit_ids = self._impl.new_commits(all_commits)
327
        commit_ids = self._impl.new_commits(all_commits)
292
        log.info('... %d new commits', len(commit_ids))
328
        log.info('... %d new commits', len(commit_ids))
293
        # Refresh history
329
        # Refresh history
294
        i=0
295
        seen_object_ids = set()
330
        seen_object_ids = set()
296
        commit_msgs = []
331
        commit_msgs = []
332
        i=0
297
        for i, oid in enumerate(commit_ids):
333
        for i, oid in enumerate(commit_ids):
298
            if len(seen_object_ids) > 10000: # pragma no cover
334
            if len(seen_object_ids) > 10000: # pragma no cover
299
                log.info('... flushing seen object cache')
335
                log.info('... flushing seen object cache')
300
                seen_object_ids = set()
336
                seen_object_ids = set()
301
            ci, isnew = Commit.upsert(oid)
337
            ci, isnew = Commit.upsert(oid)
302
            if not isnew and not all_commits:
338
            if not isnew and not all_commits:
303
                 # race condition, let the other proc handle it
304
                sess.expunge(ci)
339
                sess.expunge(ci)
305
                continue
340
                continue
306
            ci.set_context(self)
341
            ci.set_context(self)
307
            self._impl.refresh_commit(ci, seen_object_ids)
342
            self._impl.refresh_commit(ci, seen_object_ids)
308
            if (i+1) % self.BATCH_SIZE == 0:
343
            if (i+1) % self.BATCH_SIZE == 0:
...
...
337
                artifact=self,
372
                artifact=self,
338
                topic='metadata',
373
                topic='metadata',
339
                subject=subject,
374
                subject=subject,
340
                text=text)
375
                text=text)
341
        log.info('...... flushing %d commits (%d total)',
376
        log.info('...... flushing %d commits (%d total)',
342
                 i % self.BATCH_SIZE, i)
377
                 (i+1) % self.BATCH_SIZE, i+1)
343
        sess.flush()
378
        sess.flush()
344
        sess.clear()
379
        sess.clear()
345
        # Mark all commits in this repo as being in this repo
380
        # Mark all commits in this repo as being in this repo
346
        all_commit_ids = self._impl.new_commits(True)
381
        all_commit_ids = self._impl.new_commits(True)
347
        Commit.query.update(
382
        Commit.query.update(
...
...
376
                log.info('...... flushing %d commits (%d total)',
411
                log.info('...... flushing %d commits (%d total)',
377
                         self.BATCH_SIZE, (i+1))
412
                         self.BATCH_SIZE, (i+1))
378
                sess.flush()
413
                sess.flush()
379
                sess.clear()
414
                sess.clear()
380
        log.info('...... flushing %d commits (%d total)',
415
        log.info('...... flushing %d commits (%d total)',
381
                 i % self.BATCH_SIZE, i)
416
                 (i+1) % self.BATCH_SIZE, i+1)
382
        sess.flush()
417
        sess.flush()
383
        sess.clear()
418
        sess.clear()
384
419
385
    def push_upstream_context(self):
420
    def push_upstream_context(self):
386
        project, rest=h.find_project(self.upstream_repo.name)
421
        project, rest=h.find_project(self.upstream_repo.name)
...
...
698
        if path.startswith('/'): path = path[1:]
733
        if path.startswith('/'): path = path[1:]
699
        path_parts = path.split('/')
734
        path_parts = path.split('/')
700
        return self.tree.get_blob(path_parts[-1], path_parts[:-1])
735
        return self.tree.get_blob(path_parts[-1], path_parts[:-1])
701
736
702
    def shorthand_id(self):
737
    def shorthand_id(self):
703
        return self.repo.shorthand_for_commit(self)
738
        return self.repo.shorthand_for_commit(self.object_id)
704
739
705
    @LazyProperty
740
    @LazyProperty
706
    def symbolic_ids(self):
741
    def symbolic_ids(self):
707
        return self.repo.symbolics_for_commit(self)
742
        return self.repo.symbolics_for_commit(self)
708
743
...
...
782
817
783
class Tree(RepoObject):
818
class Tree(RepoObject):
784
    '''
819
    '''
785
    A representation of files & directories.  E.g. what is present at a single commit
820
    A representation of files & directories.  E.g. what is present at a single commit
786
821
787
    :var object_ids: dict(object_id: name)  Set by _refresh_tree in the scm implementation
822
    :var object_ids: dict(object_id: name)  Set by refresh_tree in the scm implementation
788
    '''
823
    '''
789
    class __mongometa__:
824
    class __mongometa__:
790
        polymorphic_identity='tree'
825
        polymorphic_identity='tree'
791
    type_s = 'Tree'
826
    type_s = 'Tree'
792
827