Refactor Repository.refresh to operate on commit object ids: replace native-commit iteration and the CommitAncestor/Manifest collections with a seen-object-id cache and a bulk repositories update.

--- a/Allura/allura/model/repository.py
+++ b/Allura/allura/model/repository.py
@@ -11,7 +11,6 @@
 
 import tg
 from pylons import c,g, request
-from paste.registry import StackedObjectProxy
 import pymongo.errors
 
 from ming import schema as S
@@ -73,20 +72,13 @@
         '''Sets repository metadata such as heads, tags, and branches'''
         raise NotImplementedError, 'refresh_heads'
 
-    def refresh_commit(self, ci, native_ci): # pragma no cover
+    def refresh_commit(self, ci, seen_object_ids): # pragma no cover
         '''Refresh the data in the commit object 'ci' with data from the repo'''
         raise NotImplementedError, 'refresh_commit'
 
     def refresh_commit_info(self, oid): # pragma no cover
         '''Refresh the data in the commit with id oid'''
         raise NotImplementedError, 'refresh_commit_info'
-
-    def refresh_tree(self, tree): # pragma no cover
-        '''Refresh the data in the tree object 'tree' with data from the repo'''
-        raise NotImplementedError, 'refresh_tree'
-
-    def generate_shortlinks(self, ci): # pragma no cover
-        raise NotImplementedError, 'generate_shortlinks'
 
     def object_id(self, obj): # pragma no cover
         '''Return the object_id for the given native object'''
@@ -166,7 +158,6 @@
     branches = FieldProperty([dict(name=str,object_id=str, count=int)])
     repo_tags = FieldProperty([dict(name=str,object_id=str, count=int)])
     upstream_repo = FieldProperty(dict(name=str,url=str))
-    refresh_context = StackedObjectProxy(name='refresh_context')
 
     def __init__(self, **kw):
         if 'name' in kw and 'tool' in kw:
@@ -203,8 +194,8 @@
         return self._impl.commit_context(commit)
     def open_blob(self, blob):
         return self._impl.open_blob(blob)
-    def shorthand_for_commit(self, commit):
-        return self._impl.shorthand_for_commit(commit)
+    def shorthand_for_commit(self, oid):
+        return self._impl.shorthand_for_commit(oid)
     def symbolics_for_commit(self, commit):
         return self._impl.symbolics_for_commit(commit)
     def url_for_commit(self, commit):
@@ -313,69 +304,28 @@
                 content_type, encoding = 'application/octet-stream', None
         return content_type, encoding
 
-    def refresh_ancestor_graph(self, commits):
-        '''Make sure the CommitAncestor collection is up-to-date based on
-        the given list of native commits
-        '''
-        ca_doc = mapper(CommitAncestor).doc_cls
-        sess = main_doc_session
-        ancestor_cache = {} # ancestor_cache[oid] = [ a_oid0, a_oid1...]
-        def _ancestors(ci, oid=None):
-            if oid is None: oid = self._impl.object_id(ci)
-            if oid in ancestor_cache:
-                return ancestor_cache[oid]
-            stored_ancestors = []
-            for ca in sess.find(ca_doc, dict(object_id=oid)):
-                stored_ancestors.extend(ca.ancestor_ids)
-            if stored_ancestors:
-                # Ancestors already stored in MongoDB
-                ancestor_cache[oid] = stored_ancestors
-                return stored_ancestors
-            ancestor_ids = set()
-            for p_ci in self._impl.commit_parents(ci):
-                p_oid = self._impl.object_id(p_ci)
-                ancestor_ids.add(p_oid)
-                ancestor_ids.update(_ancestors(p_ci, p_oid))
-            result = ancestor_cache[oid] = list(ancestor_ids)
-            for chunk in utils.chunked_iter(result, self.BATCH_SIZE):
-                sess.insert(ca_doc(
-                        dict(object_id=oid, ancestor_ids=list(chunk))))
-
-        # Compute graph in chunks to save memory
-        for i, ci in enumerate(reversed(commits)):
-            _ancestors(ci)
-            if (i+1) % (10 * self.BATCH_SIZE) == 0:
-                log.info('Computed ancestors for %d commits', i+1)
-                ancestor_cache = {}
-        log.info('Computed ancestors for %d commits', i+1)
-
     def refresh(self, all_commits=False, notify=True):
         '''Find any new commits in the repository and update'''
-        request.environ['paste.registry'].register(
-            self.refresh_context,
-            Object(
-                seen_oids=set(),
-                max_manifest_size=self.BATCH_SIZE))
         self._impl.refresh_heads()
         self.status = 'analyzing'
         session(self).flush()
         sess = session(Commit)
         log.info('Refreshing repository %s', self)
-        commits = self._impl.new_commits(all_commits)
-        log.info('... %d new commits', len(commits))
-        # self.refresh_ancestor_graph(commits)
+        commit_ids = self._impl.new_commits(all_commits)
+        log.info('... %d new commits', len(commit_ids))
         # Refresh history
+        seen_object_ids = set()
         commit_msgs = []
-        for i, n_ci in enumerate(commits):
-            oid = self._impl.object_id(n_ci)
-            if oid in self.refresh_context.seen_oids: continue
-            self.refresh_context.seen_oids.add(oid)
+        for i, oid in enumerate(commit_ids):
+            if len(seen_object_ids) > 10000: # pragma no cover
+                log.info('... flushing seen object cache')
+                seen_object_ids = set()
             ci, isnew = Commit.upsert(oid)
-            ci.set_context(self)
             if not isnew and not all_commits:
                 sess.expunge(ci)
                 continue
-            self._impl.refresh_commit(ci, n_ci)
+            ci.set_context(self)
+            self._impl.refresh_commit(ci, seen_object_ids)
             if (i+1) % self.BATCH_SIZE == 0:
                 log.info('...... flushing %d commits (%d total)',
                          self.BATCH_SIZE, (i+1))
@@ -413,52 +363,31 @@
                  (i+1) % self.BATCH_SIZE, i+1)
         sess.flush()
         sess.clear()
-        # Find all commits that don't know they're in this repo. Mark them as
-        # being in this repo and add their shorlinks
-        ca_doc = mapper(CommitAncestor).doc_cls
-        ci_doc = mapper(Commit).doc_cls
-        visited_cis = set()
-        for ancestor in main_doc_session.find(
-            ca_doc,
-            dict(object_id={'$in':[hd.object_id for hd in self.heads]})):
-            ancestor_ids = [
-                oid for oid in ancestor.ancestor_ids
-                if oid not in visited_cis ]
-            visited_cis.update(ancestor_ids)
-            for ignorant_ci in main_doc_session.find(
-                ci_doc,
-                dict(
-                    object_id={'$in': ancestor_ids},
-                    repositories={'$ne': self._id }),
-                fields=['_id', 'object_id']):
-                ignorant_ci.repositories.append(self._id)
-                main_doc_session.update_partial(
-                    ci_doc, dict(_id=ignorant_ci._id),
-                    {'$set': dict(repositories=ignorant_ci.repositories) })
-                ignorant_ci.update(
-                    project_id=c.project._id,
-                    app_config_id=c.app.config._id,
-                    url=self.url_for_commit(ignorant_ci))
-                self._impl.generate_shortlinks(ignorant_ci)
+        # Mark all commits in this repo as being in this repo
+        all_commit_ids = self._impl.new_commits(True)
+        Commit.query.update(
+            dict(
+                object_id={'$in':all_commit_ids},
+                repositories={'$ne':self._id}),
+            {'$push':dict(repositories=self._id)},
+            upsert=False, multi=True)
         self.compute_diffs()
         log.info('... refreshed repository %s.  Found %d new commits',
-                 self, len(commits))
+                 self, len(commit_ids))
         self.status = 'ready'
         for head in self.heads + self.branches + self.repo_tags:
             ci = self.commit(head.object_id)
             if ci is not None:
                 head.count = ci.count_revisions()
         session(self).flush()
-        return len(commits)
+        return len(commit_ids)
 
     def compute_diffs(self):
-        commits = self._impl.new_commits(all_commits=True)
+        commit_ids = self._impl.new_commits(all_commits=True)
         sess = session(Commit)
         # Compute diffs on all commits
         log.info('... computing diffs')
-        i=0
-        for i, n_ci in enumerate(reversed(commits)):
-            oid = self._impl.object_id(n_ci)
+        for i, oid in enumerate(commit_ids):
             ci = self._impl.commit(oid)
             ci.tree.set_last_commit(ci, self)
             if not ci.diffs_computed:
@@ -713,48 +642,6 @@
         if not lc.object_ids:
             lc.object_ids, lc.candidates = repo._impl.log(object_id, 0, 50)
         return lc
-
-class CommitAncestor(MappedClass):
-    class __mongometa__:
-        session = repository_orm_session
-        name='commit_ancestor'
-        indexes = [
-            ('object_id'), ('ancestor_ids') ]
-
-    _id = FieldProperty(S.ObjectId)
-    object_id = FieldProperty(str)
-    ancestor_ids = FieldProperty([str])
-
-class Manifest(MappedClass):
-    BATCH_SIZE=5000
-    class __mongometa__:
-        session = repository_orm_session
-        name='manifest'
-        indexes = [ ('object_id') ]
-
-    _id = FieldProperty(S.ObjectId)
-    object_id = FieldProperty(str)
-    object_ids = FieldProperty([
-            dict(object_id=str, name=str)])
-    _cache = LRUCache(BATCH_SIZE, 30)
-
-    @classmethod
-    def get(cls, oid):
-        result = cls._cache.get(
-            oid,
-            lambda: cls.query.find(dict(object_id=oid)).all())
-        if not result:
-            cls._cache.evict(oid)
-        return result
-
-    @classmethod
-    def from_iter(cls, oid, iterable):
-        cls.query.remove(dict(object_id=oid))
-        result = [
-            cls(object_id=oid, object_ids=list(chunk))
-            for chunk in utils.chunked_iter(iterable, cls.BATCH_SIZE) ]
-        cls._cache.set(oid, result)
-        return result
 
 class Commit(RepoObject):
     class __mongometa__:
@@ -833,7 +720,7 @@
         return self.tree.get_blob(path_parts[-1], path_parts[:-1])
 
     def shorthand_id(self):
-        return self.repo.shorthand_for_commit(self)
+        return self.repo.shorthand_for_commit(self.object_id)
 
     @LazyProperty
     def symbolic_ids(self):