Switch to side-by-side view

--- a/Allura/allura/model/repo.py
+++ b/Allura/allura/model/repo.py
@@ -1,77 +1,59 @@
+import re
+import sys
+import logging
+from hashlib import sha1
+from itertools import izip, chain
 from datetime import datetime
-
-from ming import Document, Field
+from collections import defaultdict
+
+from pylons import g
+
+from ming import Field, Index, collection
 from ming import schema as S
-
+from ming.utils import LazyProperty
+from ming.orm import mapper
+
+from allura.lib import utils
+from allura.lib import helpers as h
+
+from .auth import User
 from .session import main_doc_session, project_doc_session
-
-class Commit(Document):
-    class __mongometa__:
-        name = 'repo_ci'
-        session = main_doc_session
-        indexes = [
-            ('parent_ids',),
-            ('child_ids',),
-            ('repo_ids',)]
-    User = dict(name=str, email=str, date=datetime)
-
-    _id = Field(str)
-    tree_id = Field(str)
-    committed = Field(User)
-    authored = Field(User)
-    message = Field(str)
-    parent_ids = Field([str])
-    child_ids = Field([str])
-    repo_ids = Field([S.ObjectId()])
-
-    def __repr__(self):
-        return '%s %s' % (
-            self._id[:7], self.summary)
-
-    @property
-    def summary(self):
-        if self.message:
-            summary = []
-            for line in self.message.splitlines():
-                line = line.rstrip()
-                if line: summary.append(line)
-                else: return ' '.join(summary)
-            return ' '.join(summary)
-        return ''
-
-    def url(self):
-        return ''
-
-    def shorthand_id(self):
-        return ''
-
-    @property
-    def author_url(self):
-        return ''
-
-class Tree(Document):
-    class __mongometa__:
-        name = 'repo_tree'
-        session = main_doc_session
-    ObjType=S.OneOf('blob', 'tree', 'submodule')
-
-    _id = Field(str)
-    tree_ids = Field([dict(name=str, id=str)])
-    blob_ids = Field([dict(name=str, id=str)])
-    other_ids = Field([dict(name=str, id=str, type=ObjType)])
-
-class LastCommit(Document):
-    class __mongometa__:
-        name = 'repo_last_commit'
-        session = project_doc_session
-        indexes = [
-            ( 'repo_id', 'object_id'),
-            ]
-
-    _id = Field(str)
-    repo_id=Field(S.ObjectId())
-    object_id=Field(str)
-    commit_info = Field(dict(
+from .session import repository_orm_session
+
+# Module-level logger
+log = logging.getLogger(__name__)
+
+# Schema fragment shared by the 'committed' and 'authored' commit fields
+SUser = dict(name=str, email=str, date=datetime)
+# Allowed values for the 'type' of entries in a tree's other_ids
+SObjType=S.OneOf('blob', 'tree', 'submodule')
+# Number of commit IDs fetched per database query in Commit.log_iter
+QSIZE = 100
+# Matches 'README' with an optional single extension, case-insensitively.
+# Raw string so the '\.' escape is passed to the regex engine verbatim.
+README_RE = re.compile(r'^README(\.[^.]*)?$', re.IGNORECASE)
+
+# Basic commit information.  Declared on the 'repo_ci' collection through
+# main_doc_session; parent_ids, child_ids and repo_ids are flagged index=True.
+CommitDoc = collection(
+    'repo_ci', main_doc_session,
+    Field('_id', str),
+    Field('tree_id', str),
+    Field('committed', SUser),
+    Field('authored', SUser),
+    Field('message', str),
+    Field('parent_ids', [str], index=True),
+    Field('child_ids', [str], index=True),
+    Field('repo_ids', [ S.ObjectId() ], index=True))
+
+# Basic tree information.  Declared on the 'repo_tree' collection through
+# main_doc_session.  Subtrees and blobs are kept in separate name/id lists;
+# other_ids entries additionally carry a 'type' constrained by SObjType.
+TreeDoc = collection(
+    'repo_tree', main_doc_session,
+    Field('_id', str),
+    Field('tree_ids', [dict(name=str, id=str)]),
+    Field('blob_ids', [dict(name=str, id=str)]),
+    Field('other_ids', [dict(name=str, id=str, type=SObjType)]))
+
+# Information about the last commit to touch a tree/blob
+LastCommitDoc = collection(
+    'repo_last_commit', project_doc_session,
+    Field('_id', str),
+    Field('repo_id', S.ObjectId()),
+    Field('object_id', str),
+    Field('commit_info', dict(
         id=str,
         date=datetime,
         author=str,
@@ -79,58 +61,298 @@
         author_url=str,
         href=str,
         shortlink=str,
-        summary=str))
-
-    @classmethod
-    def set_last_commit(cls, repo_id, oid, commit):
-        lc = cls(dict(
-                _id='%s:%s' % (repo_id, oid),
-                repo_id=repo_id,
-                object_id=oid,
-                commit_info=dict(
-                    id=commit._id,
-                    author=commit.authored.name,
-                    author_email=commit.authored.email,
-                    author_url=commit.author_url,
-                    date=commit.authored.date,
-                    href=commit.url(),
-                    shortlink=commit.shorthand_id(),
-                    summary=commit.summary)))
-        lc.m.save(safe=False)
-        return lc
-
-class Trees(Document):
-    class __mongometa__:
-        name = 'repo_trees'
-        session = main_doc_session
-
-    _id = Field(str) # commit ID
-    tree_ids = Field([str]) # tree IDs
-
-class DiffInfo(Document):
-    class __mongometa__:
-        name = 'repo_diffinfo'
-        session = main_doc_session
-
-    _id = Field(str)
-    differences = Field([dict(name=str, lhs_id=str, rhs_id=str)])
-
-class BasicBlock(Document):
-    class __mongometa__:
-        name = 'repo_basic_block'
-        session = main_doc_session
-        indexes = [
-            ('commit_ids',) ]
-
-    _id = Field(str)
-    parent_commit_ids = Field([str])
-    commit_ids = Field([str])
-    commit_times = Field([datetime])
+        summary=str)),
+    Index('repo_id', 'object_id'))
+
+# List of all trees contained within a commit.  Declared on the 'repo_trees'
+# collection through main_doc_session.
+# NOTE(review): _id is presumably the commit ID (the class this replaces
+# commented it that way) -- confirm.
+TreesDoc = collection(
+    'repo_trees', main_doc_session,
+    Field('_id', str),
+    Field('tree_ids', [str]))
+
+# Information about which things were added/removed in a commit.  Declared on
+# the 'repo_diffinfo' collection through main_doc_session; each difference
+# records a name plus a left-hand and right-hand object ID.
+DiffInfoDoc = collection(
+    'repo_diffinfo', main_doc_session,
+    Field('_id', str),
+    Field(
+        'differences',
+        [ dict(name=str, lhs_id=str, rhs_id=str)]))
+
+# List of commit runs (a run is a linear series of single-parent commits).
+# Declared on the 'repo_commitrun' collection through main_doc_session.
+# commit_ids and commit_times are parallel lists (commitlog() zips them), and
+# commitlog() treats commit_ids[i+1] as the parent of commit_ids[i], with
+# parent_commit_ids being the parents of the last commit in the run.
+CommitRunDoc = collection(
+    'repo_commitrun', main_doc_session,
+    Field('_id', str),
+    Field('parent_commit_ids', [str]),
+    Field('commit_ids', [str], index=True),
+    Field('commit_times', [datetime]))
+
+class RepoObject(object):
 
     def __repr__(self):
-        return '%s: (P %s, T %s..%s (%d commits))' % (
-            self._id[:6],
-            [ oid[:6] for oid in self.parent_commit_ids ],
-            self.commit_ids[0][:6],
-            self.commit_ids[-1][:6],
-            len(self.commit_ids))
+        return '<%s %s>' % (
+            self.__class__.__name__, self._id)
+
+    def primary(self):
+        '''Return this object itself.'''
+        return self
+
+    def index_id(self):
+        '''Globally unique artifact identifier.  Used for
+        SOLR ID, shortlinks, and maybe elsewhere.
+
+        Built as ``<module>.<class>#<_id>`` and then every ``.`` in the
+        whole string (including any inside ``_id``) is turned into ``/``.
+        '''
+        cls = self.__class__
+        dotted = '%s.%s#%s' % (cls.__module__, cls.__name__, self._id)
+        return dotted.replace('.', '/')
+
+class Commit(RepoObject):
+    '''A commit in a repository, mapped onto ``CommitDoc`` by the
+    ``mapper()`` call further down in this module.
+
+    ``repo`` is not persisted.  It has to be attached at runtime (see
+    ``set_context``) before using ``tree``, ``url``, ``shorthand_id``,
+    ``symbolic_ids`` or the log methods, all of which delegate to it.
+    '''
+    # Ephemeral attrs
+    repo=None
+
+    def set_context(self, repo):
+        '''Attach the repository this commit is being viewed through.'''
+        self.repo = repo
+
+    @LazyProperty
+    def author_url(self):
+        '''URL of the user registered with the author's email, or None
+        when no such user exists.'''
+        u = User.by_email_address(self.authored.email)
+        if u: return u.url()
+
+    @LazyProperty
+    def committer_url(self):
+        '''URL of the user registered with the committer's email, or None
+        when no such user exists.'''
+        u = User.by_email_address(self.committed.email)
+        if u: return u.url()
+
+    @LazyProperty
+    def tree(self):
+        '''The ``Tree`` for this commit with its context set, or None.
+
+        The repo is asked to compute the tree when ``tree_id`` is unset,
+        and once more when no tree document is stored under that ID.
+        '''
+        if self.tree_id is None:
+            self.tree_id = self.repo.compute_tree(self)
+        if self.tree_id is None:
+            return None
+        # Tree documents are keyed by _id; TreeDoc has no 'object_id' field,
+        # so querying on it could never match
+        t = Tree.query.get(_id=self.tree_id)
+        if t is None:
+            self.tree_id = self.repo.compute_tree(self)
+            t = Tree.query.get(_id=self.tree_id)
+        if t is not None: t.set_context(self)
+        return t
+
+    @LazyProperty
+    def summary(self):
+        '''First line of the commit message, truncated to 50 characters.'''
+        message = h.really_unicode(self.message)
+        first_line = message.split('\n')[0]
+        return h.text.truncate(first_line, 50)
+
+    def get_path(self, path):
+        '''Return the blob on the given path'''
+        if path.startswith('/'): path = path[1:]
+        path_parts = path.split('/')
+        return self.tree.get_blob(path_parts[-1], path_parts[:-1])
+
+    def shorthand_id(self):
+        '''Short identifier for this commit, as produced by the repo.'''
+        return self.repo.shorthand_for_commit(self)
+
+    @LazyProperty
+    def symbolic_ids(self):
+        '''Symbolic names for this commit, as produced by the repo.'''
+        return self.repo.symbolics_for_commit(self)
+
+    def url(self):
+        '''URL of this commit, as produced by the repo.'''
+        return self.repo.url_for_commit(self)
+
+    def log_iter(self, skip, count):
+        '''Yield the commits reachable from this one, in ``commitlog`` order.
+
+        The first ``skip`` commits are passed over and at most ``count``
+        commits are yielded (``count=None`` means no limit).  Commit
+        documents are fetched ``QSIZE`` IDs at a time and each yielded
+        commit gets this commit's ``repo`` attached.
+        '''
+        def _window(oids):
+            # Paging is applied here, on the ID stream, so that skipped
+            # commits are never loaded from the database
+            to_skip, remaining = skip, count
+            for oid in oids:
+                if remaining is not None and remaining <= 0:
+                    return
+                if to_skip:
+                    to_skip -= 1
+                    continue
+                if remaining is not None:
+                    remaining -= 1
+                yield oid
+
+        for oids in utils.chunked_iter(_window(commitlog(self._id)), QSIZE):
+            oids = list(oids)
+            commits = dict(
+                (ci._id, ci) for ci in self.query.find(dict(
+                        _id={'$in': oids})))
+            # Re-walk oids so the log order is kept; the query result
+            # order is not relied upon
+            for oid in oids:
+                ci = commits[oid]
+                ci.set_context(self.repo)
+                yield ci
+
+    def log(self, skip, count):
+        '''List form of ``log_iter``.'''
+        return list(self.log_iter(skip, count))
+
+    def count_revisions(self):
+        '''Number of commit IDs ``commitlog`` yields starting here.'''
+        # commitlog() works on commit IDs, not Commit objects
+        return sum(1 for oid in commitlog(self._id))
+
+    def context(self):
+        '''Return ``dict(prev=..., next=...)`` holding the first parent and
+        the first child commit; either is None when there is none.'''
+        result = dict(prev=None, next=None)
+        if self.parent_ids:
+            result['prev'] = self.query.get(_id=self.parent_ids[0])
+        if self.child_ids:
+            result['next'] = self.query.get(_id=self.child_ids[0])
+        return result
+
+class Tree(RepoObject):
+    '''A directory listing, mapped onto ``TreeDoc`` by the ``mapper()``
+    call further down in this module.
+
+    The attributes below are not persisted; ``set_context`` fills them in.
+    '''
+    # Ephemeral attrs
+    repo=None
+    commit=None
+    parent=None
+    name=None
+
+    def compute_hash(self):
+        '''Compute a hash based on the contents of the tree.  Note that this
+        hash does not necessarily correspond to any actual DVCS hash.
+        '''
+        lines = (
+            [ 'tree' + x.name + x.id for x in self.tree_ids ]
+            + [ 'blob' + x.name + x.id for x in self.blob_ids ]
+            + [ x.type + x.name + x.id for x in self.other_ids ])
+        sha_obj = sha1()
+        # Sorted so the digest does not depend on the stored entry order
+        for line in sorted(lines):
+            sha_obj.update(line)
+        return sha_obj.hexdigest()
+
+    def set_context(self, commit_or_tree, name=None):
+        '''Attach the ephemeral attributes.
+
+        With ``name``, ``commit_or_tree`` is the parent tree: this tree
+        takes its repo and commit from it and records parent and name.
+        Without ``name``, ``commit_or_tree`` is the commit itself.
+        '''
+        assert commit_or_tree is not self
+        self.repo = commit_or_tree.repo
+        if name:
+            self.commit = commit_or_tree.commit
+            self.parent = commit_or_tree
+            self.name = name
+        else:
+            self.commit = commit_or_tree
+
+    def readme(self):
+        '''Return ``(name, html)`` for the first blob matching README_RE.
+
+        When no README exists (``name`` is then None) or its text is empty,
+        ``html`` is an "Empty File" placeholder.  Otherwise the text is
+        rendered through ``g.pypeline_markup`` when ``renderer(name)[1]``
+        is truthy, and wrapped in ``<pre>`` when it is not.
+        '''
+        name = None
+        text = ''
+        for x in self.blob_ids:
+            if README_RE.match(x.name):
+                name = x.name
+                text = h.really_unicode(self.repo.blob_text(x.id))
+                break
+        if text == '':
+            text = '<p><em>Empty File</em></p>'
+        else:
+            renderer = g.pypeline_markup.renderer(name)
+            if renderer[1]:
+                text = g.pypeline_markup.render(name,text)
+            else:
+                # NOTE(review): repository content is interpolated without
+                # HTML escaping -- confirm the caller escapes or sanitizes
+                text = '<pre>%s</pre>' % text
+        return (name, text)
+
+    def ls(self):
+        '''List the entries of this tree.
+
+        Returns a list of dicts with keys ``kind``, ``name``, ``href`` and
+        ``last_commit``: subtrees first (kind 'DIR'), then blobs (kind
+        'FILE'), then other entries (kind is their stored type), each
+        group sorted by name.  ``last_commit`` is the stored commit_info
+        for the entry, or a dict of the same keys set to None when no
+        last-commit record exists.
+        '''
+        # Load last commit info
+        oids = [ x.id for x in chain(self.tree_ids, self.blob_ids, self.other_ids) ]
+        lc_index = dict(
+            (lc.object_id, lc.commit_info)
+            for lc in LastCommitDoc.m.find(dict(
+                    repo_id=self.repo._id,
+                    object_id={'$in': oids})))
+        results = []
+        def _get_last_commit(oid):
+            lc = lc_index.get(oid)
+            if lc is None:
+                lc = dict(
+                    author=None,
+                    author_email=None,
+                    author_url=None,
+                    date=None,
+                    id=None,
+                    href=None,
+                    shortlink=None,
+                    summary=None)
+            return lc
+        for x in sorted(self.tree_ids, key=lambda x:x.name):
+            results.append(dict(
+                    kind='DIR',
+                    name=x.name,
+                    href=x.name + '/',
+                    last_commit=_get_last_commit(x.id)))
+        for x in sorted(self.blob_ids, key=lambda x:x.name):
+            # NOTE(review): files get the same trailing '/' as directories
+            # -- confirm this is what the templates expect
+            results.append(dict(
+                    kind='FILE',
+                    name=x.name,
+                    href=x.name + '/',
+                    last_commit=_get_last_commit(x.id)))
+        for x in sorted(self.other_ids, key=lambda x:x.name):
+            results.append(dict(
+                    kind=x.type,
+                    name=x.name,
+                    href=None,
+                    last_commit=_get_last_commit(x.id)))
+        return results
+
+    def path(self):
+        '''Slash-terminated path of this tree: '/' for a tree without a
+        parent, otherwise the parent's path plus this tree's name.'''
+        if self.parent:
+            assert self.parent is not self
+            return self.parent.path() + self.name + '/'
+        else:
+            return '/'
+
+    def url(self):
+        '''URL of this tree: the commit URL, then 'tree', then the path.'''
+        return self.commit.url() + 'tree' + self.path()
+
+    @LazyProperty
+    def by_name(self):
+        '''Entries of this tree keyed by name.
+
+        Subtree and blob entries are copied into plain dicts with a
+        'type' key added; blobs win over subtrees, which win over other
+        entries, when names collide.
+        '''
+        d = dict((x.name, x) for x in self.other_ids)
+        d.update(
+            (x.name, dict(x, type='tree'))
+            for x in self.tree_ids)
+        d.update(
+            (x.name, dict(x, type='blob'))
+            for x in self.blob_ids)
+        return d
+
+    def is_blob(self, name):
+        '''True when the entry called ``name`` is a blob.  Raises KeyError
+        when this tree has no such entry.'''
+        # by_name holds plain dicts for trees/blobs, so use item access
+        return self.by_name[name]['type'] == 'blob'
+
+# Register the plain classes above against their document collections, both
+# through repository_orm_session.
+mapper(Commit, CommitDoc, repository_orm_session)
+mapper(Tree, TreeDoc, repository_orm_session)
+
+def commitlog(commit_id, skip=0, limit=sys.maxint):
+    '''Return a generator of commit IDs reachable from ``commit_id``.
+
+    The commit graph is first rebuilt from the commit runs stored in
+    ``CommitRunDoc``.  IDs are then yielded so that a commit comes out only
+    after all of its known children; among the commits that are ready, a
+    parent just released by the previous commit is preferred, otherwise the
+    one with the latest commit time.
+
+    ``skip`` IDs are traversed without being yielded and at most ``limit``
+    IDs are yielded.  Commits for which no run is stored are left out, so
+    the log simply ends where the stored graph ends.
+    '''
+
+    # Commit graph, filled in by _visit
+    ci_times = {}
+    ci_parents = {}
+    ci_children = defaultdict(set)
+    seen = set()
+
+    def _visit(start_id):
+        # Explicit stack instead of recursion, so histories made of very
+        # many runs cannot exhaust the interpreter's recursion limit
+        pending = [start_id]
+        while pending:
+            current = pending.pop()
+            if current in seen: continue
+            run = CommitRunDoc.m.get(commit_ids=current)
+            if run is None: continue
+            # Only the part of the run from `current` onwards is reachable
+            found = False
+            for pos, (oid, time) in enumerate(izip(run.commit_ids, run.commit_times)):
+                if oid == current: found = True
+                elif not found: continue
+                seen.add(oid)
+                ci_times[oid] = time
+                if pos+1 < len(run.commit_ids):
+                    ci_parents[oid] = [ run.commit_ids[pos+1] ]
+                else:
+                    ci_parents[oid] = run.parent_commit_ids
+            # Reversed so the first parent is popped (visited) first
+            pending.extend(reversed(run.parent_commit_ids))
+
+    def _gen_ids(commit_id, skip, limit):
+        # Traverse the graph in topo order, yielding commit IDs
+        commits = set()
+        if commit_id in ci_times:
+            commits.add(commit_id)
+        new_parent = None
+        while commits and limit:
+            # next commit is latest commit that's valid to log
+            if new_parent in commits:
+                ci = new_parent
+            else:
+                ci = max(commits, key=lambda ci:ci_times[ci])
+            commits.remove(ci)
+            # remove this commit from its parents children and add any childless
+            # parents to the 'ready set'.  This has to happen even for
+            # skipped commits, otherwise the traversal stops at the first skip
+            new_parent = None
+            for oid in ci_parents[ci]:
+                children = ci_children[oid]
+                children.discard(ci)
+                # parents without a stored run have no time/parent info
+                if not children and oid in ci_times:
+                    commits.add(oid)
+                    new_parent = oid
+            if skip:
+                skip -= 1
+                continue
+            limit -= 1
+            yield ci
+
+    # Load all the runs to build a commit graph
+    log.info('Build commit graph')
+    _visit(commit_id)
+    for oid, parents in ci_parents.iteritems():
+        for ci_parent in parents:
+            ci_children[ci_parent].add(oid)
+
+    return _gen_ids(commit_id, skip, limit)