--- a/Allura/allura/model/repo.py
+++ b/Allura/allura/model/repo.py
@@ -1,77 +1,59 @@
+import re
+import sys
+import logging
+from hashlib import sha1
+from itertools import izip, chain
from datetime import datetime
-
-from ming import Document, Field
+from collections import defaultdict
+
+from pylons import g
+
+from ming import Field, Index, collection
from ming import schema as S
-
+from ming.utils import LazyProperty
+from ming.orm import mapper
+
+from allura.lib import utils
+from allura.lib import helpers as h
+
+from .auth import User
from .session import main_doc_session, project_doc_session
-
-class Commit(Document):
- class __mongometa__:
- name = 'repo_ci'
- session = main_doc_session
- indexes = [
- ('parent_ids',),
- ('child_ids',),
- ('repo_ids',)]
- User = dict(name=str, email=str, date=datetime)
-
- _id = Field(str)
- tree_id = Field(str)
- committed = Field(User)
- authored = Field(User)
- message = Field(str)
- parent_ids = Field([str])
- child_ids = Field([str])
- repo_ids = Field([S.ObjectId()])
-
- def __repr__(self):
- return '%s %s' % (
- self._id[:7], self.summary)
-
- @property
- def summary(self):
- if self.message:
- summary = []
- for line in self.message.splitlines():
- line = line.rstrip()
- if line: summary.append(line)
- else: return ' '.join(summary)
- return ' '.join(summary)
- return ''
-
- def url(self):
- return ''
-
- def shorthand_id(self):
- return ''
-
- @property
- def author_url(self):
- return ''
-
-class Tree(Document):
- class __mongometa__:
- name = 'repo_tree'
- session = main_doc_session
- ObjType=S.OneOf('blob', 'tree', 'submodule')
-
- _id = Field(str)
- tree_ids = Field([dict(name=str, id=str)])
- blob_ids = Field([dict(name=str, id=str)])
- other_ids = Field([dict(name=str, id=str, type=ObjType)])
-
-class LastCommit(Document):
- class __mongometa__:
- name = 'repo_last_commit'
- session = project_doc_session
- indexes = [
- ( 'repo_id', 'object_id'),
- ]
-
- _id = Field(str)
- repo_id=Field(S.ObjectId())
- object_id=Field(str)
- commit_info = Field(dict(
+from .session import repository_orm_session
+
+# Module-level logger
+log = logging.getLogger(__name__)
+
+# Schema fragment used for the 'committed' and 'authored' fields of a commit
+SUser = dict(name=str, email=str, date=datetime)
+# Entry types accepted in the 'type' key of a tree's 'other_ids' entries
+SObjType=S.OneOf('blob', 'tree', 'submodule')
+# Number of commit IDs looked up per query when iterating a commit log
+QSIZE = 100
+# Matches names such as README, readme.txt or ReadMe.md (case-insensitive).
+# Written as a raw string so the '\.' escape reaches the regex engine as-is.
+README_RE = re.compile(r'^README(\.[^.]*)?$', re.IGNORECASE)
+
+# Basic commit information.
+# One document per commit in the 'repo_ci' collection, keyed by the commit ID
+# ('_id').  'committed' and 'authored' use the SUser shape (name, email,
+# date); 'parent_ids', 'child_ids' and 'repo_ids' are each indexed.
+CommitDoc = collection(
+ 'repo_ci', main_doc_session,
+ Field('_id', str),
+ Field('tree_id', str),
+ Field('committed', SUser),
+ Field('authored', SUser),
+ Field('message', str),
+ Field('parent_ids', [str], index=True),
+ Field('child_ids', [str], index=True),
+ Field('repo_ids', [ S.ObjectId() ], index=True))
+
+# Basic tree information.
+# One document per tree in the 'repo_tree' collection.  Entries are split
+# into three lists of {name, id} dicts: sub-trees, blobs, and "other"
+# entries, which additionally carry a 'type' restricted to SObjType.
+TreeDoc = collection(
+ 'repo_tree', main_doc_session,
+ Field('_id', str),
+ Field('tree_ids', [dict(name=str, id=str)]),
+ Field('blob_ids', [dict(name=str, id=str)]),
+ Field('other_ids', [dict(name=str, id=str, type=SObjType)]))
+
+# Information about the last commit to touch a tree/blob
+LastCommitDoc = collection(
+ 'repo_last_commit', project_doc_session,
+ Field('_id', str),
+ Field('repo_id', S.ObjectId()),
+ Field('object_id', str),
+ Field('commit_info', dict(
id=str,
date=datetime,
author=str,
@@ -79,58 +61,298 @@
author_url=str,
href=str,
shortlink=str,
- summary=str))
-
- @classmethod
- def set_last_commit(cls, repo_id, oid, commit):
- lc = cls(dict(
- _id='%s:%s' % (repo_id, oid),
- repo_id=repo_id,
- object_id=oid,
- commit_info=dict(
- id=commit._id,
- author=commit.authored.name,
- author_email=commit.authored.email,
- author_url=commit.author_url,
- date=commit.authored.date,
- href=commit.url(),
- shortlink=commit.shorthand_id(),
- summary=commit.summary)))
- lc.m.save(safe=False)
- return lc
-
-class Trees(Document):
- class __mongometa__:
- name = 'repo_trees'
- session = main_doc_session
-
- _id = Field(str) # commit ID
- tree_ids = Field([str]) # tree IDs
-
-class DiffInfo(Document):
- class __mongometa__:
- name = 'repo_diffinfo'
- session = main_doc_session
-
- _id = Field(str)
- differences = Field([dict(name=str, lhs_id=str, rhs_id=str)])
-
-class BasicBlock(Document):
- class __mongometa__:
- name = 'repo_basic_block'
- session = main_doc_session
- indexes = [
- ('commit_ids',) ]
-
- _id = Field(str)
- parent_commit_ids = Field([str])
- commit_ids = Field([str])
- commit_times = Field([datetime])
+ summary=str)),
+ Index('repo_id', 'object_id'))
+
+# List of all trees contained within a commit.
+# Stored in the 'repo_trees' collection; 'tree_ids' is a flat list of
+# tree ID strings.  (presumably '_id' is the commit ID -- confirm with the
+# code that writes these documents)
+TreesDoc = collection(
+ 'repo_trees', main_doc_session,
+ Field('_id', str),
+ Field('tree_ids', [str]))
+
+# Information about which things were added/removed in commit.
+# Stored in the 'repo_diffinfo' collection; each entry of 'differences'
+# names an object and gives its ID on the left-hand side ('lhs_id') and
+# right-hand side ('rhs_id') of the comparison.
+DiffInfoDoc = collection(
+ 'repo_diffinfo', main_doc_session,
+ Field('_id', str),
+ Field(
+ 'differences',
+ [ dict(name=str, lhs_id=str, rhs_id=str)]))
+
+# List of commit runs (a run is a linear series of single-parent commits).
+# Stored in the 'repo_commitrun' collection.  As consumed by commitlog():
+# 'commit_ids' and 'commit_times' are parallel lists, each commit's parent
+# is the next entry of 'commit_ids', and 'parent_commit_ids' holds the
+# parents of the last commit in the run.  'commit_ids' is indexed so the run
+# containing a given commit can be looked up.
+CommitRunDoc = collection(
+ 'repo_commitrun', main_doc_session,
+ Field('_id', str),
+ Field('parent_commit_ids', [str]),
+ Field('commit_ids', [str], index=True),
+ Field('commit_times', [datetime]))
+
+class RepoObject(object):
+    '''Helpers shared by the repository model classes defined below.'''
+
def __repr__(self):
- return '%s: (P %s, T %s..%s (%d commits))' % (
- self._id[:6],
- [ oid[:6] for oid in self.parent_commit_ids ],
- self.commit_ids[0][:6],
- self.commit_ids[-1][:6],
- len(self.commit_ids))
+        # Rendered as <ClassName _id>
+        cls_name = self.__class__.__name__
+        return '<%s %s>' % (cls_name, self._id)
+
+    def primary(self):
+        '''Return the object itself.'''
+        return self
+
+    def index_id(self):
+        '''Globally unique artifact identifier.  Used for
+        SOLR ID, shortlinks, and maybe elsewhere
+        '''
+        cls = self.__class__
+        dotted = '%s.%s#%s' % (cls.__module__, cls.__name__, self._id)
+        # Every '.' becomes '/', including any that occur inside _id
+        return dotted.replace('.', '/')
+
+class Commit(RepoObject):
+    '''A single commit, mapped onto ``CommitDoc``.
+
+    Several methods delegate to ``self.repo``.  That attribute is not part
+    of the stored document, so it has to be assigned (directly or through
+    ``set_context``) before those methods are called.
+    '''
+    # Ephemeral attrs
+    repo=None
+
+    def set_context(self, repo):
+        '''Attach the repository through which this commit is being viewed.'''
+        self.repo = repo
+
+    @LazyProperty
+    def author_url(self):
+        '''URL of the user registered with the author's email, else None.'''
+        u = User.by_email_address(self.authored.email)
+        if u: return u.url()
+
+    @LazyProperty
+    def committer_url(self):
+        '''URL of the user registered with the committer's email, else None.'''
+        u = User.by_email_address(self.committed.email)
+        if u: return u.url()
+
+    @LazyProperty
+    def tree(self):
+        '''The Tree referenced by ``tree_id``, with this commit as context.
+
+        When ``tree_id`` is unset, or no tree document is found for it, the
+        repo is asked to compute the tree and the lookup is retried once.
+        Returns None if there is still no tree.
+        '''
+        if self.tree_id is None:
+            self.tree_id = self.repo.compute_tree(self)
+        if self.tree_id is None:
+            return None
+        # Tree documents are keyed by '_id'; TreeDoc has no 'object_id' field
+        t = Tree.query.get(_id=self.tree_id)
+        if t is None:
+            self.tree_id = self.repo.compute_tree(self)
+            t = Tree.query.get(_id=self.tree_id)
+        if t is not None: t.set_context(self)
+        return t
+
+    @LazyProperty
+    def summary(self):
+        '''First line of the commit message, truncated to 50 characters.'''
+        message = h.really_unicode(self.message)
+        first_line = message.split('\n')[0]
+        return h.text.truncate(first_line, 50)
+
+    def get_path(self, path):
+        '''Return the blob on the given path'''
+        # NOTE(review): relies on Tree.get_blob, which the Tree class in this
+        # module does not define as far as this file shows -- confirm.
+        if path.startswith('/'): path = path[1:]
+        path_parts = path.split('/')
+        return self.tree.get_blob(path_parts[-1], path_parts[:-1])
+
+    def shorthand_id(self):
+        '''Short identifier for this commit, as chosen by the repo.'''
+        return self.repo.shorthand_for_commit(self)
+
+    @LazyProperty
+    def symbolic_ids(self):
+        '''Whatever the repo reports as symbolic names for this commit.'''
+        return self.repo.symbolics_for_commit(self)
+
+    def url(self):
+        '''URL of this commit, as built by the repo.'''
+        return self.repo.url_for_commit(self)
+
+    def log_iter(self, skip, count):
+        '''Yield Commit objects from this commit's log.
+
+        IDs come from ``commitlog(self._id)``, in that order.  The first
+        ``skip`` IDs are dropped and at most ``count`` commits are yielded
+        (``count`` of None means no upper bound).  Commits are fetched QSIZE
+        at a time and given this commit's repo as context.
+        '''
+        from itertools import islice
+        skip = skip or 0
+        stop = None if count is None else skip + count
+        # Slice the ID stream first so skipped commits are never loaded
+        wanted = islice(commitlog(self._id), skip, stop)
+        for oids in utils.chunked_iter(wanted, QSIZE):
+            oids = list(oids)
+            commits = dict(
+                (ci._id, ci) for ci in self.query.find(dict(
+                    _id={'$in': oids})))
+            # Re-walk oids so the commitlog order is kept, whatever order
+            # the query returned the documents in
+            for oid in oids:
+                ci = commits[oid]
+                ci.set_context(self.repo)
+                yield ci
+
+    def log(self, skip, count):
+        '''Same as ``log_iter`` but returns a list.'''
+        return list(self.log_iter(skip, count))
+
+    def count_revisions(self):
+        '''Number of commit IDs produced by ``commitlog`` for this commit.'''
+        # commitlog expects a commit ID, not the Commit object itself
+        return sum(1 for oid in commitlog(self._id))
+
+    def context(self):
+        '''Return ``dict(prev=..., next=...)``: the first parent and the
+        first child of this commit, each None when there is none.
+        '''
+        result = dict(prev=None, next=None)
+        if self.parent_ids:
+            result['prev'] = self.query.get(_id=self.parent_ids[0])
+        if self.child_ids:
+            result['next'] = self.query.get(_id=self.child_ids[0])
+        return result
+
+class Tree(RepoObject):
+    '''A directory snapshot, mapped onto ``TreeDoc``.
+
+    ``repo``, ``commit``, ``parent`` and ``name`` are not stored in the
+    document; they are filled in by ``set_context``.
+    '''
+    # Ephemeral attrs
+    repo=None
+    commit=None
+    parent=None
+    name=None
+
+    def compute_hash(self):
+        '''Compute a hash based on the contents of the tree.  Note that this
+        hash does not necessarily correspond to any actual DVCS hash.
+        '''
+        lines = (
+            [ 'tree' + x.name + x.id for x in self.tree_ids ]
+            + [ 'blob' + x.name + x.id for x in self.blob_ids ]
+            + [ x.type + x.name + x.id for x in self.other_ids ])
+        sha_obj = sha1()
+        # Sorted so the digest does not depend on the stored entry order
+        for line in sorted(lines):
+            sha_obj.update(line)
+        return sha_obj.hexdigest()
+
+    def set_context(self, commit_or_tree, name=None):
+        '''Record where this tree was reached from.
+
+        With a ``name``, ``commit_or_tree`` is the parent Tree and this tree
+        inherits its commit; without one, ``commit_or_tree`` is the Commit
+        itself.  The repo is taken from ``commit_or_tree`` either way.
+        '''
+        assert commit_or_tree is not self
+        self.repo = commit_or_tree.repo
+        if name:
+            self.commit = commit_or_tree.commit
+            self.parent = commit_or_tree
+            self.name = name
+        else:
+            self.commit = commit_or_tree
+
+    def readme(self):
+        '''Return ``(name, html)`` for the first blob matching README_RE.
+
+        When no blob matches, or its text is empty, ``html`` is an
+        "Empty File" placeholder (and ``name`` is None if nothing matched).
+        '''
+        name = None
+        text = ''
+        for x in self.blob_ids:
+            if README_RE.match(x.name):
+                name = x.name
+                text = h.really_unicode(self.repo.blob_text(x.id))
+                break
+        if text == '':
+            text = '<p><em>Empty File</em></p>'
+        else:
+            renderer = g.pypeline_markup.renderer(name)
+            if renderer[1]:
+                text = g.pypeline_markup.render(name,text)
+            else:
+                # NOTE(review): the blob text is inserted without HTML
+                # escaping -- confirm the caller escapes or sanitizes it.
+                text = '<pre>%s</pre>' % text
+        return (name, text)
+
+    def ls(self):
+        '''List the entries of this tree.
+
+        Returns a list of dicts with keys ``kind``, ``name``, ``href`` and
+        ``last_commit``: sub-trees first, then blobs, then other entries,
+        each group sorted by name.  ``last_commit`` is the ``commit_info``
+        of the matching LastCommitDoc, or a dict of Nones when none exists.
+        '''
+        # Load last commit info
+        oids = [ x.id for x in chain(self.tree_ids, self.blob_ids, self.other_ids) ]
+        lc_index = dict(
+            (lc.object_id, lc.commit_info)
+            for lc in LastCommitDoc.m.find(dict(
+                repo_id=self.repo._id,
+                object_id={'$in': oids})))
+        results = []
+        def _get_last_commit(oid):
+            lc = lc_index.get(oid)
+            if lc is None:
+                # Placeholder with the same keys as a stored commit_info
+                lc = dict(
+                    author=None,
+                    author_email=None,
+                    author_url=None,
+                    date=None,
+                    id=None,
+                    href=None,
+                    shortlink=None,
+                    summary=None)
+            return lc
+        for x in sorted(self.tree_ids, key=lambda x:x.name):
+            results.append(dict(
+                kind='DIR',
+                name=x.name,
+                href=x.name + '/',
+                last_commit=_get_last_commit(x.id)))
+        # NOTE(review): files get the same trailing '/' in href as
+        # directories -- confirm this is what the blob URLs expect.
+        for x in sorted(self.blob_ids, key=lambda x:x.name):
+            results.append(dict(
+                kind='FILE',
+                name=x.name,
+                href=x.name + '/',
+                last_commit=_get_last_commit(x.id)))
+        for x in sorted(self.other_ids, key=lambda x:x.name):
+            results.append(dict(
+                kind=x.type,
+                name=x.name,
+                href=None,
+                last_commit=_get_last_commit(x.id)))
+        return results
+
+    def path(self):
+        '''Path of this tree: '/' for a tree with no parent, otherwise the
+        parent's path followed by this tree's name and a trailing '/'.
+        '''
+        if self.parent:
+            assert self.parent is not self
+            return self.parent.path() + self.name + '/'
+        else:
+            return '/'
+
+    def url(self):
+        '''URL of this tree: the commit URL + 'tree' + ``path()``.'''
+        return self.commit.url() + 'tree' + self.path()
+
+    @LazyProperty
+    def by_name(self):
+        '''Map entry name -> entry, every entry carrying a 'type' key.
+
+        Sub-tree and blob entries are copied into plain dicts with 'type'
+        added; on a name clash blobs override trees, which override others.
+        '''
+        d = dict((x.name, x) for x in self.other_ids)
+        d.update(
+            (x.name, dict(x, type='tree'))
+            for x in self.tree_ids)
+        d.update(
+            (x.name, dict(x, type='blob'))
+            for x in self.blob_ids)
+        return d
+
+    def is_blob(self, name):
+        '''True if the entry called ``name`` is a blob.
+
+        Raises KeyError when the tree has no entry with that name.
+        '''
+        # by_name holds plain dicts for trees/blobs, so use item access
+        return self.by_name[name]['type'] == 'blob'
+
+# Bind the model classes to their collections on the repository ORM session;
+# Commit and Tree instances get their document fields from these mappings.
+mapper(Commit, CommitDoc, repository_orm_session)
+mapper(Tree, TreeDoc, repository_orm_session)
+
+def commitlog(commit_id, skip=0, limit=sys.maxint):
+    '''Generate the IDs of ``commit_id`` and its ancestors.
+
+    The commit graph is built eagerly, at call time, from the commit runs
+    (``CommitRunDoc``) reachable from ``commit_id``.  The returned generator
+    then walks that graph: a commit becomes ready once all of its known
+    children have been processed, and among ready commits the parent just
+    released by the previous step is preferred, otherwise the one with the
+    latest commit time.
+
+    :param commit_id: ID of the commit to start from
+    :param skip: number of leading commits to walk past without yielding
+    :param limit: maximum number of commit IDs to yield
+    :returns: a generator of commit IDs
+    '''
+
+    seen = set()
+    def _visit(start_id):
+        # Walk the runs iteratively (a long history may span more runs than
+        # the recursion limit allows), recording each commit's time and
+        # parents from the run that contains it.
+        pending = [start_id]
+        while pending:
+            cid = pending.pop()
+            if cid in seen: continue
+            run = CommitRunDoc.m.get(commit_ids=cid)
+            if run is None: continue
+            index = False
+            for pos, (oid, time) in enumerate(izip(run.commit_ids, run.commit_times)):
+                # Only cid and the commits after it in the run are ancestors
+                if oid == cid: index = True
+                elif not index: continue
+                seen.add(oid)
+                ci_times[oid] = time
+                if pos+1 < len(run.commit_ids):
+                    ci_parents[oid] = [ run.commit_ids[pos+1] ]
+                else:
+                    ci_parents[oid] = run.parent_commit_ids
+            # Reversed so the first parent is the next one popped
+            pending.extend(reversed(run.parent_commit_ids))
+
+    def _ready_key(oid):
+        # Commits without a recorded time (not part of any run) sort before
+        # every timed commit instead of raising KeyError
+        return (oid in ci_times, ci_times.get(oid))
+
+    def _gen_ids(commit_id, skip, limit):
+        # Traverse the graph in topo order, yielding commit IDs
+        commits = set([commit_id])
+        new_parent = None
+        while commits and limit:
+            # next commit is latest commit that's valid to log
+            if new_parent in commits:
+                ci = new_parent
+            else:
+                ci = max(commits, key=_ready_key)
+            commits.remove(ci)
+            if skip:
+                # A skipped commit is not yielded, but its parents must
+                # still be released below or the walk would stop here
+                skip -= 1
+            else:
+                limit -= 1
+                yield ci
+            # remove this commit from its parents children and add any childless
+            # parents to the 'ready set'
+            new_parent = None
+            for oid in ci_parents.get(ci, ()):
+                children = ci_children[oid]
+                children.discard(ci)
+                if not children:
+                    commits.add(oid)
+                    new_parent = oid
+
+    # Load all the runs to build a commit graph
+    ci_times = {}
+    ci_parents = {}
+    ci_children = defaultdict(set)
+    log.info('Build commit graph')
+    _visit(commit_id)
+    for oid, parents in ci_parents.iteritems():
+        for ci_parent in parents:
+            ci_children[ci_parent].add(oid)
+
+    return _gen_ids(commit_id, skip, limit)