import os
import shutil
import string
import logging
import subprocess
from subprocess import Popen, PIPE
from hashlib import sha1
from cStringIO import StringIO
from datetime import datetime
import tg
import pysvn
import pylons
pylons.c = pylons.tmpl_context
pylons.g = pylons.app_globals
from pymongo.errors import DuplicateKeyError
from pylons import c
from ming.base import Object
from ming.orm import Mapper, FieldProperty, session
from ming.utils import LazyProperty
import allura.tasks
from allura import model as M
from allura.lib import helpers as h
from allura.model.repository import GitLikeTree
from allura.model.auth import User
log = logging.getLogger(__name__)
class Repository(M.Repository):
    '''SVN-backed repository document.

    Thin subclass of the generic ``M.Repository`` model that plugs in the
    pysvn-based :class:`SVNImplementation` and adapts branch/log semantics
    to subversion's single linear revision history.
    '''
    # Tool metadata used to register this SCM type with the platform.
    tool_name='SVN'
    repo_id='svn'
    type_s='SVN Repository'
    class __mongometa__:
        # Mongo collection backing these documents.
        name='svn-repository'
    # Field exists for interface parity with other SCM types; SVN has no real
    # branches, so refresh leaves it empty (see SVNImplementation.refresh_heads).
    branches = FieldProperty([dict(name=str,object_id=str)])

    @LazyProperty
    def _impl(self):
        # Backend implementation, constructed lazily once per document.
        return SVNImplementation(self)

    def _log(self, rev, skip, max_count):
        '''Return up to max_count log entries starting at rev, after skipping
        skip entries; [] when rev does not resolve to a commit.'''
        ci = self.commit(rev)
        if ci is None: return []
        return ci.log(int(skip), int(max_count))

    def clone_command(self, category, username=''):
        '''Return a string suitable for copy/paste that would clone this repo locally
        category is one of 'ro' (read-only), 'rw' (read/write), or 'https' (read/write via https)
        '''
        if not username and c.user not in (None, User.anonymous()):
            username = c.user.username
        # Prefer a category-specific template; fall back to the tool-wide one.
        tpl = string.Template(tg.config.get('scm.clone.%s.%s' % (category, self.tool)) or
                              tg.config.get('scm.clone.%s' % self.tool))
        # NOTE(review): assumes the 'checkout_url' app option is always set
        # (a missing key would make .get() return None and break the '+') --
        # confirm against the app's config defaults.
        return tpl.substitute(dict(username=username,
                                   source_url=self.clone_url(category, username)+c.app.config.options.get('checkout_url'),
                                   dest_path=self.suggested_clone_dest_path()))

    def compute_diffs(self):
        # Intentional no-op for SVN (diff info is built per-commit in
        # SVNImplementation.refresh_commit_info instead).
        return

    def count(self, *args, **kwargs):
        # Single linear history: ignore any branch args and count with
        # branch=None.
        return super(Repository, self).count(None)

    def log(self, branch=None, offset=0, limit=10):
        '''Paged commit log; branch is treated as a revision spec.'''
        return list(self._log(rev=branch, skip=offset, max_count=limit))

    def latest(self, branch=None):
        '''Return the latest Commit document, cross-checking the on-disk repo.'''
        if self._impl is None: return None
        if not self.heads: return None
        last_id = self.heads[0].object_id
        # check the latest revision on the real repo because sometimes the refresh gets stuck
        info = self._impl._svn.info2(
            self._impl._url,
            revision=pysvn.Revision(pysvn.opt_revision_kind.head),
            recurse=False)[0][1]
        if info.rev.number > int(last_id.split(':')[1]):
            # Mongo is behind the real repo: report the real HEAD and queue
            # a background refresh to catch the database up.
            last_id = self._impl._oid(info.rev.number)
            # the repo is in a bad state, run a refresh
            allura.tasks.repo_tasks.refresh.post()
        return self._impl.commit(last_id)
class SVNCalledProcessError(Exception):
    '''Raised when an external svn helper command exits with nonzero status.

    Carries the failed command line, its exit code, and the captured
    stdout/stderr so failures can be diagnosed from the message alone.
    '''
    def __init__(self, cmd, returncode, stdout, stderr):
        self.cmd = cmd
        self.returncode = returncode
        self.stdout = stdout
        self.stderr = stderr

    def __str__(self):
        details = (self.cmd, self.returncode, self.stdout, self.stderr)
        return ("Command: '%s' returned non-zero exit status %s\n"
                "STDOUT: %s\nSTDERR: %s") % details
class SVNImplementation(M.RepositoryImplementation):
    '''pysvn-backed implementation of the repository interface for SVN.

    Commit object ids ("oids") are strings of the form
    ``'<mongo repo _id>:<revno>'`` (see ``_oid``/``_revno``); tree and blob
    ids are sha1 digests derived from a commit id plus a path.
    '''
    # Shell stub installed as the post-commit hook; the curl call hits the
    # site's refresh URL so the web app re-indexes after each commit.
    post_receive_template = string.Template(
        '#!/bin/bash\n'
        '# The following line is required for site integration, do not remove/modify\n'
        'curl -s $url\n')

    def __init__(self, repo):
        self._repo = repo

    @LazyProperty
    def _svn(self):
        # One pysvn client per implementation instance, created on first use.
        return pysvn.Client()

    @LazyProperty
    def _url(self):
        # file:// URL of the on-disk repository.
        return 'file://%s%s' % (self._repo.fs_path, self._repo.name)

    def shorthand_for_commit(self, oid):
        '''Short display form for a commit oid, e.g. "[r42]".'''
        return '[r%d]' % self._revno(oid)

    def url_for_commit(self, commit):
        '''App URL for a commit; accepts either an oid string or a commit object.'''
        if isinstance(commit, basestring):
            object_id = commit
        else:
            object_id = commit._id
        return '%s%d/' % (
            self._repo.url(), self._revno(object_id))

    def init(self, default_dirs=True, skip_special_files=False):
        '''Create a fresh svn repository on disk via ``svnadmin create``.

        default_dirs: also commit the conventional trunk/tags/branches
        layout as the first revision.  skip_special_files: skip hook
        installation (used by clone_from, which installs its own hook first).
        '''
        fullname = self._setup_paths()
        log.info('svn init %s', fullname)
        # Wipe any stale repo at the same path before recreating it.
        if os.path.exists(fullname):
            shutil.rmtree(fullname)
        subprocess.call(['svnadmin', 'create', self._repo.name],
                        stdin=subprocess.PIPE,
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE,
                        cwd=self._repo.fs_path)
        if not skip_special_files:
            self._setup_special_files()
        self._repo.status = 'ready'
        # make first commit with dir structure
        if default_dirs:
            # Check out into a scratch working copy, add the three standard
            # dirs, commit, then discard the working copy.
            self._repo._impl._svn.checkout('file://'+fullname, fullname+'/tmp')
            os.mkdir(fullname+'/tmp/trunk')
            os.mkdir(fullname+'/tmp/tags')
            os.mkdir(fullname+'/tmp/branches')
            self._repo._impl._svn.add(fullname+'/tmp/trunk')
            self._repo._impl._svn.add(fullname+'/tmp/tags')
            self._repo._impl._svn.add(fullname+'/tmp/branches')
            self._repo._impl._svn.checkin([fullname+'/tmp/trunk',fullname+'/tmp/tags',fullname+'/tmp/branches'],'Initial commit')
            shutil.rmtree(fullname+'/tmp')

    def clone_from(self, source_url):
        '''Initialize a repo as a clone of another using svnsync'''
        self.init(default_dirs=False, skip_special_files=True)
        self._repo.status = 'importing'
        session(self._repo).flush()
        log.info('Initialize %r as a clone of %s',
                 self._repo, source_url)
        # Need a pre-revprop-change hook for cloning
        fn = os.path.join(self._repo.fs_path, self._repo.name,
                          'hooks', 'pre-revprop-change')
        with open(fn, 'wb') as fp:
            fp.write('#!/bin/sh\n')
        os.chmod(fn, 0755)
        def check_call(cmd):
            # Run cmd, feeding 'p\n' on stdin (answers svnsync's certificate
            # prompt with "permanently accept"); raise on nonzero exit.
            p = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
            stdout, stderr = p.communicate(input='p\n')
            if p.returncode != 0:
                raise SVNCalledProcessError(cmd, p.returncode, stdout, stderr)
        check_call(['svnsync', 'init', self._url, source_url])
        check_call(['svnsync', '--non-interactive', 'sync', self._url])
        self._repo.status = 'analyzing'
        session(self._repo).flush()
        log.info('... %r cloned, analyzing', self._repo)
        self._repo.refresh(notify=False)
        self._repo.status = 'ready'
        log.info('... %s ready', self._repo)
        session(self._repo).flush()
        self._setup_special_files()

    def refresh_heads(self):
        '''Record the repo's HEAD revision as the single head document.'''
        info = self._svn.info2(
            self._url,
            revision=pysvn.Revision(pysvn.opt_revision_kind.head),
            recurse=False)[0][1]
        oid = self._oid(info.rev.number)
        self._repo.heads = [ Object(name=None, object_id=oid) ]
        # Branches and tags aren't really supported in subversion
        self._repo.branches = []
        self._repo.repo_tags = []
        session(self._repo).flush()

    def commit(self, rev):
        '''Look up a Commit document by revision spec.

        rev may be None or 'HEAD' (current head), an int / digit string
        revision number, or a full oid.  Returns None when not found.
        '''
        if rev in ('HEAD', None):
            if not self._repo.heads: return None
            oid = self._repo.heads[0].object_id
        elif isinstance(rev, int) or rev.isdigit():
            oid = self._oid(rev)
        else:
            oid = rev
        result = M.repo.Commit.query.get(_id=oid)
        if result is None: return None
        result.set_context(self._repo)
        return result

    def all_commit_ids(self):
        '''Oids for every revision, r1 through HEAD inclusive.'''
        head_revno = self._revno(self._repo.heads[0].object_id)
        return map(self._oid, range(1, head_revno+1))

    def new_commits(self, all_commits=False):
        '''Oids not yet stored in mongo (or every oid when all_commits).'''
        head_revno = self._revno(self._repo.heads[0].object_id)
        oids = [ self._oid(revno) for revno in range(1, head_revno+1) ]
        if all_commits:
            return oids
        # Find max commit id -- everything greater than that will be "unknown"
        # All of this repo's oids share the '<repo _id>:' prefix, so a $gt
        # range scan plus a startswith cutoff walks exactly our commits.
        prefix = self._oid('')
        q = M.repo.Commit.query.find(
            dict(
                type='commit',
                _id={'$gt':prefix},
                ),
            dict(_id=True)
            )
        seen_oids = set()
        for d in q.ming_cursor.cursor:
            oid = d['_id']
            if not oid.startswith(prefix): break
            seen_oids.add(oid)
        return [
            oid for oid in oids if oid not in seen_oids ]

    def refresh_commit_info(self, oid, seen_object_ids, lazy=True):
        '''(Re)build the CommitDoc and DiffInfoDoc for one revision.

        Returns True when work was done, False when lazy and the commit
        document already exists.  seen_object_ids is not used here;
        presumably part of the interface shared with other SCM
        implementations -- confirm against the base class.
        '''
        from allura.model.repo import CommitDoc, DiffInfoDoc
        ci_doc = CommitDoc.m.get(_id=oid)
        if ci_doc and lazy: return False
        revno = self._revno(oid)
        rev = self._revision(oid)
        try:
            log_entry = self._svn.log(
                self._url,
                revision_start=rev,
                limit=1,
                discover_changed_paths=True)[0]
        except pysvn.ClientError:
            # Treat unreadable revisions as empty rather than failing the
            # whole refresh.
            log.info('ClientError processing %r %r, treating as empty', oid, self._repo, exc_info=True)
            log_entry = Object(date='', message='', changed_paths=[])
        log_date = None
        if hasattr(log_entry, 'date'):
            log_date = datetime.utcfromtimestamp(log_entry.date)
        # SVN has no separate author/committer or email; reuse one identity.
        user = Object(
            name=log_entry.get('author', '--none--'),
            email='',
            date=log_date)
        args = dict(
            tree_id=None,
            committed=user,
            authored=user,
            message=log_entry.get("message", "--none--"),
            parent_ids=[],
            child_ids=[])
        # Linear history: the sole parent is always the previous revno.
        if revno > 1:
            args['parent_ids'] = [ self._oid(revno-1) ]
        if ci_doc:
            ci_doc.update(**args)
            ci_doc.m.save()
        else:
            ci_doc = CommitDoc(dict(args, _id=oid))
            try:
                ci_doc.m.insert(safe=True)
            except DuplicateKeyError:
                # Raced with another refresh; nothing more to do when lazy.
                if lazy: return False
        # Save diff info
        di = DiffInfoDoc.make(dict(_id=ci_doc._id, differences=[]))
        for path in log_entry.changed_paths:
            # rhs = state at this revision (Added/Modified/Replaced paths).
            if path.action in ('A', 'M', 'R'):
                rhs_info = self._svn.info2(
                    self._url + h.really_unicode(path.path),
                    revision=self._revision(ci_doc._id),
                    recurse=False)[0][1]
                rhs_id = self._obj_oid(ci_doc._id, rhs_info)
            else:
                rhs_id = None
            # lhs = state in the parent revision (Deleted/Modified/Replaced).
            if path.action in ('D', 'M', 'R'):
                try:
                    lhs_info = self._svn.info2(
                        self._url + h.really_unicode(path.path),
                        revision=self._revision(ci_doc.parent_ids[0]),
                        recurse=False)[0][1]
                    lhs_id = self._obj_oid(ci_doc._id, lhs_info)
                except pysvn.ClientError, e:
                    # pysvn will sometimes report new files as 'M'odified,
                    # causing info2() to raise ClientError since the file
                    # doesn't exist in the parent revision. Set lhs_id = None
                    # to treat like a newly added file.
                    log.debug(e)
                    lhs_id = None
            else:
                lhs_id = None
            di.differences.append(dict(
                name=h.really_unicode(path.path),
                lhs_id=lhs_id,
                rhs_id=rhs_id))
        di.m.save()
        return True

    def compute_tree_new(self, commit, tree_path='/'):
        '''Build (upsert) the Tree document for commit at tree_path.

        Returns the tree oid, or None when svn cannot list the path.
        '''
        from allura.model import repo as RM
        # NOTE(review): drops the last character of tree_path -- assumes
        # callers always pass a path ending in '/'; confirm at call sites.
        tree_path = tree_path[:-1]
        tree_id = self._tree_oid(commit._id, tree_path)
        tree, isnew = RM.Tree.upsert(tree_id)
        if not isnew: return tree_id
        log.debug('Computing tree for %s: %s',
                  self._revno(commit._id), tree_path)
        rev = self._revision(commit._id)
        try:
            infos = self._svn.info2(
                self._url + tree_path,
                revision=rev,
                depth=pysvn.depth.immediates)
        except pysvn.ClientError:
            log.exception('Error computing tree for %s: %s(%s)',
                          self._repo, commit, tree_path)
            tree.delete()
            return None
        log.debug('Compute tree for %d paths', len(infos))
        # infos[0] is the directory itself; its children follow.
        for path, info in infos[1:]:
            last_commit_id = self._oid(info['last_changed_rev'].number)
            last_commit = M.repo.Commit.query.get(_id=last_commit_id)
            M.repo_refresh.set_last_commit(
                self._repo._id,
                self._tree_oid(commit._id, path),
                M.repo_refresh.get_commit_info(last_commit))
            if info.kind == pysvn.node_kind.dir:
                tree.tree_ids.append(Object(
                    id=self._tree_oid(commit._id, path),
                    name=path))
            elif info.kind == pysvn.node_kind.file:
                # NOTE(review): files are keyed with _tree_oid here, while
                # _obj_oid uses _blob_oid for files -- looks inconsistent;
                # confirm which id consumers of blob_ids expect.
                tree.blob_ids.append(Object(
                    id=self._tree_oid(commit._id, path),
                    name=path))
            else:
                assert False
        session(tree).flush(tree)
        return tree_id

    def _tree_oid(self, commit_id, path):
        '''Stable sha1 id for the tree at path as of commit_id.'''
        data = 'tree\n%s\n%s' % (commit_id, h.really_unicode(path))
        return sha1(data.encode('utf-8')).hexdigest()

    def _blob_oid(self, commit_id, path):
        '''Stable sha1 id for the blob at path as of commit_id.'''
        data = 'blob\n%s\n%s' % (commit_id, h.really_unicode(path))
        return sha1(data.encode('utf-8')).hexdigest()

    def _obj_oid(self, commit_id, info):
        '''Tree or blob oid for a pysvn info entry, chosen by node kind.'''
        path = info.URL[len(info.repos_root_URL):]
        if info.kind == pysvn.node_kind.dir:
            return self._tree_oid(commit_id, path)
        else:
            return self._blob_oid(commit_id, path)

    def log(self, object_id, skip, count):
        '''Walk revision history backward from object_id.

        Skips `skip` revisions, collects up to `count` oids, and returns
        (oids, next) where next is a one-element list holding the
        continuation oid, or [] when history is exhausted.
        '''
        revno = self._revno(object_id)
        result = []
        while count and revno:
            if skip == 0:
                result.append(self._oid(revno))
                count -= 1
            else:
                skip -= 1
            revno -= 1
        if revno:
            return result, [ self._oid(revno) ]
        else:
            return result, []

    def open_blob(self, blob):
        '''Return a file-like object containing the blob's content.'''
        data = self._svn.cat(
            self._url + blob.path(),
            revision=self._revision(blob.commit._id))
        return StringIO(data)

    def blob_size(self, blob):
        '''Size of the blob in bytes; 0 when svn cannot provide it.'''
        try:
            # NOTE(review): uses blob.commit.object_id while open_blob uses
            # blob.commit._id -- confirm both attributes exist on commits.
            data = self._svn.list(
                self._url + blob.path(),
                revision=self._revision(blob.commit.object_id),
                dirent_fields=pysvn.SVN_DIRENT_SIZE)
        except pysvn.ClientError:
            log.info('ClientError getting filesize %r %r, returning 0', blob.path(), self._repo, exc_info=True)
            return 0
        try:
            size = data[0][0]['size']
        except (IndexError, KeyError):
            log.info('Error getting filesize: bad data from svn client %r %r, returning 0', blob.path(), self._repo, exc_info=True)
            size = 0
        return size

    def _setup_hooks(self):
        'Set up the post-commit and pre-revprop-change hooks'
        # post-commit: curl the site's refresh URL (see post_receive_template).
        text = self.post_receive_template.substitute(
            url=tg.config.get('base_url', 'http://localhost:8080')
            + '/auth/refresh_repo' + self._repo.url())
        fn = os.path.join(self._repo.fs_path, self._repo.name, 'hooks', 'post-commit')
        with open(fn, 'wb') as fp:
            fp.write(text)
        os.chmod(fn, 0755)
        # pre-revprop-change: no-op script so revprop edits are permitted.
        fn = os.path.join(self._repo.fs_path, self._repo.name, 'hooks', 'pre-revprop-change')
        with open(fn, 'wb') as fp:
            fp.write('#!/bin/sh\n')
        os.chmod(fn, 0755)

    def _revno(self, oid):
        '''Integer revision number parsed from an oid ('<repo _id>:<revno>').'''
        return int(oid.split(':')[1])

    def _revision(self, oid):
        '''pysvn Revision object for the revision an oid names.'''
        return pysvn.Revision(
            pysvn.opt_revision_kind.number,
            self._revno(oid))

    def _oid(self, revno):
        '''Oid string '<repo _id>:<revno>' for a revision number.'''
        return '%s:%s' % (self._repo._id, revno)
# Compile all Ming mappers declared above so the model classes are queryable.
Mapper.compile_all()