--- a/scripts/refresh-last-commits.py
+++ b/Allura/allura/scripts/refresh_last_commits.py
@@ -11,127 +11,162 @@
from ming.orm import ThreadLocalORMSession, session
from allura import model as M
-from allura.lib.utils import chunked_find, chunked_list
+from allura.lib.utils import chunked_find
from allura.tasks.repo_tasks import refresh
+from allura.scripts import ScriptTask
log = logging.getLogger(__name__)
-def main(options):
- q_project = {}
- if options.nbhd:
- nbhd = M.Neighborhood.query.get(url_prefix=options.nbhd)
- if not nbhd:
- return "Invalid neighborhood url prefix."
- q_project['neighborhood_id'] = nbhd._id
- if options.project:
- q_project['shortname'] = options.project
- elif options.project_regex:
- q_project['shortname'] = {'$regex': options.project_regex}
+class RefreshLastCommits(ScriptTask):
+    @classmethod
+    def parser(cls):
+        """Build the command-line parser for this script.
+
+        Returns an ``argparse.ArgumentParser`` with options that restrict
+        which projects and repos are processed (``--nbhd``, ``--project``,
+        ``--project-regex``, ``--repo-types``, ``--mount-point``) and options
+        that control the run itself (``--clean``, ``--dry-run``, ``--diffs``).
+        """
+        def _repo_type_list(s):
+            """Parse a comma-separated string of repo types into a list.
+
+            Each entry is stripped of surrounding whitespace and must be
+            'git' or 'hg'; anything else raises argparse.ArgumentTypeError.
+            """
+            repo_types = []
+            for repo_type in s.split(','):
+                repo_type = repo_type.strip()
+                if repo_type not in ['git', 'hg']:
+                    raise argparse.ArgumentTypeError(
+                        '{0} is not a valid repo type.'.format(repo_type))
+                repo_types.append(repo_type)
+            return repo_types
+
+        parser = argparse.ArgumentParser(description='Using existing commit data, '
+            'refresh the last commit metadata in MongoDB. Run for all repos (no args), '
+            'or restrict by neighborhood, project, or code tool mount point.')
+        parser.add_argument('--nbhd', action='store', default='', dest='nbhd',
+            help='Restrict update to a particular neighborhood, e.g. /p/.')
+        parser.add_argument('--project', action='store', default='', dest='project',
+            help='Restrict update to a particular project. To specify a '
+            'subproject, use a slash: project/subproject.')
+        parser.add_argument('--project-regex', action='store', default='',
+            dest='project_regex',
+            help='Restrict update to projects for which the shortname matches '
+            'the provided regex.')
+        parser.add_argument('--repo-types', action='store', type=_repo_type_list,
+            default=['git', 'hg'], dest='repo_types',
+            help='Only refresh last commits for repos of the given type(s). Defaults to: '
+            'git,hg. Example: --repo-types=git')
+        parser.add_argument('--mount-point', default='', dest='mount_point',
+            help='Restrict update to repos at the given tool mount point. ')
+        parser.add_argument('--clean', action='store_true', dest='clean',
+            default=False, help='Remove last commit mongo docs for '
+            'project(s) being refreshed before doing the refresh.')
+        # The help text previously stopped mid-sentence ("...would have their ");
+        # the second half of the sentence is restored here.
+        parser.add_argument('--dry-run', action='store_true', dest='dry_run',
+            default=False, help='Log names of projects that would have their '
+            'last commits refreshed, but do not perform the actual refresh.')
+        parser.add_argument('--diffs', action='store_true', dest='diffs',
+            default=False, help='Refresh / clean diffs as well as LCDs')
+        return parser
- log.info('Refreshing last commit data')
+ @classmethod
+ def execute(cls, options):
+ """Refresh last-commit data for the repos selected by ``options``.
+
+ Builds a project filter from ``options.nbhd``, ``options.project`` and
+ ``options.project_regex``, then walks the matching projects in chunks.
+ For each repo whose tool type is listed in ``options.repo_types`` the
+ repo status is set to 'analyzing', its last-commit data is optionally
+ cleaned and then recomputed, and the status is set back to 'ready'.
+
+ Returns the string "Invalid neighborhood url prefix." when
+ ``options.nbhd`` is given but matches no neighborhood; no other
+ return statement is visible in this method. Errors raised while
+ refreshing a repo are logged and re-raised.
+ """
+ # Build the MongoDB filter used to select projects.
+ q_project = {}
+ if options.nbhd:
+ nbhd = M.Neighborhood.query.get(url_prefix=options.nbhd)
+ if not nbhd:
+ return "Invalid neighborhood url prefix."
+ q_project['neighborhood_id'] = nbhd._id
+ # An exact shortname takes precedence over the regex (if / elif).
+ if options.project:
+ q_project['shortname'] = options.project
+ elif options.project_regex:
+ q_project['shortname'] = {'$regex': options.project_regex}
- for chunk in chunked_find(M.Project, q_project):
- for p in chunk:
- log.info("Refreshing last commit data for project '%s'." % p.shortname)
- if options.dry_run:
+ log.info('Refreshing last commit data')
+
+ for chunk in chunked_find(M.Project, q_project):
+ for p in chunk:
+ log.info("Refreshing last commit data for project '%s'." % p.shortname)
+ # Dry run: the project name was logged above; do no further work.
+ if options.dry_run:
+ continue
+ # NOTE(review): 'c' is not defined in the visible part of this file;
+ # presumably a context global imported above -- confirm.
+ c.project = p
+ # Either the single requested mount point, or every app configured
+ # for this project.
+ if options.mount_point:
+ mount_points = [options.mount_point]
+ else:
+ mount_points = [ac.options.mount_point for ac in
+ M.AppConfig.query.find(dict(project_id=p._id))]
+ for app in (p.app_instance(mp) for mp in mount_points):
+ c.app = app
+ # Skip apps that do not expose a 'repo' attribute.
+ if not hasattr(app, 'repo'):
+ continue
+ if c.app.repo.tool.lower() not in options.repo_types:
+ log.info("Skipping %r: wrong type (%s)", c.app.repo,
+ c.app.repo.tool.lower())
+ continue
+
+ # Mark the repo as being analyzed and persist that before the work starts.
+ c.app.repo.status = 'analyzing'
+ session(c.app.repo).flush(c.app.repo)
+ try:
+ # Commit ids are processed in the reverse of the order all_commit_ids() yields.
+ ci_ids = list(reversed(list(c.app.repo.all_commit_ids())))
+ if options.clean:
+ cls._clean(ci_ids, options.diffs)
+
+ log.info('Refreshing all last commits in %r', c.app.repo)
+ cls.refresh_repo_lcds(ci_ids, options)
+ # If unknown_commit_ids() returns anything, queue the refresh task.
+ new_commit_ids = app.repo.unknown_commit_ids()
+ if len(new_commit_ids) > 0:
+ refresh.post()
+ except:
+ # Log with traceback, then let the error propagate to the caller.
+ log.exception('Error refreshing %r', c.app.repo)
+ raise
+ finally:
+ # Always put the status back to 'ready', even when the refresh failed.
+ c.app.repo.status = 'ready'
+ session(c.app.repo).flush(c.app.repo)
+ # NOTE(review): indentation is lost in this patch, so the loop level at
+ # which the two session calls below run cannot be determined from here.
+ ThreadLocalORMSession.flush_all()
+ ThreadLocalORMSession.close_all()
+
+    @classmethod
+    def refresh_repo_lcds(cls, commit_ids, options):
+        """Recompute diffs (optional) and last-commit data for ``c.app.repo``.
+
+        :param commit_ids: commit ids to process, in the order given.
+        :param options: parsed options; only ``options.diffs`` is read here.
+
+        When ``options.diffs`` is set, ``M.repo_refresh.compute_diffs`` is run
+        for every commit first, printing timing stats every 1000 commits.
+        Then ``M.repo_refresh.compute_lcds`` is run for every commit with a
+        shared ``ModelCache``, printing timing stats every 100 commits.
+        Commit ids with no ``Commit`` document are reported and skipped in
+        both passes.
+        """
+        tree_cache = {}
+        timings = []
+        if options.diffs:
+            print 'Processing diffs'
+            for i, commit_id in enumerate(commit_ids):
+                commit = M.repo.Commit.query.get(_id=commit_id)
+                if commit is None:
+                    # Same guard as the last-commit pass below, so a missing
+                    # commit document is skipped instead of being handed to
+                    # compute_diffs as None.
+                    print "Commit missing, skipping: %s" % commit_id
+                    continue
+                with time(timings):
+                    M.repo_refresh.compute_diffs(c.app.repo._id, tree_cache, commit)
+                if i % 1000 == 0:
+                    cls._print_stats(i, timings, 1000)
+
+        lcd_cache = M.repo.ModelCache(
+            max_instances={M.repo.LastCommit: 4000},
+            max_queries={M.repo.LastCommit: 4000},
+        )
+        # Start a fresh timing series for the last-commit pass.
+        timings = []
+        print 'Processing last commits'
+        for i, commit_id in enumerate(commit_ids):
+            commit = M.repo.Commit.query.get(_id=commit_id)
+            if commit is None:
+                print "Commit missing, skipping: %s" % commit_id
                 continue
- c.project = p
- if options.mount_point:
- mount_points = [options.mount_point]
- else:
- mount_points = [ac.options.mount_point for ac in
- M.AppConfig.query.find(dict(project_id=p._id))]
- for app in (p.app_instance(mp) for mp in mount_points):
- c.app = app
- if not hasattr(app, 'repo'):
- continue
- if c.app.repo.tool.lower() not in options.repo_types:
- log.info("Skipping %r: wrong type (%s)", c.app.repo,
- c.app.repo.tool.lower())
- continue
+            commit.set_context(c.app.repo)
+            # NOTE(review): indentation is lost in this patch; the per-commit
+            # flush is placed inside the timed block here -- confirm against
+            # the real file.
+            with time(timings):
+                M.repo_refresh.compute_lcds(commit, lcd_cache)
+                ThreadLocalORMSession.flush_all()
+            if i % 100 == 0:
+                cls._print_stats(i, timings, 100)
+        # Final flush after the whole pass.
+        ThreadLocalORMSession.flush_all()
- c.app.repo.status = 'analyzing'
- session(c.app.repo).flush(c.app.repo)
- try:
- ci_ids = list(reversed(list(c.app.repo.all_commit_ids())))
- if options.clean:
- if options.diffs:
- # delete DiffInfoDocs
- i = M.repo.DiffInfoDoc.m.find(dict(commit_ids={'$in': ci_ids})).count()
- log.info("Deleting %i DiffInfoDoc docs, by repo id...", i)
- M.repo.LastCommitDoc.m.remove(dict(commit_ids={'$in': ci_ids}))
+    @classmethod
+    def _clean(cls, commit_ids, clean_diffs):
+        """Delete stored last-commit docs (and optionally diff docs).
+
+        :param commit_ids: commit ids whose documents should be removed.
+        :param clean_diffs: when true, DiffInfoDoc documents are removed as
+            well as LastCommitDoc documents.
+
+        The number of matching documents is logged before each removal.
+        """
+        # NOTE(review): the 'commit_ids' field name in the query is carried
+        # over unchanged for both collections -- confirm it matches each schema.
+        query = dict(commit_ids={'$in': commit_ids})
+        if clean_diffs:
+            # delete DiffInfoDocs
+            i = M.repo.DiffInfoDoc.m.find(query).count()
+            log.info("Deleting %i DiffInfoDoc docs for %i commits...", i, len(commit_ids))
+            # Remove from DiffInfoDoc, the collection that was just counted
+            # and logged; this previously removed from LastCommitDoc, so the
+            # diff docs were never deleted.
+            M.repo.DiffInfoDoc.m.remove(query)
- # delete LastCommitDocs
- i = M.repo.LastCommitDoc.m.find(dict(commit_ids={'$in': ci_ids})).count()
- log.info("Deleting %i LastCommitDoc docs, by repo id...", i)
- M.repo.LastCommitDoc.m.remove(dict(commit_ids={'$in': ci_ids}))
+
+        # delete LastCommitDocs
+        i = M.repo.LastCommitDoc.m.find(query).count()
+        log.info("Deleting %i LastCommitDoc docs for %i commits...", i, len(commit_ids))
+        M.repo.LastCommitDoc.m.remove(query)
- log.info('Refreshing all last commits in %r', c.app.repo)
- if options.profile:
- import cProfile
- cProfile.runctx('refresh_repo_lcds(ci_ids, options)',
- globals(), locals(), '/tmp/refresh_lcds.profile')
- else:
- refresh_repo_lcds(ci_ids, options)
- new_commit_ids = app.repo.unknown_commit_ids()
- if len(new_commit_ids) > 0:
- refresh.post()
- except:
- log.exception('Error refreshing %r', c.app.repo)
- raise
- finally:
- c.app.repo.status = 'ready'
- session(c.app.repo).flush(c.app.repo)
- ThreadLocalORMSession.flush_all()
- ThreadLocalORMSession.close_all()
+    @classmethod
+    def _print_stats(cls, processed, timings, debug_step):
+        """Print timing statistics for the work done so far.
+
+        :param processed: value printed as the number of commits processed.
+        :param timings: per-commit durations in seconds.
+        :param debug_step: size of the trailing window used for the moving
+            average ("mavg").
+
+        Prints nothing when ``timings`` is empty.
+        """
+        if not timings:
+            # max() raises ValueError on an empty sequence; nothing to report.
+            return
+        mt = max(timings)
+        tt = sum(timings)
+        at = tt / len(timings)
+        # Average over the samples actually in the window. Dividing by
+        # debug_step under-reports whenever fewer than debug_step timings
+        # exist, which is always the case on the first call.
+        recent = timings[-debug_step:]
+        mat = sum(recent) / len(recent)
+        print ' Processed %d commits (max: %f, avg: %f, mavg: %f, tot: %f)' % (
+            processed, mt, at, mat, tt)
-
-def enum_step(iter, step):
- for i,elem in enumerate(iter):
- if i % step == 0:
- yield i, elem
-
-def refresh_repo_lcds(commit_ids, options):
- tree_cache = {}
- timings = []
- if options.diffs:
- print 'Processing diffs'
- for commit_id in commit_ids:
- commit = M.repo.Commit.query.get(_id=commit_id)
- with time(timings):
- M.repo_refresh.compute_diffs(c.app.repo._id, tree_cache, commit)
- if len(timings) % 1000 == 0:
- _print_stats(timings, 1000)
- lcd_cache = M.repo.ModelCache(
- max_instances={M.repo.LastCommit: 4000},
- max_queries={M.repo.LastCommit: 10000},
- )
- timings = []
- print 'Processing last commits'
- debug_step = int(pow(10, max(0, int(log10(len(commit_ids)) - log10(options.step) - 1))))
- _cids = commit_ids[options.skip:]
- for i, commit_id in enum_step(_cids, options.step):
- commit = M.repo.Commit.query.get(_id=commit_id)
- commit.set_context(c.app.repo)
- with time(timings):
- M.repo_refresh.compute_lcds(commit, lcd_cache)
- ThreadLocalORMSession.flush_all()
- # ensure new LCDs get fully refreshed in the cache
- # so that every commit sees the same copy
- lcd_cache.expire_new_instances(M.repo.LastCommit)
- if len(timings) % debug_step == 0:
- _print_stats(timings, debug_step)
- ThreadLocalORMSession.flush_all()
-
-
-def _print_stats(timings, debug_step):
- mt = max(timings)
- tt = sum(timings)
- at = tt / len(timings)
- mat = sum(timings[-debug_step:]) / debug_step
- print ' Processed %d commits (max: %f, avg: %f, mavg: %f, tot: %f)' % (
- len(timings), mt, at, mat, tt)
@contextmanager
def time(timings):
@@ -140,56 +175,7 @@
timings.append((datetime.now() - s).total_seconds())
-def repo_type_list(s):
- repo_types = []
- for repo_type in s.split(','):
- repo_type = repo_type.strip()
- if repo_type not in ['git', 'hg']:
- raise argparse.ArgumentTypeError(
- '{0} is not a valid repo type.'.format(repo_type))
- repo_types.append(repo_type)
- return repo_types
-
-
-def parse_options():
- parser = argparse.ArgumentParser(description='Using existing commit data, '
- 'refresh the last commit metadata in MongoDB. Run for all repos (no args), '
- 'or restrict by neighborhood, project, or code tool mount point.')
- parser.add_argument('--nbhd', action='store', default='', dest='nbhd',
- help='Restrict update to a particular neighborhood, e.g. /p/.')
- parser.add_argument('--project', action='store', default='', dest='project',
- help='Restrict update to a particular project. To specify a '
- 'subproject, use a slash: project/subproject.')
- parser.add_argument('--project-regex', action='store', default='',
- dest='project_regex',
- help='Restrict update to projects for which the shortname matches '
- 'the provided regex.')
- parser.add_argument('--repo-types', action='store', type=repo_type_list,
- default=['git', 'hg'], dest='repo_types',
- help='Only refresh last commits for repos of the given type(s). Defaults to: '
- 'git,hg. Example: --repo-types=git')
- parser.add_argument('--mount_point', default='', dest='mount_point',
- help='Restrict update to repos at the given tool mount point. ')
- parser.add_argument('--clean', action='store_true', dest='clean',
- default=False, help='Remove last commit mongo docs for '
- 'project(s) being refreshed before doing the refresh.')
- parser.add_argument('--dry-run', action='store_true', dest='dry_run',
- default=False, help='Log names of projects that would have their '
- 'last commits refreshed, but do not perform the actual refresh.')
- parser.add_argument('--profile', action='store_true', dest='profile',
- default=False, help='Enable the profiler (slow). Will log '
- 'profiling output to ./refresh.profile')
- parser.add_argument('--diffs', action='store_true', dest='diffs',
- default=False, help='Refresh diffs as well as LCDs')
- parser.add_argument('--all', action='store_const', dest='step',
- const=1, default=100, help='Refresh the LCD for every commit instead of every 100th')
- parser.add_argument('--step', action='store', dest='step',
- type=int, default=100, help='Refresh the LCD for every Nth commit instead of every 100th')
- parser.add_argument('--skip', action='store', dest='skip',
- type=int, default=0, help='Skip a number of commits')
- return parser.parse_args()
if __name__ == '__main__':
- import sys
+ # Enable faulthandler before running the task.
 faulthandler.enable()
- sys.exit(main(parse_options()))
+ # NOTE(review): main() is not defined in the visible part of the class;
+ # presumably inherited from ScriptTask -- confirm.
+ RefreshLastCommits.main()