--- a
+++ b/scripts/refresh-last-commits.py
@@ -0,0 +1,186 @@
+import sys
+import argparse
+import logging
+import re
+from math import pow, log10
+from datetime import datetime
+from contextlib import contextmanager
+
+import faulthandler
+from pylons import c
+from ming.orm import ThreadLocalORMSession
+
+from allura import model as M
+from allura.lib.utils import chunked_find, chunked_list
+
+# Module-level logger, named after this module; used by main() below.
+log = logging.getLogger(__name__)
+
+
+def main(options):
+    """Refresh last-commit data for every repository selected by ``options``.
+
+    Projects are selected by neighborhood url prefix (``options.nbhd``) and by
+    exact shortname (``options.project``) or shortname regex
+    (``options.project_regex``). Within each project, either the single tool
+    at ``options.mount_point`` or every configured tool is considered; tools
+    without a ``repo`` attribute, or whose repo type is not listed in
+    ``options.repo_types``, are skipped.
+
+    With ``options.dry_run`` only the project names are logged. With
+    ``options.clean`` the LastCommitDoc documents (and, when ``options.diffs``
+    is also set, the DiffInfoDoc documents) matching the repo's commit ids are
+    removed before the refresh. With ``options.profile`` the refresh runs
+    under cProfile and the stats are written to /tmp/refresh_lcds.profile.
+
+    Returns an error message string if the neighborhood prefix is unknown,
+    otherwise ``None``. Any exception raised while refreshing a repo is
+    logged and re-raised.
+    """
+    q_project = {}
+    if options.nbhd:
+        nbhd = M.Neighborhood.query.get(url_prefix=options.nbhd)
+        if not nbhd:
+            return "Invalid neighborhood url prefix."
+        q_project['neighborhood_id'] = nbhd._id
+    if options.project:
+        q_project['shortname'] = options.project
+    elif options.project_regex:
+        q_project['shortname'] = {'$regex': options.project_regex}
+
+    log.info('Refreshing last commit data')
+
+    for chunk in chunked_find(M.Project, q_project):
+        for p in chunk:
+            log.info("Refreshing last commit data for project '%s'.", p.shortname)
+            if options.dry_run:
+                continue
+            c.project = p
+            if options.mount_point:
+                mount_points = [options.mount_point]
+            else:
+                mount_points = [ac.options.mount_point for ac in
+                                M.AppConfig.query.find(dict(project_id=p._id))]
+            for mp in mount_points:
+                app = p.app_instance(mp)
+                c.app = app
+                if not hasattr(app, 'repo'):
+                    # Not a repository tool; nothing to refresh.
+                    continue
+                repo_type = c.app.repo.tool.lower()
+                if repo_type not in options.repo_types:
+                    log.info("Skipping %r: wrong type (%s)", c.app.repo,
+                             repo_type)
+                    continue
+
+                # Commit ids in the reverse of the order all_commit_ids()
+                # yields them (presumably oldest first -- confirm).
+                ci_ids = list(reversed(list(c.app.repo.all_commit_ids())))
+                if options.clean:
+                    if options.diffs:
+                        # delete DiffInfoDocs
+                        # NOTE(review): this reuses the ``commit_ids`` filter
+                        # of the LastCommitDoc query below; confirm that
+                        # DiffInfoDoc is actually keyed that way.
+                        i = M.repo.DiffInfoDoc.m.find(dict(commit_ids={'$in': ci_ids})).count()
+                        log.info("Deleting %i DiffInfoDoc docs, by repo id...", i)
+                        # Remove from DiffInfoDoc itself (this used to remove
+                        # LastCommitDoc docs, leaving the diff docs in place).
+                        M.repo.DiffInfoDoc.m.remove(dict(commit_ids={'$in': ci_ids}))
+
+                    # delete LastCommitDocs
+                    i = M.repo.LastCommitDoc.m.find(dict(commit_ids={'$in': ci_ids})).count()
+                    log.info("Deleting %i LastCommitDoc docs, by repo id...", i)
+                    M.repo.LastCommitDoc.m.remove(dict(commit_ids={'$in': ci_ids}))
+
+                try:
+                    log.info('Refreshing all last commits in %r', c.app.repo)
+                    if options.profile:
+                        import cProfile
+                        # runctx evaluates the statement string against
+                        # locals(), so the names ``ci_ids`` and ``options``
+                        # must stay as they are.
+                        cProfile.runctx('refresh_repo_lcds(ci_ids, options)',
+                                        globals(), locals(),
+                                        '/tmp/refresh_lcds.profile')
+                    else:
+                        refresh_repo_lcds(ci_ids, options)
+                except:
+                    # Deliberately bare: log whatever went wrong with the
+                    # repo for context, then let it propagate unchanged.
+                    log.exception('Error refreshing %r', c.app.repo)
+                    raise
+        # Flush and close the ORM sessions once per chunk of projects.
+        ThreadLocalORMSession.flush_all()
+        ThreadLocalORMSession.close_all()
+
+
+def enum_step(iter, step):
+    """Yield ``(index, element)`` for every ``step``-th element of ``iter``.
+
+    Indexes are positions in the original iterable, starting at 0, so the
+    element at index 0 is always yielded first (if there is one).
+    """
+    position = 0
+    for item in iter:
+        if not position % step:
+            yield position, item
+        position += 1
+
+def refresh_repo_lcds(commit_ids, options):
+    """Recompute last-commit data (and optionally diffs) for the current repo.
+
+    The repo is taken from ``c.app.repo``, so the caller must have set
+    ``c.app`` beforehand.
+
+    commit_ids -- list of commit ids to process, in processing order.
+    options    -- parsed command line options; reads ``diffs`` (also
+                  recompute diffs for every commit), ``skip`` (number of
+                  leading commit ids to drop before the last-commit pass) and
+                  ``step`` (only every ``step``-th remaining commit gets its
+                  last-commit data computed).
+
+    Progress and timing statistics are printed to stdout.
+    """
+    tree_cache = {}
+    timings = []
+    if options.diffs:
+        print('Processing diffs')
+        for commit_id in commit_ids:
+            commit = M.repo.Commit.query.get(_id=commit_id)
+            with time(timings):
+                M.repo_refresh.compute_diffs(c.app.repo._id, tree_cache, commit)
+            if len(timings) % 1000 == 0:
+                _print_stats(timings, 1000)
+    lcd_cache = M.repo.ModelCache(
+            max_instances={M.repo.LastCommit: 4000},
+            max_queries={M.repo.LastCommit: 10000},
+        )
+    timings = []
+    print('Processing last commits')
+    # Print stats every ``debug_step`` processed commits: a power of ten that
+    # is roughly a tenth of len(commit_ids) / step, and never below 1.
+    # Clamping the count to 1 keeps log10() defined for a repo that has no
+    # commits; that case used to raise "math domain error" and abort the run.
+    commit_count = max(1, len(commit_ids))
+    debug_step = int(pow(10, max(0, int(log10(commit_count) - log10(options.step) - 1))))
+    _cids = commit_ids[options.skip:]
+    for i, commit_id in enum_step(_cids, options.step):
+        commit = M.repo.Commit.query.get(_id=commit_id)
+        commit.set_context(c.app.repo)
+        with time(timings):
+            M.repo_refresh.compute_lcds(commit, lcd_cache)
+            ThreadLocalORMSession.flush_all()
+            # ensure new LCDs get fully refreshed in the cache
+            # so that every commit sees the same copy
+            lcd_cache.expire_new_instances(M.repo.LastCommit)
+        if len(timings) % debug_step == 0:
+            _print_stats(timings, debug_step)
+    ThreadLocalORMSession.flush_all()
+
+
+def _print_stats(timings, debug_step):
+    """Print a one-line summary of the timings collected so far.
+
+    timings    -- non-empty list of per-commit durations in seconds.
+    debug_step -- size of the trailing window for the moving average; the
+                  sum of the last ``debug_step`` timings is divided by
+                  ``debug_step`` itself.
+    """
+    slowest = max(timings)
+    total = sum(timings)
+    processed = len(timings)
+    overall_avg = total / processed
+    recent = timings[-debug_step:]
+    moving_avg = sum(recent) / debug_step
+    summary = '  Processed %d commits (max: %f, avg: %f, mavg: %f, tot: %f)' % (
+            processed, slowest, overall_avg, moving_avg, total)
+    print(summary)
+
+@contextmanager
+def time(timings):
+    """Context manager that appends the body's wall-clock duration to ``timings``.
+
+    The duration is recorded in seconds (a float). Nothing is appended if
+    the body raises, because the exception propagates out of the ``yield``.
+    """
+    started = datetime.now()
+    yield
+    elapsed = datetime.now() - started
+    timings.append(elapsed.total_seconds())
+
+
+def repo_type_list(s):
+    """argparse ``type=`` converter for a comma-separated list of repo types.
+
+    Splits ``s`` on commas, strips whitespace around each entry and returns
+    the entries as a list, in order. Only 'git' and 'hg' are accepted; any
+    other entry raises argparse.ArgumentTypeError.
+    """
+    allowed = ['git', 'hg']
+    accepted = []
+    for raw_entry in s.split(','):
+        entry = raw_entry.strip()
+        if entry in allowed:
+            accepted.append(entry)
+            continue
+        message = '{0} is not a valid repo type.'.format(entry)
+        raise argparse.ArgumentTypeError(message)
+    return accepted
+
+
+def parse_options():
+    """Build the command line parser and parse ``sys.argv``.
+
+    Returns the argparse namespace with the attributes ``nbhd``, ``project``,
+    ``project_regex``, ``repo_types``, ``mount_point``, ``clean``,
+    ``dry_run``, ``profile``, ``diffs``, ``step`` and ``skip``.
+
+    ``--all`` and ``--step`` both write to ``step``: ``--all`` stores 1,
+    ``--step N`` stores N, and the default is 100.
+    """
+    parser = argparse.ArgumentParser(description='Using existing commit data, '
+            'refresh the last commit metadata in MongoDB. Run for all repos (no args), '
+            'or restrict by neighborhood, project, or code tool mount point.')
+    parser.add_argument('--nbhd', action='store', default='', dest='nbhd',
+            help='Restrict update to a particular neighborhood, e.g. /p/.')
+    parser.add_argument('--project', action='store', default='', dest='project',
+            help='Restrict update to a particular project. To specify a '
+            'subproject, use a slash: project/subproject.')
+    parser.add_argument('--project-regex', action='store', default='',
+            dest='project_regex',
+            help='Restrict update to projects for which the shortname matches '
+            'the provided regex.')
+    parser.add_argument('--repo-types', action='store', type=repo_type_list,
+            default=['git', 'hg'], dest='repo_types',
+            help='Only refresh last commits for repos of the given type(s). Defaults to: '
+            'git,hg. Example: --repo-types=git')
+    parser.add_argument('--mount_point', default='', dest='mount_point',
+            help='Restrict update to repos at the given tool mount point. ')
+    parser.add_argument('--clean', action='store_true', dest='clean',
+            default=False, help='Remove last commit mongo docs for '
+            'project(s) being refreshed before doing the refresh.')
+    parser.add_argument('--dry-run', action='store_true', dest='dry_run',
+            default=False, help='Log names of projects that would have their '
+            'last commits refreshed, but do not perform the actual refresh.')
+    # The help text names the path that main() actually hands to cProfile.
+    parser.add_argument('--profile', action='store_true', dest='profile',
+            default=False, help='Enable the profiler (slow). Will log '
+            'profiling output to /tmp/refresh_lcds.profile')
+    parser.add_argument('--diffs', action='store_true', dest='diffs',
+            default=False, help='Refresh diffs as well as LCDs')
+    parser.add_argument('--all', action='store_const', dest='step',
+            const=1, default=100, help='Refresh the LCD for every commit instead of every 100th')
+    parser.add_argument('--step', action='store', dest='step',
+            type=int, default=100, help='Refresh the LCD for every Nth commit instead of every 100th')
+    parser.add_argument('--skip', action='store', dest='skip',
+            type=int, default=0, help='Skip a number of commits')
+    return parser.parse_args()
+
+if __name__ == '__main__':
+    # Script entry point.
+    import sys
+    # Enable faulthandler before any work is done.
+    faulthandler.enable()
+    cli_options = parse_options()
+    # main() returns an error message or None; sys.exit() receives it as-is.
+    exit_status = main(cli_options)
+    sys.exit(exit_status)