--- a/Allura/test-light.py
+++ b/Allura/test-light.py
@@ -5,6 +5,7 @@
 from datetime import datetime
 
 from pylons import c
+from pymongo.errors import DuplicateKeyError
 
 from ming.base import Object
 
@@ -57,6 +58,12 @@
         refresh_commit_info(ci, seen)
         if (i+1) % 100 == 0:
             log.info('Refresh commit info %d: %s', (i+1), oid)
+
+    #############################################
+    # Everything below here is repo-agnostic
+    #############################################
+
+    refresh_repo(commit_ids, c.app.repo._id)
 
     # Refresh child references
     seen = set()
@@ -79,14 +86,12 @@
     # Refresh basic blocks
     bbb = BasicBlockBuilder(commit_ids)
     bbb.run()
+    bbb.cleanup()
 
     # Verify the log
     log.info('Logging via basic blocks')
-    with open('log.txt', 'w') as fp:
-        for i, ci in enumerate(commitlog(commit_ids[0])):
-            print >> fp, repr(ci)
-            log.info('%r', ci)
-    log.info('... done (%d commits from %s)', i, commit_ids[0])
+    commit_count = sum(1 for ci in commitlog(commit_ids[0], skip=2000, limit=50))
+    log.info('... done (%d commits from %s)', commit_count, commit_ids[0])
 
     # Refresh trees
     cache = {}
@@ -115,34 +121,45 @@
     return new_cache
 
 def refresh_commit_info(ci, seen):
-    ci_doc = M.repo.Commit(dict(
-            _id=ci.hexsha,
-            tree_id=ci.tree.hexsha,
-            committed = Object(
-                name=h.really_unicode(ci.committer.name),
-                email=h.really_unicode(ci.committer.email),
-                date=datetime.utcfromtimestamp(
-                    ci.committed_date-ci.committer_tz_offset)),
-            authored = Object(
-                name=h.really_unicode(ci.author.name),
-                email=h.really_unicode(ci.author.email),
-                date=datetime.utcfromtimestamp(
-                    ci.authored_date-ci.author_tz_offset)),
-            message=h.really_unicode(ci.message or ''),
-            child_ids=[],
-            parent_ids = [ p.hexsha for p in ci.parents ]))
+    if M.repo.Commit.m.find(dict(_id=ci.hexsha)).count() != 0:
+        return
+    try:
+        ci_doc = M.repo.Commit(dict(
+                _id=ci.hexsha,
+                tree_id=ci.tree.hexsha,
+                committed = Object(
+                    name=h.really_unicode(ci.committer.name),
+                    email=h.really_unicode(ci.committer.email),
+                    date=datetime.utcfromtimestamp(
+                        ci.committed_date-ci.committer_tz_offset)),
+                authored = Object(
+                    name=h.really_unicode(ci.author.name),
+                    email=h.really_unicode(ci.author.email),
+                    date=datetime.utcfromtimestamp(
+                        ci.authored_date-ci.author_tz_offset)),
+                message=h.really_unicode(ci.message or ''),
+                child_ids=[],
+                parent_ids = [ p.hexsha for p in ci.parents ]))
+        ci_doc.m.insert(safe=True)
+    except DuplicateKeyError:
+        return
     refresh_tree(ci.tree, seen)
-    ci_doc.m.save(safe=False)
-    return True
+
+def refresh_repo(commit_ids, repo_id):
+    for oids in utils.chunked_iter(commit_ids, QSIZE):
+        oids = list(oids)
+        M.repo.Commit.m.update_partial(
+            dict(
+                _id={'$in': oids},
+                repo_ids={'$ne': repo_id}),
+            {'$addToSet': dict(repo_ids=repo_id)},
+            multi=True)
 
 def refresh_children(ci):
-    '''
-    TODO: make sure we remove basic blocks created by previous refreshes when
-    there are extra children added.
-    '''
     M.repo.Commit.m.update_partial(
         dict(_id={'$in': ci.parent_ids}),
-        {'$addToSet': dict(child_ids=ci._id)})
+        {'$addToSet': dict(child_ids=ci._id)},
+        multi=True)
 
 class BasicBlockBuilder(object):
 
@@ -172,6 +189,32 @@
             bb.score = len(bb.commit_ids)
             bb.m.save()
         return self.blocks
+
+    def _all_blocks(self):
+        blocks = {}
+        for oids in utils.chunked_iter(self.commit_ids, QSIZE):
+            oids = list(oids)
+            for bb in M.repo.BasicBlock.m.find(dict(commit_ids={'$in': oids})):
+                blocks[bb._id] = bb
+        seen_bids = set()
+        blocks = blocks.values()
+        while blocks:
+            bb = blocks.pop()
+            if bb._id in seen_bids: continue
+            seen_bids.add(bb._id)
+            yield bb
+            for bb in M.repo.BasicBlock.m.find(
+                dict(commit_ids={'$in':bb.parent_commit_ids})):
+                blocks.append(bb)
+
+    def cleanup(self):
+        '''Delete non-maximal basic blocks'''
+        for bb1 in self._all_blocks():
+            for bb2 in M.repo.BasicBlock.m.find(dict(
+                    commit_ids=bb1.commit_ids[0])):
+                if bb2._id == bb1._id: continue
+                log.info('... delete %r (part of %r)', bb2, bb1)
+                bb2.m.delete()
     def merge_blocks(self):
         while True: