--- /dev/null
+++ b/trac-import/TracExport.py
@@ -0,0 +1,178 @@
+from pprint import pprint
+import csv
+import urllib2
+import json
+import time
+
+import feedparser
+from html2text import html2text
+
+
+class TracExport(object):
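+    """Export tickets from a remote Trac instance.
+
+    Uses only Trac's built-in CSV and RSS exports, so nothing needs to
+    be installed on the server side.
+    """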
+
+    TICKET_URL = '/ticket/%d'
+    # Unpaginated id-only query (assumed form, mirroring the paginated
+    # query below without its max/page arguments)
+    QUERY_URL = '/query?col=id&order=id'
+    QUERY_MAX_ID_URL = '/query?col=id&order=id&desc=1&max=2'
+    QUERY_BY_PAGE_URL = '/query?col=id&order=id&max=100&page=%d'
+
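+    # Trac field name -> standard field name used by the importer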
+ FIELD_MAP = {
+ 'reporter': 'submitter',
+ 'owner': 'assigned_to',
+ }
+
+ def __init__(self, base_url):
+ self.base_url = base_url
+
+    def remap_fields(self, fields):
+        "Remap fields to adhere to standard taxonomy."
+        out = {}
+        for k, v in fields.iteritems():
+            out[self.FIELD_MAP.get(k, k)] = v
+
+        if 'private' in out:
+            # Trac exports 'private' as a numeric string ('0'/'1')
+            out['private'] = bool(int(out['private']))
+        return out
+
+    def full_url(self, suburl, fmt):
+        "Return the full export URL for suburl in the given format."
+        glue = '&' if '?' in suburl else '?'
+        return self.base_url + suburl + glue + 'format=' + fmt
+
+    def csvopen(self, url):
+        print url
+        f = urllib2.urlopen(url)
+        # On access denial, Trac doesn't return a 403 error - it returns
+        # a normal 200 HTML page saying access is denied. So, we'll
+        # emulate the 403 ourselves.
+        # TODO: currently, any non-CSV result is treated as a 403.
+        if not f.info()['Content-Type'].startswith('text/csv'):
+            raise urllib2.HTTPError(url, 403, 'Forbidden - emulated', f.info(), f)
+        return f
+
+ def parse_ticket_body(self, id):
+ # Use CSV export to get ticket fields
+ url = self.full_url(self.TICKET_URL % id, 'csv')
+ f = self.csvopen(url)
+ reader = csv.DictReader(f)
+ ticket_fields = reader.next()
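+        # Tag the record type so the importer can tell fields from comments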
+ ticket_fields['class'] = 'ARTIFACT'
+ return self.remap_fields(ticket_fields)
+
+ def parse_ticket_comments(self, id):
+ # Use RSS export to get ticket comments
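+        # Each feed entry corresponds to one ticket change/comment;
+        # html2text converts its HTML summary to markdown-like text.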
+ d = feedparser.parse(self.full_url(self.TICKET_URL % id, 'rss'))
+        # pprint(d['entries'])
+ res = []
+ for comment in d['entries']:
+ c = {}
+ c['submitter'] = comment.author
+ c['date'] = comment.updated_parsed
+ c['comment'] = html2text(comment.summary)
+ c['class'] = 'COMMENT'
+ res.append(c)
+ return res
+
+ def get_ticket(self, id):
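+        # Combine the CSV field export and the RSS comment feed into one record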
+ t = self.parse_ticket_body(id)
+ t['comments'] = self.parse_ticket_comments(id)
+ return t
+
+    def get_ticket_ids_csv(self):
+        # Unpaginated export of the full ticket list; may time out on
+        # busy servers (see enumerate_ticket_ids for the paginated way).
+        url = self.full_url(self.QUERY_URL, 'csv')
+        f = self.csvopen(url)
+        reader = csv.reader(f)
+        cols = reader.next()
+        ids = [int(r[0]) for r in reader]
+        return ids
+
+    def get_ticket_ids(self):
+        # As Trac has only one tracker and numbers tickets sequentially,
+        # we have two choices here:
+        # 1. Get the last existing ticket id and just make the sequence
+        #    from 1 to that id, being prepared for the fact that some
+        #    tickets in this range will be unavailable.
+        # 2. Export the ticket list and get the ids which are really
+        #    accessible to the current user. This requires paginating
+        #    the list, which takes some time and extra traffic.
+        # The flag below currently selects the second method.
+        if False:
+            max_id = self.get_max_ticket_id()
+            return xrange(1, max_id + 1)
+        else:
+            return self.enumerate_ticket_ids()
+
+ def get_max_ticket_id(self):
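+        # desc=1 sorts ids descending, so the first row holds the max id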
+ url = self.full_url(self.QUERY_MAX_ID_URL, 'csv')
+ f = self.csvopen(url)
+ reader = csv.DictReader(f)
+ fields = reader.next()
+ print fields
+ return int(fields['id'])
+
+    def enumerate_ticket_ids(self, page=1):
+        'Go through the ticket list and collect available ticket ids.'
+        # We could just do a CSV export, which by default dumps the entire
+        # list. Alas, for many busy servers with a long ticket list, that
+        # would just time out. So, let's paginate it instead.
+ res = []
+ while True:
+ url = self.full_url(self.QUERY_BY_PAGE_URL % page, 'csv')
+ try:
+ f = self.csvopen(url)
+ except urllib2.HTTPError, e:
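+                # Our emulated 403 wraps Trac's HTML error page; if it
+                # says we went past the last page, we're done.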
+ if 'emulated' in e.msg:
+ body = e.fp.read()
+ if 'beyond the number of pages in the query' in body:
+ break
+ raise
+            reader = csv.reader(f)
+            cols = reader.next()
+            # Guard against blank or non-numeric rows
+            ids = [int(r[0]) for r in reader if r and r[0].isdigit()]
+            res += ids
+ page += 1
+
+ return res
+
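+# feedparser returns dates as time.struct_time, which the stock JSON
+# encoder can't serialize; render them as ISO 8601 strings instead.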
+class DateJSONEncoder(json.JSONEncoder):
+ def default(self, obj):
+ if isinstance(obj, time.struct_time):
+ return time.strftime('%Y-%m-%dT%H:%M:%SZ', obj)
+ return json.JSONEncoder.default(self, obj)
+
+if __name__ == '__main__':
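+    # Smoke test: export a sample ticket and dump it as JSON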
+ TRAC_BASE_URL = 'http://sourceforge.net/apps/trac/sourceforge'
+ ex = TracExport(TRAC_BASE_URL)
+# d = ex.parse_ticket_body(9)
+# pprint(d)
+# d = ex.parse_ticket_comments(9)
+# pprint(d)
+# d = ex.get_ticket(9)
+# pprint(d)
+# d = ex.get_max_ticket_id()
+# d = ex.get_ticket_ids()
+ ids = [3]
+ doc = [ex.get_ticket(i) for i in ids]
+ print json.dumps(doc, cls=DateJSONEncoder, indent=2)
+