from pprint import pprint
import csv
import urllib2
import json
import time
import feedparser
from html2text import html2text
class TracExport(object):
TICKET_URL = '/ticket/%d'
    QUERY_MAX_ID_URL = '/query?col=id&order=id&desc=1&max=2'
    QUERY_BY_PAGE_URL = '/query?col=id&col=time&col=changetime&order=id&max=100&page=%d'
    # max=0 asks Trac for an unpaginated dump of all matching tickets.
    QUERY_URL = '/query?col=id&order=id&max=0'
FIELD_MAP = {
'reporter': 'submitter',
'owner': 'assigned_to',
}
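    # Example of the remapping done by remap_fields() below
    # (field values are hypothetical):
    #   {'reporter': 'joe', 'owner': 'ann', 'private': '1'}
    #   -> {'submitter': 'joe', 'assigned_to': 'ann', 'private': True}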
def __init__(self, base_url):
self.base_url = base_url
        # Contains additional info for a ticket which cannot be
        # fetched with a single-ticket export (create/mod times
        # are an example).
self.ticket_map = {}
    def remap_fields(self, fields):
        "Remap Trac field names to adhere to the standard taxonomy."
        out = {}
        for k, v in fields.iteritems():
            out[self.FIELD_MAP.get(k, k)] = v
if 'private' in out:
out['private'] = bool(int(out['private']))
return out
    def full_url(self, suburl, fmt):
        # E.g. full_url('/ticket/9', 'csv') -> base_url + '/ticket/9?format=csv'
        glue = '&' if '?' in suburl else '?'
        return self.base_url + suburl + glue + 'format=' + fmt
def csvopen(self, url):
print url
f = urllib2.urlopen(url)
        # Trac doesn't return a 403 error; it just renders a normal
        # 200 HTML page saying that access is denied. So, we emulate
        # the 403 ourselves.
        # TODO: currently, any non-CSV result is treated as a 403.
if not f.info()['Content-Type'].startswith('text/csv'):
raise urllib2.HTTPError(url, 403, 'Forbidden - emulated', f.info(), f)
return f
def parse_ticket_body(self, id):
# Use CSV export to get ticket fields
url = self.full_url(self.TICKET_URL % id, 'csv')
f = self.csvopen(url)
reader = csv.DictReader(f)
ticket_fields = reader.next()
ticket_fields['class'] = 'ARTIFACT'
return self.remap_fields(ticket_fields)
def parse_ticket_comments(self, id):
# Use RSS export to get ticket comments
d = feedparser.parse(self.full_url(self.TICKET_URL % id, 'rss'))
        # pprint(d['entries'])
res = []
for comment in d['entries']:
c = {}
c['submitter'] = comment.author
            # feedparser gives 'updated_parsed' as a UTC time.struct_time;
            # convert it here, because json serializes tuple subclasses
            # like struct_time as plain arrays, bypassing DateJSONEncoder.
            c['date'] = time.strftime('%Y-%m-%dT%H:%M:%SZ', comment.updated_parsed)
c['comment'] = html2text(comment.summary)
c['class'] = 'COMMENT'
res.append(c)
return res
def get_ticket(self, id):
t = self.parse_ticket_body(id)
t['comments'] = self.parse_ticket_comments(id)
if id in self.ticket_map:
t.update(self.ticket_map[id])
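        # The assembled ticket now looks like (values hypothetical):
        #   {'summary': '...', 'submitter': 'joe', 'class': 'ARTIFACT',
        #    'date': '2010-02-03T12:34:56Z', 'comments': [...]}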
return t
    def get_ticket_ids_csv(self):
        'Get all available ticket ids from a single, unpaginated CSV export.'
        url = self.full_url(self.QUERY_URL, 'csv')
        f = self.csvopen(url)
        reader = csv.reader(f)
        reader.next()   # skip the column-header row
        ids = [int(r[0]) for r in reader if r and r[0].isdigit()]
        return ids
    def get_ticket_ids(self):
        # As Trac has only one tracker and numbers tickets sequentially,
        # we have two choices here:
        # 1. Get the last existing ticket id and just make a sequence
        #    from 1 to that id. But then we should be ready for the fact
        #    that some tickets in this range will be unavailable.
        # 2. Export the artifact list and get the ids which are really
        #    accessible to the current user.
        # The artifact list has to be paginated, which costs some time
        # and extra traffic, but it only yields real, accessible tickets,
        # so the second method is used by default.
        if False:   # flip to True to use the max-id sequence instead
            max_id = self.get_max_ticket_id()
            return xrange(1, max_id + 1)
        else:
            return self.enumerate_ticket_ids()
    def get_max_ticket_id(self):
        # The query is ordered by id descending, so the first row
        # carries the highest ticket id.
        url = self.full_url(self.QUERY_MAX_ID_URL, 'csv')
        f = self.csvopen(url)
        reader = csv.DictReader(f)
        fields = reader.next()
        return int(fields['id'])
@staticmethod
def trac2z_date(s):
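        # Convert Trac's CSV timestamp, e.g. '2010-02-03 12:34:56+00:00'
        # (always 25 chars, UTC), to ISO 8601 'Zulu': '2010-02-03T12:34:56Z'.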
assert len(s) == 25
assert s.endswith('+00:00')
return s[0:10] + 'T' + s[11:19] + 'Z'
    def enumerate_ticket_ids(self, page=1, limit=-1):
        'Go through the ticket list and collect available ticket ids.'
        # We could just do a CSV export, which by default dumps the
        # entire list. Alas, on many busy servers with long ticket
        # lists, that would simply time out. So, let's paginate instead.
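        # limit counts pages: limit=N fetches at most N pages of 100 ids
        # each, while the default limit=-1 keeps going until Trac reports
        # that the page is beyond the query.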
res = []
while limit != 0:
url = self.full_url(self.QUERY_BY_PAGE_URL % page, 'csv')
try:
f = self.csvopen(url)
except urllib2.HTTPError, e:
if 'emulated' in e.msg:
body = e.fp.read()
if 'beyond the number of pages in the query' in body:
break
raise
reader = csv.reader(f)
cols = reader.next()
for r in reader:
if r and r[0].isdigit():
id = int(r[0])
self.ticket_map[id] = {'date': self.trac2z_date(r[1]), 'date_updated': self.trac2z_date(r[2])}
res.append(id)
page += 1
if limit > 0:
limit -= 1
return res
class DateJSONEncoder(json.JSONEncoder):
def default(self, obj):
if isinstance(obj, time.struct_time):
return time.strftime('%Y-%m-%dT%H:%M:%SZ', obj)
return json.JSONEncoder.default(self, obj)
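# A minimal sketch of the fallback this encoder provides (epoch example):
#   DateJSONEncoder().default(time.gmtime(0)) -> '1970-01-01T00:00:00Z'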
if __name__ == '__main__':
TRAC_BASE_URL = 'http://sourceforge.net/apps/trac/sourceforge'
ex = TracExport(TRAC_BASE_URL)
# d = ex.parse_ticket_body(9)
# pprint(d)
# d = ex.parse_ticket_comments(9)
# pprint(d)
# d = ex.get_ticket(9)
# pprint(d)
# d = ex.get_max_ticket_id()
# d = ex.get_ticket_ids()
    # Smoke-test: enumerate the first page of ids, then export ticket #3.
    d = ex.enumerate_ticket_ids(limit=1)
    ids = [3]
    doc = [ex.get_ticket(i) for i in ids]
print json.dumps(doc, cls=DateJSONEncoder, indent=2)
# print d
# print ex.ticket_map