--- a/scripts/teamforge-import.py
+++ b/scripts/teamforge-import.py
@@ -1,19 +1,31 @@
import logging
from getpass import getpass
from optparse import OptionParser
+from pylons import c
import re
import os
import os.path
from time import mktime
+import time
import json
from urlparse import urlparse
from urllib import FancyURLopener
+from datetime import datetime
+from ConfigParser import ConfigParser
+import random
+import string
+import sqlalchemy
from suds.client import Client
from suds import WebFault
-log = logging.getLogger(__file__)
+from ming.orm.ormsession import ThreadLocalORMSession
+from ming.base import Object
+from allura import model as M
+from allura.lib import helpers as h
+from allura.lib import utils
+log = logging.getLogger('teamforge-import')
@@ -25,37 +37,70 @@
options = None
s = None # security token
+client = None # main api client
+users = {}
def make_client(api_url, app):
return Client(api_url + app + '?wsdl', location=api_url + app)
def main():
- global options, s
- optparser = OptionParser(usage='''%prog [--options] [projID projID projID]\nIf no project ids are given, all projects will be migrated''')
- optparser.add_option('--api-url', dest='api_url', help='e.g. https://hostname/ce-soap50/services/')
- optparser.add_option('--attachment-url', dest='attachment_url', default='/sf/%s/do/%s/')
- optparser.add_option('--default-wiki-text', dest='default_wiki_text', default='PRODUCT NAME HERE', help='used in determining if a wiki page text is default or changed')
- optparser.add_option('-u', '--username', dest='username')
- optparser.add_option('-p', '--password', dest='password')
- optparser.add_option('-o', '--output-dir', dest='output_dir', default='teamforge-export/')
- optparser.add_option('--list-project-ids', action='store_true', dest='list_project_ids')
+ global options, s, client, users
+ defaults=dict(
+ api_url=None,
+ attachment_url='/sf/%s/do/%s/',
+ default_wiki_text='PRODUCT NAME HERE',
+ username=None,
+ password=None,
+ output_dir='teamforge-export/',
+ list_project_ids=False,
+ neighborhood=None,
+ neighborhood_shortname=None,
+ skip_frs_download=False,
+ skip_unsupported_check=False)
+ optparser = get_parser(defaults)
options, project_ids = optparser.parse_args()
- c = make_client(options.api_url, 'CollabNet')
- api_v = c.service.getApiVersion()
- if not api_v.startswith('5.4.'):
- log.warning('Unexpected API Version %s. May not work correctly.' % api_v)
- s = c.service.login(options.username, options.password or getpass('Password: '))
- teamforge_v = c.service.getVersion(s)
- if not teamforge_v.startswith('5.4.'):
- log.warning('Unexpected TeamForge Version %s. May not work correctly.' % teamforge_v)
+ if options.config_file:
+ config = ConfigParser()
+ config.read(options.config_file)
+ defaults.update(
+ (k, eval(v)) for k,v in config.items('teamforge-import'))
+ optparser = get_parser(defaults)
+ options, project_ids = optparser.parse_args()
+ # neither specified, so do both
+ if not options.extract and not options.load:
+ options.extract = True
+ options.load = True
+ if options.extract:
+ client = make_client(options.api_url, 'CollabNet')
+ api_v = client.service.getApiVersion()
+ if not api_v.startswith('5.4.'):
+ log.warning('Unexpected API Version %s. May not work correctly.' % api_v)
+ s = client.service.login(options.username, options.password or getpass('Password: '))
+ teamforge_v = client.service.getVersion(s)
+ if not teamforge_v.startswith('5.4.'):
+ log.warning('Unexpected TeamForge Version %s. May not work correctly.' % teamforge_v)
+ if options.load:
+ if not options.neighborhood:
+ log.error('You must specify a neighborhood when loading')
+ return
+ try:
+ nbhd = M.Neighborhood.query.get(name=options.neighborhood)
+ except:
+ log.exception('error querying mongo')
+ log.error('This should be run as "paster script production.ini ../scripts/teamforge-import.py -- ...options.."')
+ return
+ assert nbhd
if not project_ids:
- projects = c.service.getProjectList(s)
+ if not options.extract:
+ log.error('You must specify project ids')
+ return
+ projects = client.service.getProjectList(s)
project_ids = [p.id for p in projects.dataRows]
if options.list_project_ids:
@@ -65,17 +110,469 @@
if not os.path.exists(options.output_dir):
for pid in project_ids:
- project = c.service.getProjectData(s, pid)
- project.shortname = project.path.split('.')[-1]
- log.info('Project: %s %s %s' % (project.id, project.title, project.path))
- out_dir = os.path.join(options.output_dir, project.id)
- if not os.path.exists(out_dir):
- os.mkdir(out_dir)
- get_files(project)
- get_homepage_wiki(project)
- check_unsupported_tools(project)
+ if options.extract:
+ try:
+ project = client.service.getProjectData(s, pid)
+ log.info('Project: %s %s %s' % (project.id, project.title, project.path))
+ out_dir = os.path.join(options.output_dir, project.id)
+ if not os.path.exists(out_dir):
+ os.mkdir(out_dir)
+ get_project(project)
+ get_files(project)
+ get_homepage_wiki(project)
+ get_discussion(project)
+ get_news(project)
+ if not options.skip_unsupported_check:
+ check_unsupported_tools(project)
+ with open(os.path.join(options.output_dir, 'users.json'), 'w') as user_file:
+ json.dump(users, user_file, default=str)
+ except:
+ log.exception('Error extracting %s' % pid)
+ if options.load:
+ try:
+ project = create_project(pid, nbhd)
+ except:
+ log.exception('Error creating %s' % pid)
+def load_users():
+    '''
+    Fill the module-level ``users`` cache from ``users.json`` in the output
+    directory.  Nothing happens when the cache already holds data or when
+    the file does not exist.
+    '''
+    global users
+    cache_path = os.path.join(options.output_dir, 'users.json')
+    if users:
+        return
+    if not os.path.exists(cache_path):
+        return
+    with open(cache_path) as fp:
+        # Object for attribute access
+        users = json.load(fp, object_hook=Object)
+def save_user(usernames):
+    '''
+    Record TeamForge user data in the module-level ``users`` cache.
+
+    ``usernames`` is a single username string or an iterable of usernames.
+    Names already in the cache are not fetched again; a warning is logged
+    for every newly fetched user whose status is not 'Active'.
+    '''
+    names = [usernames] if isinstance(usernames, basestring) else usernames
+    load_users()
+    for name in names:
+        if name in users:
+            continue
+        users[name] = Object(client.service.getUserData(s, name))
+        if users[name].status != 'Active':
+            log.warn('user: %s status: %s' % (name, users[name].status))
+def get_project(project):
+    '''
+    Extract project-level metadata from TeamForge and save it as
+    ``<project id>.json`` in the project's output directory.
+
+    The saved document holds the project data, access level, admins,
+    members, groups and categories.  The project creator, the admins and
+    the members are also recorded through save_user().
+    '''
+    cats = make_client(options.api_url, 'CategorizationApp')
+    data = client.service.getProjectData(s, project.id)
+    # translate the value returned by getProjectAccessLevel into a label;
+    # an unexpected value raises KeyError
+    access_level = {1: 'public', 4: 'private', 3: 'gated community'}[
+        client.service.getProjectAccessLevel(s, project.id)
+    ]
+    admins = client.service.listProjectAdmins(s, project.id).dataRows
+    members = client.service.getProjectMemberList(s, project.id).dataRows
+    groups = client.service.getProjectGroupList(s, project.id).dataRows
+    categories = cats.service.getProjectCategories(s, project.id).dataRows
+    save(json.dumps(dict(
+            data = dict(data),
+            access_level = access_level,
+            admins = map(dict, admins),
+            members = map(dict, members),
+            groups = map(dict, groups),
+            categories = map(dict, categories),
+        ), default=str),
+        project, project.id+'.json')
+    if len(groups):
+        log.warn('Project has groups %s' % groups)
+    for u in admins:
+        # The previous test, "not u.status != 'active'", was an inverted
+        # double negative that could only fire for *active* admins.  The
+        # comparison is case-insensitive because the rest of this script
+        # checks user status against 'Active'.
+        if str(u.status).lower() != 'active':
+            log.warn('inactive admin %s' % u)
+        if u.superUser:
+            log.warn('super user admin %s' % u)
+    save_user(data.createdBy)
+    save_user(u.userName for u in admins)
+    save_user(u.userName for u in members)
+def get_user(orig_username):
+    '''
+    Return the allura User object for a TeamForge username, creating the
+    account first when it does not exist yet.
+
+    The TeamForge name is mapped to an SF username through the hardcoded
+    translation table below; names without an entry get a '-mmi' suffix.
+    Results longer than 15 characters are shortened.
+
+    Raises KeyError when a missing user was never extracted into ``users``,
+    and Exception when the extracted full name or email is invalid.
+    '''
+    normalized = orig_username.replace('_','-').lower()
+    # FIXME username translation is hardcoded here:
+    translations = dict(
+        rlevy = 'ramilevy',
+        mkeisler = 'mkeisler',
+        bthale = 'bthale',
+        mmuller = 'mattjustmull',
+        MalcolmDwyer = 'slagheap',
+        tjyang = 'tjyang',
+        manaic = 'maniac76',
+        srinid = 'cnudav',
+        es = 'est016',
+        david_peyer = 'david-mmi',
+        okruse = 'ottokruse',
+        jvp = 'jvpmoto',
+        dmorelli = 'dmorelli',
+    )
+    # Look up the original spelling first: keys such as 'MalcolmDwyer' and
+    # 'david_peyer' can never equal the lowercased, dash-substituted form,
+    # so those entries used to be silently ignored.
+    if orig_username in translations:
+        sf_username = translations[orig_username]
+    else:
+        sf_username = translations.get(normalized, normalized + '-mmi')
+    if len(sf_username) > 15:
+        adjusted_username = sf_username[0:15-4] + '-mmi'
+        log.error('invalid sf_username length: %s   Changing it to %s' % (sf_username, adjusted_username))
+        sf_username = adjusted_username
+    u = M.User.by_username(sf_username)
+    if not u:
+        load_users()
+        user = users[orig_username]
+        if user.status != 'Active':
+            log.warn('Inactive user %s %s' % (orig_username, user.status))
+        if not 3 <= len(user.fullName) <= 32:
+            raise Exception('invalid fullName length: %s' % user.fullName)
+        if '@' not in user.email:
+            raise Exception('invalid email: %s' % user.email)
+        # FIXME: hardcoded SFX integration
+        from sfx.model import tables as T
+        # placeholder passwords come from the OS entropy source rather than
+        # the default, predictable Mersenne Twister generator
+        rng = random.SystemRandom()
+        nu = T.users.insert()
+        nu.execute(user_name=sf_username.encode('utf-8'),
+                   email=user.email.lower().encode('utf-8'),
+                   realname=user.fullName.encode('utf-8'),
+                   status='A' if user.status == 'Active' else 'D',
+                   language=275, # english trove id
+                   timezone=user.timeZone,
+                   user_pw=''.join(rng.sample(string.printable, 32)),
+                   unix_pw=''.join(rng.sample(string.printable, 32)),
+                   user_pw_modtime=int(time.time()),
+                   mail_siteupdates=0,
+                   add_date=int(time.time()),
+                   )
+        # read back the id of the row that was just inserted
+        user_id = sqlalchemy.select([T.users.c.user_id], T.users.c.user_name==sf_username).execute().fetchone().user_id
+        npref = T.user_preferences.insert()
+        npref.execute(user_id=user_id, preference_name='country', preference_value='US')
+        npref.execute(user_id=user_id, preference_name='opt_research', preference_value=0)
+        npref.execute(user_id=user_id, preference_name='opt_thirdparty', preference_value=0)
+        new_audit = T.audit_trail_user.insert()
+        new_audit.execute(
+            date=int(time.time()),
+            username='nobody',
+            ip_address='(imported)',
+            operation_resource=user_id,
+            operation='%s user account created by TeamForge import script' % user.status,
+            operation_target='',
+        )
+        # the account must now be visible through the allura model
+        u = M.User.by_username(sf_username)
+    assert u
+    return u
+def convert_project_shortname(teamforge_path):
+    '''
+    Convert a TeamForge project path to an SF shortname, validating early.
+
+    The last dot-separated component of the path is taken, underscores
+    become dashes, and the hardcoded overrides are applied.  Raises
+    ValueError unless the result is 3 to 15 characters long.
+    '''
+    # FIXME hardcoded translations
+    overrides = {
+        'i1': 'motorola-i1',
+        'i9': 'motorola-i9',
+        'devplatformforocap': 'ocap-dev-pltfrm',
+    }
+    candidate = teamforge_path.split('.')[-1].replace('_','-')
+    candidate = overrides.get(candidate, candidate)
+    length = len(candidate)
+    if length < 3 or length > 15:
+        raise ValueError('Project name length must be between 3 & 15, inclusive: %s (%s)' %
+                         (candidate, length))
+    return candidate
+# FIXME hardcoded
+# TeamForge usernames that create_project() skips when assigning the Admin
+# and Developer project roles.
+skip_perms_usernames = set([
+    'faisal_saeed','dsarkisian','debonairamit','nishanthiremath','Bhuvnesh','bluetooth','cnkurzke','makow2','jannes1','Joel_Hegberg','Farroc','brian_chen','eirikur',
+    'dmitry_flyorov','bipingm','MornayJo','ibv','b_weisshaar','k9srb','johnmmills','a_gomolitsky','filim','kapoor','ljzegers','jrukes','dwilson9','jlin','quickie',
+    'johnbell','nnikolenko','Gaetan','Giannetta','Katia','jackhan','jacobwangus','adwankar','dinobrusco','qbarnes','ilmojung','clifford_chan','nbaig','fhutchi1',
+    'rinofarina','baiyanbin','muralidhar','duanyiruo','bredding','mkolkey','manvith','nanduk','engyihan','deepsie','dabon','dino_jiang','mattrose','peter_j_wilhelm',
+    'emx2500','jmcguire','lfilimowski','guruppandit','abhilashisme','edwinhm','rabbi','ferrans','guna','kevin_robinson','adathiruthi','kochen','onehap','kalanithi',
+    'jamesn','obu001','chetanv','Avinash','HugoBoss','Han_Wei','mhooper','g16872','mfcarignano','jim_burke','kevin','arunkarra','adam_feng','pavan_scm','kostya_katz',
+    'ppazderka','eileenzhuang','pyammine','judyho','ashoykh','rdemento','ibrahim','min_wang','arvind_setlur','moorthy_karthik','daniel_nelson','dms','esnmurthy',
+    'rasa_bonyadlou','prashantjoshi','edkeating','billsaez','cambalindo','jims','bozkoyun','andry_deltsov','bpowers','manuel_milli','maryparsons','spriporov','yutianli',
+    'xiebin','tnemeth1','udayaps','zzzzuser','timberger','sbarve1','zarman','rwallace67','thangavelu_arum','yuhuaixie','tingup','sekchai','sasanplus','rupal','sebastien_hertz',
+    'sab8123','rony_lim','slava_kirillin','smwest','wendydu_yq','sco002','RonFred','spatnala','vd','Sunny','tthompson','sunijams','slaw','rodovich','zhangqingqi82','venki',
+    'yuntaom','xiaojin','walterciocosta','straus','Thomas','stupka','wangyu','yaowang','wisekb','tyler_louie','smartgarfield','shekar_mahalingam',
+    'venkata_akella','v_yellapragada','vavasthi','rpatel','zhengfang','sweetybala','vap','sergey','ymhuang','spatel78745'
+])
+def create_project(pid, nbhd):
+    '''
+    Create (or update) the Allura project for a previously extracted
+    TeamForge project, then import its wiki, discussion and news data.
+
+    pid  -- TeamForge project id; names the directory under
+            options.output_dir that holds the extracted data
+    nbhd -- neighborhood object used to register a project that does not
+            exist yet
+
+    Returns the project.  Notifications are disabled on the project for
+    the duration of the import and re-enabled at the end.
+    '''
+    M.session.artifact_orm_session._get().skip_mod_date = True
+    data = loadjson(pid, pid+'.json')
+    log.info('Loading: %s %s %s' % (pid, data.data.title, data.data.path))
+    shortname = convert_project_shortname(data.data.path)
+    project = M.Project.query.get(shortname=shortname)
+    if not project:
+        private = (data.access_level == 'private')
+        log.debug('Creating %s private=%s' % (shortname, private))
+        project = nbhd.register_project(shortname,
+                                        get_user(data.data.createdBy),
+                                        private_project=private)
+    project.notifications_disabled = True
+    project.name = data.data.title
+    project.short_description = data.data.description
+    project.last_updated = datetime.strptime(data.data.lastModifiedDate, '%Y-%m-%d %H:%M:%S')
+    M.main_orm_session.flush(project)
+    # TODO: push last_updated to gutenberg?
+    # TODO: try to set createdDate?
+    # admins get the Admin role, except the usernames on the skip list
+    role_admin = M.ProjectRole.by_name('Admin', project)
+    admin_usernames = set()
+    for admin in data.admins:
+        if admin.userName in skip_perms_usernames:
+            continue
+        admin_usernames.add(admin.userName)
+        user = get_user(admin.userName)
+        c.user = user
+        pr = user.project_role(project)
+        pr.roles = [ role_admin._id ]
+        ThreadLocalORMSession.flush_all()
+    # remaining members get the Developer role; admins keep Admin only
+    role_developer = M.ProjectRole.by_name('Developer', project)
+    for member in data.members:
+        if member.userName in skip_perms_usernames:
+            continue
+        if member.userName in admin_usernames:
+            continue
+        user = get_user(member.userName)
+        pr = user.project_role(project)
+        pr.roles = [ role_developer._id ]
+        ThreadLocalORMSession.flush_all()
+    project.labels = [cat.path.split('projects/categorization.root.')[1] for cat in data.categories]
+    icon_file = 'emsignia-MOBILITY-red.png'
+    if 'nsn' in project.labels or 'msi' in project.labels:
+        icon_file = 'emsignia-SOLUTIONS-blue.gif'
+    if project.icon:
+        M.ProjectFile.remove(dict(project_id=project._id, category='icon'))
+    # image data must be read as bytes; text mode can corrupt it on
+    # platforms that translate line endings
+    with open(os.path.join('..','scripts',icon_file), 'rb') as fp:
+        M.ProjectFile.save_image(
+            icon_file, fp, content_type=utils.guess_mime_type(icon_file),
+            square=True, thumbnail_size=(48,48),
+            thumbnail_meta=dict(project_id=project._id,category='icon'))
+    ThreadLocalORMSession.flush_all()
+    # each extracted tool has its own sub-directory in the project output
+    dirs = os.listdir(os.path.join(options.output_dir, pid))
+    frs_mapping = loadjson(pid, 'frs_mapping.json')
+    if 'wiki' in dirs:
+        import_wiki(project,pid)
+    if not project.app_instance('downloads'):
+        project.install_app('Downloads', 'downloads')
+    if 'forum' in dirs:
+        import_discussion(project, pid, frs_mapping, shortname)
+    if 'news' in dirs:
+        import_news(project, pid, frs_mapping, shortname)
+    project.notifications_disabled = False
+    ThreadLocalORMSession.flush_all()
+    return project
+def import_wiki(project, pid):
+    '''
+    Load the extracted wiki data of TeamForge project ``pid`` into
+    ``project``.
+
+    ``homepage_text.markdown`` becomes the 'Home' page of the project's
+    'home' tool.  Every ``<name>.json`` / ``<name>.markdown`` pair becomes
+    a wiki page, with the files in the ``<name>`` directory attached to it.
+    The 'wiki' tool is installed only when a HomePage pair is present.
+    '''
+    from forgewiki import model as WM
+    def upload_attachments(page, pid, beginning):
+        # attach every file found in the page's attachment directory
+        dirpath = os.path.join(options.output_dir, pid, 'wiki', beginning)
+        if not os.path.exists(dirpath): return
+        for f in os.listdir(dirpath):
+            # attachments are downloaded files (e.g. images): read as bytes
+            with open(os.path.join(dirpath, f), 'rb') as fp:
+                page.attach(f, fp, content_type=utils.guess_mime_type(f))
+    def restrict_app(app):
+        # set permissions and config options; must run after h.set_context
+        role_admin = M.ProjectRole.by_name('Admin')._id
+        role_anon = M.ProjectRole.by_name('*anonymous')._id
+        app.config.options['show_discussion'] = False
+        app.config.options['show_left_bar'] = False
+        app.config.options['show_right_bar'] = False
+        app.config.acl = [
+            M.ACE.allow(role_anon, 'read'),
+            M.ACE.allow(role_admin, 'create'),
+            M.ACE.allow(role_admin, 'edit'),
+            M.ACE.allow(role_admin, 'delete'),
+            M.ACE.allow(role_admin, 'moderate'),
+            M.ACE.allow(role_admin, 'configure'),
+            M.ACE.allow(role_admin, 'admin')]
+    pages = os.listdir(os.path.join(options.output_dir, pid, 'wiki'))
+    # handle the homepage content
+    if 'homepage_text.markdown' in pages:
+        home_app = project.app_instance('home')
+        h.set_context(project.shortname, 'home')
+        restrict_app(home_app)
+        p = WM.Page.upsert('Home')
+        p.text = wiki2markdown(load(pid, 'wiki', 'homepage_text.markdown'))
+        upload_attachments(p, pid, 'homepage')
+    if 'HomePage.json' in pages and 'HomePage.markdown' in pages:
+        wiki_app = project.app_instance('wiki')
+        if not wiki_app:
+            wiki_app = project.install_app('Wiki', 'wiki')
+        h.set_context(project.shortname, 'wiki')
+        restrict_app(wiki_app)
+    # make all the wiki pages
+    for page in pages:
+        ending = page[-5:]
+        beginning = page[:-5]
+        markdown_file = '%s.markdown' % beginning
+        if '.json' == ending and markdown_file in pages:
+            page_data = loadjson(pid, 'wiki', page)
+            content = load(pid, 'wiki', markdown_file)
+            if page == 'HomePage.json':
+                # point the wiki root at the imported home page
+                # (local renamed so it no longer shadows builtin globals())
+                wiki_globals = WM.Globals.query.get(app_config_id=wiki_app.config._id)
+                if wiki_globals is not None:
+                    wiki_globals.root = page_data.title
+                else:
+                    wiki_globals = WM.Globals(app_config_id=wiki_app.config._id, root=page_data.title)
+            p = WM.Page.upsert(page_data.title)
+            p.viewable_by = ['all']
+            p.text = wiki2markdown(content)
+            # upload attachments
+            upload_attachments(p, pid, beginning)
+            if not p.history().first():
+                p.commit()
+    ThreadLocalORMSession.flush_all()
+def import_discussion(project, pid, frs_mapping, sf_project_shortname):
+    '''
+    Load the extracted forums, topics and posts of TeamForge project
+    ``pid`` into the 'discussion' tool of ``project``, installing the tool
+    when it is missing.
+
+    Expected layout under ``<output_dir>/<pid>/forum``: ``<forum>.json``
+    next to a ``<forum>`` directory, which holds ``<topic>.json`` next to a
+    ``<topic>`` directory, which holds one ``<post>.json`` per post.
+
+    frs_mapping and sf_project_shortname are passed on to
+    convert_post_content() for every post body.
+    '''
+    from forgediscussion import model as DM
+    discuss_app = project.app_instance('discussion')
+    if not discuss_app:
+        discuss_app = project.install_app('Discussion', 'discussion')
+    h.set_context(project.shortname, 'discussion')
+    # set permissions and config options
+    role_admin = M.ProjectRole.by_name('Admin')._id
+    role_developer = M.ProjectRole.by_name('Developer')._id
+    role_auth = M.ProjectRole.by_name('*authenticated')._id
+    role_anon = M.ProjectRole.by_name('*anonymous')._id
+    discuss_app.config.acl = [
+        M.ACE.allow(role_anon, 'read'),
+        M.ACE.allow(role_auth, 'post'),
+        M.ACE.allow(role_auth, 'unmoderated_post'),
+        M.ACE.allow(role_developer, 'moderate'),
+        M.ACE.allow(role_admin, 'configure'),
+        M.ACE.allow(role_admin, 'admin')]
+    ThreadLocalORMSession.flush_all()
+    # drop the forum with shortname 'general' before importing
+    DM.Forum.query.remove(dict(app_config_id=discuss_app.config._id,shortname='general'))
+    forums = os.listdir(os.path.join(options.output_dir, pid, 'forum'))
+    for forum in forums:
+        forum_name = forum[:-5]
+        if forum[-5:] != '.json' or forum_name not in forums:
+            continue
+        forum_data = loadjson(pid, 'forum', forum)
+        fo = DM.Forum.query.get(shortname=forum_name, app_config_id=discuss_app.config._id)
+        if not fo:
+            fo = DM.Forum(app_config_id=discuss_app.config._id, shortname=forum_name)
+        fo.name = forum_data.title
+        fo.description = forum_data.description
+        fo_num_topics = 0
+        fo_num_posts = 0
+        topics = os.listdir(os.path.join(options.output_dir, pid, 'forum', forum_name))
+        for topic in topics:
+            topic_name = topic[:-5]
+            if topic[-5:] != '.json' or topic_name not in topics:
+                continue
+            fo_num_topics += 1
+            topic_data = loadjson(pid, 'forum', forum_name, topic)
+            to = DM.ForumThread.query.get(
+                subject=topic_data.title,
+                discussion_id=fo._id,
+                app_config_id=discuss_app.config._id)
+            if not to:
+                to = DM.ForumThread(
+                    subject=topic_data.title,
+                    discussion_id=fo._id,
+                    app_config_id=discuss_app.config._id)
+            to_num_replies = 0
+            oldest_post = None
+            newest_post = None
+            posts = sorted(os.listdir(os.path.join(options.output_dir, pid, 'forum', forum_name, topic_name)))
+            for post in posts:
+                post_name = post[:-5]
+                if post[-5:] != '.json':
+                    continue
+                to_num_replies += 1
+                post_data = loadjson(pid, 'forum', forum_name, topic_name, post)
+                # imported post ids are "<TeamForge post id><app config id>@import"
+                post_id = '%s%s@import' % (post_name,str(discuss_app.config._id))
+                p = DM.ForumPost.query.get(
+                    _id=post_id,
+                    thread_id=to._id,
+                    discussion_id=fo._id,
+                    app_config_id=discuss_app.config._id)
+                if not p:
+                    p = DM.ForumPost(
+                        _id=post_id,
+                        thread_id=to._id,
+                        discussion_id=fo._id,
+                        app_config_id=discuss_app.config._id)
+                create_date = datetime.strptime(post_data.createdDate, '%Y-%m-%d %H:%M:%S')
+                p.timestamp = create_date
+                p.author_id = str(get_user(post_data.createdByUserName)._id)
+                p.text = convert_post_content(frs_mapping, sf_project_shortname, post_data.content)
+                p.status = 'ok'
+                if post_data.replyToId:
+                    p.parent_id = '%s%s@import' % (post_data.replyToId,str(discuss_app.config._id))
+                slug, full_slug = p.make_slugs(parent = p.parent, timestamp = create_date)
+                p.slug = slug
+                p.full_slug = full_slug
+                if oldest_post is None or oldest_post.timestamp > create_date:
+                    oldest_post = p
+                if newest_post is None or newest_post.timestamp < create_date:
+                    newest_post = p
+                ThreadLocalORMSession.flush_all()
+            to.num_replies = to_num_replies
+            # a topic directory without any post files leaves both markers
+            # unset; skip the summary fields instead of failing on None
+            if oldest_post is not None:
+                to.first_post_id = oldest_post._id
+                to.last_post_date = newest_post.timestamp
+                to.mod_date = newest_post.timestamp
+            fo_num_posts += to_num_replies
+        fo.num_topics = fo_num_topics
+        fo.num_posts = fo_num_posts
+        ThreadLocalORMSession.flush_all()
+def import_news(project, pid, frs_mapping, sf_project_shortname):
+    '''
+    Load the extracted news posts of TeamForge project ``pid`` into a blog
+    tool mounted at 'news' on ``project``; the tool is installed when it is
+    missing.  Nothing is done when the news directory is empty.
+    '''
+    from forgeblog import model as BM
+    entries = os.listdir(os.path.join(options.output_dir, pid, 'news'))
+    if not len(entries):
+        return
+    news_app = project.app_instance('news')
+    if not news_app:
+        news_app = project.install_app('blog', 'news', mount_label='News')
+    h.set_context(project.shortname, 'news')
+    # make all the blog posts
+    for entry in entries:
+        if entry[-5:] != '.json':
+            continue
+        post_data = loadjson(pid, 'news', entry)
+        blog_post = BM.BlogPost.query.get(title=post_data.title,app_config_id=news_app.config._id)
+        if not blog_post:
+            blog_post = BM.BlogPost(title=post_data.title,app_config_id=news_app.config._id)
+        blog_post.text = convert_post_content(frs_mapping, sf_project_shortname, post_data.body)
+        created = datetime.strptime(post_data.createdOn, '%Y-%m-%d %H:%M:%S')
+        blog_post.timestamp = created
+        blog_post.mod_date = created
+        blog_post.state = 'published'
+        if not blog_post.slug:
+            blog_post.make_slug()
+        if not blog_post.history().first():
+            # first import of this post: commit it and open its thread
+            blog_post.commit()
+            ThreadLocalORMSession.flush_all()
+            M.Thread(discussion_id=blog_post.app_config.discussion_id,
+                     ref_id=blog_post.index_id(),
+                     subject='%s discussion' % blog_post.title)
+        author = get_user(post_data.createdByUsername)
+        blog_post.history().first().author=dict(
+            id=author._id,
+            username=author.username,
+            display_name=author.get_pref('display_name'))
+        ThreadLocalORMSession.flush_all()
def check_unsupported_tools(project):
docs = make_client(options.api_url, 'DocumentApp')
@@ -102,6 +599,16 @@
log.warn('Migrating trackers is not supported, but found %s tracker artifacts' % task_count)
+def load(project_id, *paths):
+    '''
+    Return the contents of the file at ``paths`` inside the output
+    directory of project ``project_id``.
+    '''
+    source_path = os.path.join(options.output_dir, project_id, *paths)
+    with open(source_path) as fp:
+        return fp.read()
+def loadjson(*args):
+    '''
+    Read a file with load() and parse it as JSON.
+    '''
+    raw = load(*args)
+    # Object for attribute access
+    return json.loads(raw, object_hook=Object)
def save(content, project, *paths):
out_file = os.path.join(options.output_dir, project.id, *paths)
if not os.path.exists(os.path.dirname(out_file)):
@@ -127,9 +634,12 @@
if not os.path.exists(os.path.dirname(out_file)):
- hostname = urlparse(options.api_url).hostname
- scheme = urlparse(options.api_url).scheme
- url = scheme + '://' + hostname + action_url + url_path
+ if '://' in url_path:
+ url = url_path
+ else:
+ hostname = urlparse(options.api_url).hostname
+ scheme = urlparse(options.api_url).scheme
+ url = scheme + '://' + hostname + action_url + url_path
log.debug('fetching %s' % url)
statusCheckingURLopener.retrieve(url, out_file)
return out_file
@@ -138,17 +648,19 @@
h1 = re.compile(r'^!!!', re.MULTILINE)
h2 = re.compile(r'^!!', re.MULTILINE)
h3 = re.compile(r'^!', re.MULTILINE)
+# Matches a heading ('#' one or more times) whose text ends in "Statistics"
+# or "statistics", followed by text containing an [sf:...Statistics] macro.
+# wiki2markdown() removes every match from the converted markup.
+re_stats = re.compile(r'#+ .* [Ss]tatistics\n+(.*\[sf:.*?Statistics\].*)+')
def wiki2markdown(markup):
Partial implementation of http://help.collab.net/index.jsp?topic=/teamforge520/reference/wiki-wikisyntax.html
def bracket_handler(matchobj):
snippet = matchobj.group(1)
+ ext = snippet.rsplit('.')[-1].lower()
# TODO: support [foo|bar.jpg]
if snippet.startswith('sf:'):
# can't handle these macros
return matchobj.group(0)
- elif snippet.endswith('.jpg') or snippet.endswith('.gif') or snippet.endswith('.png'):
+ elif ext in ('jpg', 'gif', 'png'):
filename = snippet.split('/')[-1]
return '[[img src=%s]]' % filename
elif '|' in snippet:
@@ -161,14 +673,66 @@
markup = h1.sub('#', markup)
markup = h2.sub('##', markup)
markup = h3.sub('###', markup)
+ markup = re_stats.sub('', markup)
return markup
+# a TeamForge release id ("rel" followed by digits) standing alone as a word
+re_rel = re.compile(r'\b(rel\d+)\b')
+def convert_post_content(frs_mapping, sf_project_shortname, text):
+    '''
+    Return ``text`` with every release id that has an entry in
+    ``frs_mapping`` replaced by an HTML link to the mapped path under the
+    files area of project ``sf_project_shortname``.  Release ids without an
+    entry (or with an empty path) are left as they are.
+    '''
+    def linkify(match):
+        release_id = match.group(1)
+        target = frs_mapping.get(release_id)
+        if not target:
+            return release_id
+        return '<a href="/projects/%s/files/%s">%s</a>' % (
+            sf_project_shortname, target, target)
+    return re_rel.sub(linkify, text)
def find_image_references(markup):
'yields filenames'
for matchobj in bracket_macro.finditer(markup):
snippet = matchobj.group(1)
- if snippet.endswith('.jpg') or snippet.endswith('.gif') or snippet.endswith('.png'):
+ ext = snippet.rsplit('.')[-1].lower()
+ if ext in ('jpg', 'gif', 'png'):
yield snippet
+def get_news(project):
+    '''
+    Extracts news posts
+    '''
+    news_service = make_client(options.api_url, 'NewsApp').service
+    # one JSON file per news post; the author of each post is recorded too
+    for item in news_service.getNewsPostList(s, project.id).dataRows:
+        serialized = json.dumps(dict(item), default=str)
+        save(serialized, project, 'news', item.id+'.json')
+        save_user(item.createdByUsername)
+def get_discussion(project):
+    '''
+    Extracts discussion forums and posts
+    '''
+    service = make_client(options.api_url, 'DiscussionApp').service
+    def dump(row):
+        # rows are converted to plain dicts; non-JSON values are stringified
+        return json.dumps(dict(row), default=str)
+    # find the forums
+    for forum in service.getForumList(s, project.id).dataRows:
+        forumname = forum.path.split('.')[-1]
+        log.info('Retrieving data for forum: %s' % forumname)
+        save(dump(forum), project, 'forum', forumname+'.json')
+        # topic in this forum
+        for topic in service.getTopicList(s, forum.id).dataRows:
+            save(dump(topic), project, 'forum', forumname, topic.id+'.json')
+            # posts in this topic
+            for post in service.getPostList(s, topic.id).dataRows:
+                save(dump(post), project, 'forum', forumname, topic.id, post.id+'.json')
+                save_user(post.createdByUserName)
def get_homepage_wiki(project):
@@ -202,21 +766,50 @@
log.warn('did not find homepage')
if homepage:
- save(homepage, project, 'wiki', 'homepage.markdown')
+ save(homepage, project, 'wiki', 'homepage_text.markdown')
for img_ref in find_image_references(homepage):
filename = img_ref.split('/')[-1]
- download_file('wiki', project.path + '/wiki/' + img_ref, project.id, 'homepage', filename)
+ if '://' in img_ref:
+ img_url = img_ref
+ else:
+ img_url = project.path + '/wiki/' + img_ref
+ download_file('wiki', img_url, project.id, 'wiki', 'homepage', filename)
for path, text in pages.iteritems():
if options.default_wiki_text in text:
log.debug('skipping default wiki page %s' % path)
save(text, project, 'wiki', path+'.markdown')
+ for img_ref in find_image_references(text):
+ filename = img_ref.split('/')[-1]
+ if '://' in img_ref:
+ img_url = img_ref
+ else:
+ img_url = project.path + '/wiki/' + img_ref
+ download_file('wiki', img_url, project.id, 'wiki', path, filename)
+def _dir_sql(created_on, project, dir_name, rel_path):
+    '''
+    Return an SQL UPDATE statement (as text) that sets ``file_crtime`` to
+    ``created_on`` on the ``pfs`` directory row named ``dir_name``.
+
+    project  -- TeamForge project; its path is converted to the SF
+                shortname and combined with options.neighborhood_shortname
+    rel_path -- path of the parent directory, or an empty value when the
+                directory sits at the top level
+
+    The statement is text for a .sql file, so values cannot be passed as
+    bind parameters; embedded single quotes are doubled instead.
+    '''
+    assert options.neighborhood_shortname
+    def quote(value):
+        # '%s' formatting (rather than str()) keeps unicode values intact
+        return ('%s' % value).replace("'", "''")
+    if not rel_path:
+        parent_directory = "'1'"
+    else:
+        parent_directory = "(SELECT pfs_path FROM pfs_path WHERE path_name = '%s/')" % quote(rel_path)
+    sql = """
+    UPDATE pfs
+      SET file_crtime = '%s'
+      WHERE source_pk = (SELECT project.project FROM project WHERE project.project_name = '%s.%s')
+      AND source_table = 'project'
+      AND pfs_type = 'd'
+      AND pfs_name = '%s'
+      AND parent_directory = %s;
+    """ % (quote(created_on),
+           quote(convert_project_shortname(project.path)),
+           quote(options.neighborhood_shortname),
+           quote(dir_name),
+           parent_directory)
+    return sql
def get_files(project):
frs = make_client(options.api_url, 'FrsApp')
valid_pfs_filename = re.compile(r'(?![. ])[-_ +.,=#~@!()\[\]a-zA-Z0-9]+(?<! )$')
- pfs_output_dir = os.path.join(os.path.abspath(options.output_dir), 'PFS', project.shortname)
+ pfs_output_dir = os.path.join(os.path.abspath(options.output_dir), 'PFS', convert_project_shortname(project.path))
+ sql_updates = ''
def handle_path(obj, prev_path):
path_component = obj.title.strip().replace('/', ' ').replace('&','').replace(':','')
@@ -227,6 +820,8 @@
project, 'frs', path+'.json')
return path
+ frs_mapping = {}
for pkg in frs.service.getPackageList(s, project.id).dataRows:
pkg_path = handle_path(pkg, '')
pkg_details = frs.service.getPackageData(s, pkg.id) # download count
@@ -235,6 +830,7 @@
for rel in frs.service.getReleaseList(s, pkg.id).dataRows:
rel_path = handle_path(rel, pkg_path)
+ frs_mapping[rel['id']] = rel_path
rel_details = frs.service.getReleaseData(s, rel.id) # download count
save(json.dumps(dict(rel_details), default=str),
project, 'frs', rel_path+'_details.json')
@@ -242,7 +838,7 @@
for file in frs.service.getFrsFileList(s, rel.id).dataRows:
details = frs.service.getFrsFileData(s, file.id)
- file_path = os.path.join(rel_path, file.title.strip())
+ file_path = handle_path(file, rel_path)
@@ -252,42 +848,75 @@
- #'''
- download_file('frs', rel.path + '/' + file.id, pfs_output_dir, file_path)
- # TODO: createdOn
- mtime = int(mktime(details.lastModifiedDate.timetuple()))
- os.utime(os.path.join(pfs_output_dir, file_path), (mtime, mtime))
- # now set mtime on the way back up the tree (so it isn't clobbered):
- # TODO: createdOn
+ if not options.skip_frs_download:
+ download_file('frs', rel.path + '/' + file.id, pfs_output_dir, file_path)
+ mtime = int(mktime(details.lastModifiedDate.timetuple()))
+ os.utime(os.path.join(pfs_output_dir, file_path), (mtime, mtime))
+ # releases
+ created_on = int(mktime(rel.createdOn.timetuple()))
mtime = int(mktime(rel.lastModifiedOn.timetuple()))
- os.utime(os.path.join(pfs_output_dir, rel_path), (mtime, mtime))
- # TODO: createdOn
+ if os.path.exists(os.path.join(pfs_output_dir, rel_path)):
+ os.utime(os.path.join(pfs_output_dir, rel_path), (mtime, mtime))
+ sql_updates += _dir_sql(created_on, project, rel.title.strip(), pkg_path)
+ # packages
+ created_on = int(mktime(pkg.createdOn.timetuple()))
mtime = int(mktime(pkg.lastModifiedOn.timetuple()))
- os.utime(os.path.join(pfs_output_dir, pkg_path), (mtime, mtime))
- #'''
-print c.service.getProjectData(s, p.id)
-print c.service.getProjectAccessLevel(s, p.id)
-print c.service.listProjectAdmins(s, p.id)
-for forum in discussion.service.getForumList(s, p.id).dataRows:
- print forum.title
- for topic in discussion.service.getTopicList(s, forum.id).dataRows:
- print ' ', topic.title
- for post in discussion.service.getPostList(s, topic.id).dataRows:
- print ' ', post.title, post.createdDate, post.createdByUserName
- print post.content
- print
- break
- break
- break
-print news.service.getNewsPostList(s, p.id)
+ if os.path.exists(os.path.join(pfs_output_dir, pkg_path)):
+ os.utime(os.path.join(pfs_output_dir, pkg_path), (mtime, mtime))
+ sql_updates += _dir_sql(created_on, project, pkg.title.strip(), '')
+ # save pfs update sql for this project
+ with open(os.path.join(options.output_dir, 'pfs_updates.sql'), 'a') as out:
+ out.write('/* %s */' % project.id)
+ out.write(sql_updates)
+ save(json.dumps(frs_mapping), project, 'frs_mapping.json')
+def get_parser(defaults):
+    '''
+    Build the command-line parser for this script.
+
+    ``defaults`` maps option destinations to default values and is applied
+    with set_defaults() before the options are registered.
+    '''
+    usage = ('%prog [--options] [projID projID projID]\n'
+             'If no project ids are given, all projects will be migrated')
+    parser = OptionParser(usage=usage)
+    parser.set_defaults(**defaults)
+    # (flags, keyword arguments) in the order they are registered
+    option_specs = [
+        # Command-line-only options
+        (('--extract-only',), dict(
+            action='store_true', dest='extract',
+            help='Store data from the TeamForge API on the local filesystem; not load into Allura')),
+        (('--load-only',), dict(
+            action='store_true', dest='load',
+            help='Load into Allura previously-extracted data')),
+        (('--config-file',), dict(
+            dest='config_file',
+            help='Load options from config file')),
+        # Command-line options with defaults in config file
+        (('--api-url',), dict(
+            dest='api_url', help='e.g. https://hostname/ce-soap50/services/')),
+        (('--attachment-url',), dict(
+            dest='attachment_url')),
+        (('--default-wiki-text',), dict(
+            dest='default_wiki_text',
+            help='used in determining if a wiki page text is default or changed')),
+        (('-u', '--username'), dict(
+            dest='username')),
+        (('-p', '--password'), dict(
+            dest='password')),
+        (('-o', '--output-dir'), dict(
+            dest='output_dir')),
+        (('--list-project-ids',), dict(
+            action='store_true', dest='list_project_ids')),
+        (('-n', '--neighborhood'), dict(
+            dest='neighborhood',
+            help='Neighborhood full name, to load in to')),
+        (('--n-shortname',), dict(
+            dest='neighborhood_shortname',
+            help='Neighborhood shortname, for PFS extract SQL')),
+        (('--skip-frs-download',), dict(
+            action='store_true', dest='skip_frs_download')),
+        (('--skip-unsupported-check',), dict(
+            action='store_true', dest='skip_unsupported_check')),
+    ]
+    for flags, settings in option_specs:
+        parser.add_option(*flags, **settings)
+    return parser
if __name__ == '__main__':
@@ -295,6 +924,21 @@
+def test_convert_post_content():
+    '''release ids are linked only when they stand alone as whole words'''
+    text = '''rel100? or ?rel101 or rel102 or rel103a or rel104'''
+    mapping = dict(('rel%d' % n, 'rel/%d/' % n) for n in range(100, 105))
+    converted = convert_post_content(mapping, 'foo.bar', text)
+    for n in (100, 101, 102):
+        assert 'href="/projects/foo.bar/files/rel/%d' % n in converted, converted
+    # "rel103a" is not a whole-word release id, so it must stay unlinked
+    assert 'href="/projects/foo.bar/files/rel/103' not in converted, converted
+    assert 'href="/projects/foo.bar/files/rel/104' in converted, converted
def test_convert_markup():
markup = '''
@@ -316,7 +960,7 @@
new_markup = wiki2markdown(markup)
assert '\n[[img src=myimage.jpg]]\n[[img src=anotherimage.jpg]]\n' in new_markup
assert '\n###this is the first' in new_markup
- assert '\n# Project Statistics' in new_markup
assert '<http://www.google.com>' in new_markup
assert '[SourceForge ](http://www.sf.net)' in new_markup
- assert '[sf:frsStatistics]' in new_markup
+ assert '\n# Project Statistics' not in new_markup
+ assert '[sf:frsStatistics]' not in new_markup