from datetime import date, datetime
from urlparse import urlparse
from xml.sax.saxutils import quoteattr

from django.db import models

from ohloh import OhlohProxy
from flossmole import FSF, FC, GC, GH
from utils import Forges
from OSSEval.utils import xmlMinidom
class OSProject(models.Model):
    """
    An open source project, bound one-to-one to an ``analysis.Instance``.

    A project can be registered on several forges through ``OSProjectForge``
    rows (reverse relation ``osprojectforge_set``); ``getInstanceInfo``
    collects what each of those forges reports into a single dictionary.
    """
    name = models.CharField(max_length=200)
    instance = models.OneToOneField('analysis.Instance', related_name='actual_instance', primary_key=True)

    # Errors that mean "this source does not provide that piece of data"
    # (missing key, None instead of a dict, unparsable date, ...). Only these
    # are swallowed by the best-effort heuristics in getInstanceInfo, so that
    # KeyboardInterrupt / SystemExit and unrelated bugs still propagate.
    _BEST_EFFORT_ERRORS = (AttributeError, LookupError, TypeError, ValueError)

    def __str__(self):
        return self.name + " - " + self.instance.name

    def to_xml(self):
        """
        Return the project as an XML string: an ``OSProject`` element whose
        ``Name`` attribute is the project name, wrapping an
        ``OSProjectForges`` element with one child per related
        OSProjectForge.
        """
        forges_xml = "".join(
            osprojectforge.to_xml()
            for osprojectforge in self.osprojectforge_set.all()
        )
        # quoteattr escapes &, <, > and quotes and adds the surrounding
        # quotes itself, so a name containing such characters still yields
        # well-formed XML.
        return (
            '<OSProject Name=' + quoteattr(self.name) + '>'
            + "<OSProjectForges>" + forges_xml + "</OSProjectForges>"
            + "</OSProject>"
        )

    def from_xml(self, xmldoc, instance, insert = True):
        """
        Populate and save this project from an XML element, then create or
        update one OSProjectForge per ``OSProjectForge`` descendant element.

        xmldoc   -- element carrying the ``Name`` attribute (presumably an
                    xml.dom.minidom element; read through xmlMinidom)
        instance -- the analysis Instance this project is attached to
        insert   -- not used for the project itself; passed through to
                    OSProjectForge.from_xml
        """
        self.name = xmlMinidom.getStringAttribute(xmldoc, 'Name')
        self.instance = instance
        self.save()
        # OSProjectForge
        xml_osproject_forges = xmldoc.getElementsByTagName('OSProjectForge')
        for xml_osproject_forge in xml_osproject_forges:
            ospf = OSProjectForge()
            ospf.from_xml(xml_osproject_forge, self, insert)

    def getInstanceInfo(self):
        '''
        Part of the required interface
        i is a dictionary; each source information is within the source name key, e.g.
        i['FM']
        i['OHLOH']
        i['FSF']
        i['GC']
        i['GH']
        ...
        Some information is extracted from specific sources and made available at root level:
        i['name']                 name to use on search engines
        i['homepage']             best guess of the project homepage, "" if unknown
        i['siteforsearchengine']  host of the homepage without a leading "www.";
                                  only set when a homepage is known
        i['days']                 number of days since the project has been registered, -1 if unknown;
                                  calculated from i['OHLOH']['created_at'], i['FM']['date_added']
                                  and i['FSF']['released_on'], the oldest one wins
        i['download_url']         only set when OHLOH provides it

        Every heuristic is best effort: a source that is missing, or that
        lacks the expected key, is silently skipped.
        '''
        skip = self._BEST_EFFORT_ERRORS
        i = {}
        for ospf in self.osprojectforge_set.all():
            i[ospf.forge.name] = ospf.getProjectInfo()

        # From now on you find heuristics that try to reasonably define a single value where you have several sources of information
        # e.g. project is created 1/1/2012 according to Github and 4/2/2007 according to FSF; the heuristic will choose the remotest date

        # we need a name to search on search engines
        if self.instance.name_for_search != "":
            i['name'] = self.instance.name_for_search
        else:
            i['name'] = self.name

        # some searches on search engines have to be restricted to project site; let's try to guess it
        # the order is given by a heuristic based on data reliability:
        # a later source overrides an earlier one whenever it has the key
        i['homepage'] = ""
        homepage_sources = (
            ('FSF', 'real_url'),
            ('GH', 'homepage'),
            ('FM', 'url_homepage'),
            ('OHLOH', 'homepage_url'),
        )
        for source, key in homepage_sources:
            try:
                i['homepage'] = i[source][key]
            except skip:
                pass

        # TODO: i['documentation_url'] i['faq_url'] i['wiki_url']
        # i['numBugsTotal']
        # svn was in Melquiades; is it in any other source?
        if i['homepage'] != "":
            try:
                site = urlparse(i['homepage']).netloc
                if site[:4] == "www.":
                    site = site[4:]
                i['siteforsearchengine'] = site
            except skip:
                i['siteforsearchengine'] = ""

        # default to -1 e.g. no value; the oldest creation dates wins; GC and GH have no info; FSF released_on seems to have the same semantic
        i['days'] = -1
        try:
            created_at = datetime.strptime(i['OHLOH']['created_at'], '%Y-%m-%dT%H:%M:%SZ')
            i['OHLOH']['days'] = (datetime.today() - created_at).days
            i['days'] = max(i['days'], i['OHLOH']['days'])
        except skip:
            pass
        try:
            # The value is stripped before parsing so that it is accepted
            # both with and without the trailing newline.
            date_added = datetime.strptime(i['FM']['date_added'].strip(), '%Y-%m-%d %H:%M:%S')
            i['FM']['days'] = (datetime.today() - date_added).days
            i['days'] = max(i['days'], i['FM']['days'])
        except skip:
            pass
        try:
            # Store the age under 'days' like the other sources do and leave
            # 'released_on' untouched.
            # NOTE(review): assumes released_on is a datetime.date - confirm.
            i['FSF']['days'] = (date.today() - i['FSF']['released_on']).days
            i['days'] = max(i['days'], i['FSF']['days'])
        except skip:
            pass

        try:
            i['download_url'] = i['OHLOH']['download_url']
        except skip:
            pass

        # CLEANUP: drop newlines and surrounding whitespace from FM values
        for key in ('popularity_score', 'rating', 'rating_count', 'url_bugtracker'):
            try:
                i['FM'][key] = i['FM'][key].replace('\n', '').strip()
            except skip:
                pass

        try:
            comment_ratio = float(i['OHLOH']['size_fact']['comment_ratio'])
            # _pp stands for pretty print
            i['OHLOH']['size_fact']['comment_ratio_pp'] = str(round(comment_ratio * 100, 1)) + "%"
        except skip:
            pass
        return i
class Forge(models.Model):
    """
    A forge (project hosting / directory site) on which open source projects
    are registered; linked to OSProject through OSProjectForge.
    """
    name = models.CharField(max_length=50)
    url = models.CharField(max_length=200)
    os_projects = models.ManyToManyField(OSProject, through='OSProjectForge', blank=True)

    def __str__(self):
        return self.name + " - " + self.url

    def to_xml(self):
        """
        Return the forge as a self-closing ``Forge`` XML element with ``Id``,
        ``Name`` and ``URL`` attributes.
        """
        # quoteattr escapes &, <, > and quotes and adds the surrounding
        # quotes itself, so names and URLs containing such characters (e.g.
        # a query string with "&") still yield well-formed XML.
        return (
            '<Forge Id="' + str(self.id) + '"'
            + ' Name=' + quoteattr(self.name)
            + ' URL=' + quoteattr(self.url)
            + ' />'
        )

    # Forges are neither added nor updated through XML; they are just a
    # reference for OSProjectForge, hence there is no "from_xml".
class OSProjectForge(models.Model):
    """
    The registration of one OSProject on one Forge, together with the
    identifier used to query that forge for project information.
    """
    name = models.CharField(max_length=200)
    forge = models.ForeignKey(Forge)
    os_project = models.ForeignKey(OSProject)
    identifier_in_forge = models.CharField(max_length=200)
    datasource_id = models.IntegerField()

    def to_xml(self):
        """
        Return this registration as an ``OSProjectForge`` XML element with
        the forge's own XML nested inside it.
        """
        # quoteattr escapes &, <, > and quotes and adds the surrounding
        # quotes itself, so free-text values cannot break the markup.
        return (
            '<OSProjectForge Id="' + str(self.id) + '"'
            + ' Name=' + quoteattr(self.name)
            + ' IdentifierInForge=' + quoteattr(str(self.identifier_in_forge))
            + ' DatasourceId="' + str(self.datasource_id) + '">'
            + self.forge.to_xml()
            + "</OSProjectForge>"
        )

    def from_xml(self, xmldoc, os_project, insert = True):
        """
        Populate this object from an ``OSProjectForge`` XML element and save
        it.

        xmldoc     -- element carrying ``Name``, ``IdentifierInForge`` and
                      ``DatasourceId`` attributes and a nested ``Forge``
                      element
        os_project -- the OSProject this registration belongs to
        insert     -- when False, the primary key is taken from the ``Id``
                      attribute; when True, ``self.id`` is left untouched

        The referenced Forge must already exist: it is looked up by the
        ``Id`` attribute of the first nested ``Forge`` element, so a missing
        element or an unknown id raises.
        """
        if not insert:
            self.id = xmlMinidom.getNaturalAttribute(xmldoc, 'Id')
        self.name = xmlMinidom.getStringAttribute(xmldoc, 'Name')
        self.identifier_in_forge = xmlMinidom.getStringAttribute(xmldoc, 'IdentifierInForge')
        self.datasource_id = xmlMinidom.getNaturalAttribute(xmldoc, 'DatasourceId')
        self.os_project = os_project
        # Forge
        xml_forge = xmldoc.getElementsByTagName('Forge')[0]
        self.forge = Forge.objects.get(pk=xmlMinidom.getNaturalAttribute(xml_forge, 'Id'))
        self.save()

    def __str__(self):
        return self.name + " - " + self.forge.name

    def getProjectInfo(self):
        """
        Return the information the forge holds about this project, as
        provided by the proxy matching ``self.forge.id``.

        An empty dictionary is returned when the forge has no known proxy,
        so callers can treat such a forge as one that provides no data.
        """
        proxies = (
            (Forges.OHLOH, OhlohProxy),
            (Forges.FC, FC),
            (Forges.FSF, FSF),
            (Forges.GC, GC),
            (Forges.GH, GH),
        )
        for forge_id, proxy in proxies:
            if self.forge.id == forge_id:
                return proxy.getProjectInfo(self.identifier_in_forge)
        return {}