--- a/OSSEval/OSProject/models.py
+++ b/OSSEval/OSProject/models.py
@@ -1,5 +1,8 @@
+from datetime import date, datetime
+from urlparse import urlparse
+
from django.db import models
-#from views import search_html, instance_list_html
+
from ohloh import OhlohProxy
from flossmole import FSF, FC, GC, GH
from utils import Forges
@@ -11,20 +14,77 @@
name = models.CharField(max_length=200)
instance = models.OneToOneField('analysis.Instance', related_name='actual_instance', primary_key=True)
- def __unicode__(self):
+ def __str__(self):
+ # Display label: "<project name> - <instance name>"
+ # NOTE(review): this module still imports the Python 2 only urlparse module; on Python 2 Django uses __unicode__ for text, so __str__ alone may fail on non-ASCII names - confirm
return self.name + " - " + self.instance.name
- def getProjectInfo(self):
- osprojectforge_info = {}
+ def getInstanceInfo(self):
+ '''
+ Part of the required interface: collects what every linked forge returns for this project.
+ Returns i, a dictionary; the information of each source is within the source name key, e.g.
+ i['FM']
+ i['OHLOH']
+ i['FSF']
+ i['GC']
+ i['GH']
+ ...
+ Some information is extracted from specific sources and made available at root level:
+ i['name'] name to use on search engines: instance.name_for_search, or the project name when that is empty
+ i['homepage'] best guess of the project home page; "" when no source provides one
+ i['siteforsearchengine'] host name of i['homepage'] without a leading "www."; only set when i['homepage'] is not empty
+ i['days'] number of days since the project has been registered; -1 when unknown; it is calculated using i['OHLOH']['created_at'], i['FM']['date_added'] or i['FSF']['released_on']
+ As a side effect a 'days' entry is also stored in each source dictionary it could be computed for.
+ '''
+ i = {}
for ospf in self.osprojectforge_set.all():
- osprojectforge_info[ospf.forge.name] = ospf.getProjectInfo()
- return osprojectforge_info
+ i[ospf.forge.name] = ospf.getProjectInfo()
+ # From now on you find heuristics that try to reasonably define a single value where you have several sources of information
+ # e.g. project is created 1/1/2012 according to Github and 4/2/2007 according to FSF; the heuristic will choose the remotest date
+
+ # we need a name to search on search engines
+ if self.instance.name_for_search != "":
+ i['name'] = self.instance.name_for_search
+ else:
+ i['name'] = self.name
+
+ # some searches on search engines have to be restricted to project site; let's try to guess it
+ # the order is given by a heuristic based on data reliability
+ # each successful assignment overrides the previous one, so the last available source wins;
+ # a missing source or key is simply skipped (best effort)
+ i['homepage'] = ""
+ try: i['homepage'] = i['FSF']['real_url']
+ except Exception: pass
+ try: i['homepage'] = i['GH']['homepage']
+ except Exception: pass
+ try: i['homepage'] = i['FM']['url_homepage']
+ except Exception: pass
+ try: i['homepage'] = i['OHLOH']['homepage_url']
+ except Exception: pass
+
+ if i['homepage'] != "":
+ try:
+ i['siteforsearchengine'] = urlparse(i['homepage']).netloc
+ if i['siteforsearchengine'].startswith("www."):
+ i['siteforsearchengine'] = i['siteforsearchengine'][4:]
+ except Exception: i['siteforsearchengine'] = ""
+
+ # default to -1 e.g. no value; the oldest creation date wins; GC and GH have no info; FSF released_on seems to have the same semantic
+ i['days'] = -1
+ try:
+ i['OHLOH']['days'] = (datetime.today() - datetime.strptime(i['OHLOH']['created_at'], '%Y-%m-%dT%H:%M:%SZ')).days
+ if i['OHLOH']['days'] > i['days']: i['days'] = i['OHLOH']['days']
+ except Exception: pass
+ try:
+ i['FM']['days'] = (datetime.today() - datetime.strptime(i['FM']['date_added'], '%Y-%m-%d %H:%M:%S\n')).days
+ if i['FM']['days'] > i['days']: i['days'] = i['FM']['days']
+ except Exception: pass
+ try:
+ # store the result under 'days' (not 'released_on') so that the comparison below can read it and the source date is preserved
+ # NOTE(review): assumes i['FSF']['released_on'] is already a date object - confirm
+ i['FSF']['days'] = (date.today() - i['FSF']['released_on']).days
+ if i['FSF']['days'] > i['days']: i['days'] = i['FSF']['days']
+ except Exception: pass
+
+ return i
+
class Forge(models.Model):
+ '''
+ A forge (source of project information) identified by a name and a url.
+ Its name is used as the key of the per-source entries built by OSProject.getInstanceInfo.
+ It is linked to OSProject through the OSProjectForge intermediate model.
+ '''
name = models.CharField(max_length=50)
url = models.CharField(max_length=200)
os_projects = models.ManyToManyField(OSProject, through='OSProjectForge', blank=True)
- def __unicode__(self):
+ def __str__(self):
+ # Display label: "<forge name> - <forge url>"
return self.name + " - " + self.url
class OSProjectForge(models.Model):
@@ -33,7 +93,7 @@
os_project = models.ForeignKey(OSProject)
identifier_in_forge = models.CharField(max_length=200)
datasource_id = models.IntegerField()
- def __unicode__(self):
+ def __str__(self):
+ # Display label: "<name> - <forge name>"; the name field is declared outside the lines shown in this hunk
return self.name + " - " + self.forge.name
def getProjectInfo(self):
if self.forge.id == Forges.OHLOH: