Parent: [48e073] (diff)

Child: [831432] (diff)

Download this file

models.py    191 lines (163 with data), 8.4 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# This Source Code Form of OSSEval is subject to the terms of the GNU AFFERO
# GENERAL PUBLIC LICENSE, v. 3.0. If a copy of the AGPL was not
# distributed with this file, You can obtain one at http://www.gnu.org/licenses/agpl.txt
#
# OSSEval is powered by the SOS Open Source AGPL edition.
# The AGPL requires that you do not remove the SOS Open Source attribution and copyright
# notices from the user interface (see section 5.d below).
# Commercial licenses are available and do not require any SOS Open Source attributions
# or visible copyright notices but they are not permitted under this license.
# OSSEval Copyright 2014 Bitergium SLL
# SOS Open Source Copyright 2012 Roberto Galoppini
# Author: Davide Galletti
from datetime import date, datetime
from urlparse import urlparse
from django.db import models
from ohloh import OhlohProxy
from flossmole import FSF, FC, GC, GH
from utils import Forges
from OSSEval.utils import xmlMinidom
class OSProject(models.Model):
    """
    An open source project being evaluated, bound one-to-one to an
    'analysis.Instance' (which also serves as the primary key).

    The registrations of the project on the various forges are the
    OSProjectForge rows reachable through ``osprojectforge_set``.
    """
    name = models.CharField(max_length=200)
    instance = models.OneToOneField('analysis.Instance', related_name='actual_instance', primary_key=True)

    def __str__(self):
        return self.name + " - " + self.instance.name

    def to_xml(self):
        """
        Serialize the project and all of its OSProjectForge rows to an XML string.

        NOTE(review): self.name is written into the attribute without XML
        escaping; a name containing '"', '<' or '&' yields malformed XML.
        """
        str_xml = "<OSProjectForges>"
        for osprojectforge in self.osprojectforge_set.all():
            str_xml += osprojectforge.to_xml()
        str_xml += "</OSProjectForges>"
        return '<OSProject Name="' + self.name + '">' + str_xml + "</OSProject>"

    def from_xml(self, xmldoc, instance, insert = True):
        """
        Load the project from an <OSProject> element and save it, then load
        and save one OSProjectForge per nested <OSProjectForge> element.

        xmldoc   -- the DOM element to read from
        instance -- the analysis Instance this project gets attached to
        insert   -- passed through unchanged to OSProjectForge.from_xml
        """
        self.name = xmlMinidom.getStringAttribute(xmldoc, 'Name')
        self.instance = instance
        # The project has to be saved before the children that point at it.
        self.save()
        # OSProjectForge
        xml_osproject_forges = xmldoc.getElementsByTagName('OSProjectForge')
        for xml_osproject_forge in xml_osproject_forges:
            ospf = OSProjectForge()
            ospf.from_xml(xml_osproject_forge, self, insert)

    def getInstanceInfo(self):
        '''
        Part of the required interface.

        Returns a dictionary i; the information coming from each source is
        stored under the name of the corresponding forge, e.g.
            i['FM']
            i['OHLOH']
            i['FSF']
            i['GC']
            i['GH']
            ...
        Some information is extracted from specific sources and made available
        at root level:
            i['name']                 name to use on search engines
            i['homepage']             best guess of the project home page ("" if unknown)
            i['siteforsearchengine']  host of the home page without a leading "www." ("" if unknown)
            i['days']                 number of days since the project has been registered
                                      (-1 if unknown); the oldest date among
                                      i['OHLOH']['created_at'], i['FM']['date_added']
                                      and i['FSF']['released_on'] wins
            i['download_url']         taken from i['OHLOH'] when available

        Every lookup is best effort: a source that is missing or malformed is
        skipped and never makes the whole call fail.
        '''
        i = {}
        for ospf in self.osprojectforge_set.all():
            i[ospf.forge.name] = ospf.getProjectInfo()

        # From now on you find heuristics that try to reasonably define a single
        # value where you have several sources of information, e.g. project is
        # created 1/1/2012 according to Github and 4/2/2007 according to FSF;
        # the heuristic will choose the remotest date.

        # We need a name to search on search engines.
        if self.instance.name_for_search != "":
            i['name'] = self.instance.name_for_search
        else:
            i['name'] = self.name

        # Some searches on search engines have to be restricted to the project
        # site; let's try to guess it. The order is given by a heuristic based
        # on data reliability: a later source overrides an earlier one.
        i['homepage'] = ""
        homepage_sources = (
            ('FSF', 'real_url'),
            ('GH', 'homepage'),
            ('FM', 'url_homepage'),
            ('OHLOH', 'homepage_url'),
        )
        for source, key in homepage_sources:
            try:
                i['homepage'] = i[source][key]
            except Exception:
                pass

        # TODO: i['documentation_url'] i['faq_url'] i['wiki_url']
        #       i['numBugsTotal']
        #       svn was in Melquiades; is it in any other source?

        # Always define the key so that consumers do not have to guard against
        # its absence when no home page could be found.
        i['siteforsearchengine'] = ""
        if i['homepage'] != "":
            try:
                site = urlparse(i['homepage']).netloc
                if site[:4] == "www.":
                    site = site[4:]
                i['siteforsearchengine'] = site
            except Exception:
                i['siteforsearchengine'] = ""

        # Default to -1 e.g. no value; the oldest creation date wins; GC and GH
        # have no info; FSF released_on seems to have the same semantic.
        i['days'] = -1
        try:
            i['OHLOH']['days'] = (datetime.today() - datetime.strptime(i['OHLOH']['created_at'], '%Y-%m-%dT%H:%M:%SZ')).days
            if i['OHLOH']['days'] > i['days']:
                i['days'] = i['OHLOH']['days']
        except Exception:
            pass
        try:
            i['FM']['days'] = (datetime.today() - datetime.strptime(i['FM']['date_added'], '%Y-%m-%d %H:%M:%S\n')).days
            if i['FM']['days'] > i['days']:
                i['days'] = i['FM']['days']
        except Exception:
            pass
        try:
            # The day count goes under 'days', like for the other sources.
            # NOTE(review): it used to be written over 'released_on', after
            # which reading 'days' failed and FSF was silently ignored; confirm
            # nothing relied on 'released_on' holding the day count.
            i['FSF']['days'] = (date.today() - i['FSF']['released_on']).days
            if i['FSF']['days'] > i['days']:
                i['days'] = i['FSF']['days']
        except Exception:
            pass

        try:
            i['download_url'] = i['OHLOH']['download_url']
        except Exception:
            pass

        # CLEANUP: strip newlines and surrounding blanks from the FM values.
        for key in ('popularity_score', 'rating', 'rating_count', 'url_bugtracker'):
            try:
                i['FM'][key] = (i['FM'][key]).replace('\n', '').strip()
            except Exception:
                pass

        try:
            if 'OHLOH' in i and 'size_fact' in i['OHLOH'] and 'comment_ratio' in i['OHLOH']['size_fact']:
                fl = float(i['OHLOH']['size_fact']['comment_ratio'])
                # _pp stands for pretty print
                i['OHLOH']['size_fact']['comment_ratio_pp'] = str(round(fl*100, 1)) + "%"
        except Exception:
            pass
        return i
class Forge(models.Model):
    """
    A forge (a site hosting or cataloguing projects), identified by name and URL.

    It is linked to OSProject many-to-many through OSProjectForge.
    """
    name = models.CharField(max_length=50)
    url = models.CharField(max_length=200)
    os_projects = models.ManyToManyField(OSProject, through='OSProjectForge', blank=True)

    def __str__(self):
        return " - ".join((self.name, self.url))

    def to_xml(self):
        """
        Return the forge as a self-closing <Forge> element string.

        NOTE(review): name and url are emitted without XML escaping.
        """
        pieces = (
            '<Forge Id="', str(self.id),
            '" Name="', self.name,
            '" URL="', self.url,
            '" />',
        )
        return "".join(pieces)

    # Forges are neither added nor updated using xml; they are just a
    # reference for OSProjectForge, hence there is no "from_xml".
class OSProjectForge(models.Model):
    """
    The registration of an OSProject on a Forge.

    identifier_in_forge is the value handed to the forge specific proxy when
    the project information is fetched (see getProjectInfo).
    """
    name = models.CharField(max_length=200)
    forge = models.ForeignKey(Forge)
    os_project = models.ForeignKey(OSProject)
    identifier_in_forge = models.CharField(max_length=200)
    datasource_id = models.IntegerField()

    def to_xml(self):
        """
        Serialize this row, with its Forge nested inside, to an XML string.

        NOTE(review): attribute values are emitted without XML escaping.
        """
        str_xml = self.forge.to_xml()
        return '<OSProjectForge Id="' + str(self.id) + '" Name="' + self.name + '" IdentifierInForge="' + str(self.identifier_in_forge) + '" DatasourceId="' + str(self.datasource_id) + '">' + str_xml + "</OSProjectForge>"

    def from_xml(self, xmldoc, os_project, insert = True):
        """
        Load this row from an <OSProjectForge> element and save it.

        xmldoc     -- the DOM element to read from
        os_project -- the OSProject this row belongs to
        insert     -- when False the Id found in the xml is kept, otherwise
                      the id is left unset
        The Forge is not created from the xml: it is looked up by the Id of
        the nested <Forge> element and must already exist.
        """
        if not insert:
            self.id = xmlMinidom.getNaturalAttribute(xmldoc, 'Id')
        self.name = xmlMinidom.getStringAttribute(xmldoc, 'Name')
        self.identifier_in_forge = xmlMinidom.getStringAttribute(xmldoc, 'IdentifierInForge')
        self.datasource_id = xmlMinidom.getNaturalAttribute(xmldoc, 'DatasourceId')
        self.os_project = os_project
        # Forge
        xml_forge = xmldoc.getElementsByTagName('Forge')[0]
        f = Forge.objects.get(pk=xmlMinidom.getNaturalAttribute(xml_forge, 'Id'))
        self.forge = f
        self.save()

    def __str__(self):
        return self.name + " - " + self.forge.name

    def getProjectInfo(self):
        """
        Fetch the information about this project from the proxy matching
        its forge and return whatever that proxy returns.

        For a forge that matches none of the known ids an empty dictionary is
        returned instead of failing on an unbound local variable.
        NOTE(review): the empty dictionary assumes that proxies return
        dictionaries, as the consumer in OSProject.getInstanceInfo suggests.
        """
        forge_id = self.forge.id
        if forge_id == Forges.OHLOH:
            osprojectforge_info = OhlohProxy.getProjectInfo(self.identifier_in_forge)
        elif forge_id == Forges.FC:
            osprojectforge_info = FC.getProjectInfo(self.identifier_in_forge)
        elif forge_id == Forges.FSF:
            osprojectforge_info = FSF.getProjectInfo(self.identifier_in_forge)
        elif forge_id == Forges.GC:
            osprojectforge_info = GC.getProjectInfo(self.identifier_in_forge)
        elif forge_id == Forges.GH:
            osprojectforge_info = GH.getProjectInfo(self.identifier_in_forge)
        else:
            # Unknown forge: no source of information is available.
            osprojectforge_info = {}
        return osprojectforge_info