# This Source Code Form of OSSEval is subject to the terms of the GNU AFFERO
# GENERAL PUBLIC LICENSE, v. 3.0. If a copy of the AGPL was not
# distributed with this file, You can obtain one at http://www.gnu.org/licenses/agpl.txt
#
# OSSEval is powered by the SOS Open Source AGPL edition.
# The AGPL requires that you do not remove the SOS Open Source attribution and copyright
# notices from the user interface (see section 5.d below).
# OSSEval Copyright 2014 Bitergium SLL
# SOS Open Source Copyright 2012 Roberto Galoppini
# Author: Davide Galletti
from json import JSONEncoder
from apiclient.discovery import build
from apiclient.errors import HttpError
import OSSEval
import urllib2
from lxml import html
class Configuration:
    """Namespace for configuration values used by this module."""

    # Google API developer key, taken from the OSSEval package settings.
    google_developerKey = OSSEval.google_developerKey
class TrivialJSONEncoder(JSONEncoder):
    """JSON encoder that serializes arbitrary objects through their ``__dict__``."""

    def default(self, o):
        """Return the attribute dictionary of ``o`` as its JSON representation.

        ``o`` is any object the standard encoder could not serialize.
        Objects that carry no ``__dict__`` are handed to the base
        implementation, which raises ``TypeError`` as the ``JSONEncoder``
        contract requires (instead of leaking an ``AttributeError``).
        """
        try:
            return o.__dict__
        except AttributeError:
            # Not representable this way: let JSONEncoder raise TypeError.
            return JSONEncoder.default(self, o)
class xmlMinidom:
    """Best-effort accessors for DOM nodes (xml.dom.minidom-style API).

    Every helper swallows lookup/conversion errors and returns a neutral
    value instead, so callers can read optional data without guarding
    each access themselves.
    """

    @staticmethod
    def getString(xmldoc, tag):
        """Return the text of the first ``tag`` element found under ``xmldoc``.

        Returns ``""`` when no such element exists, when it has no child
        node, or when any other error occurs during the lookup.
        """
        try:
            return xmldoc.getElementsByTagName(tag)[0].firstChild.data
        except Exception:
            # Deliberate best-effort: a missing or empty element yields "".
            return ""

    @staticmethod
    def getStringAttribute(xmldoc, tag):
        """Return the value of attribute ``tag`` on node ``xmldoc``.

        Returns ``""`` when the attribute is missing or cannot be read.
        """
        try:
            return xmldoc.attributes[tag].firstChild.data
        except Exception:
            # Deliberate best-effort: a missing attribute yields "".
            return ""

    @staticmethod
    def getNaturalAttribute(xmldoc, tag):
        """Return attribute ``tag`` of node ``xmldoc`` converted with ``int()``.

        Returns ``None`` when the attribute is missing or its value is not
        a valid integer literal.
        """
        try:
            return int(xmldoc.attributes[tag].firstChild.data)
        except Exception:
            # Deliberate best-effort: missing or non-numeric yields None.
            return None
class SearchEngine:
    """Static helpers that build search URLs and fetch result counts.

    The default engine (``search_url`` / ``search_count``) is Gigablast;
    Google variants are offered next to it. In every method ``sites`` is
    an optional sequence of site names used to restrict the query with
    ``site:`` operators; ``None`` or an empty sequence means no
    restriction.
    """

    @staticmethod
    def search__engine_name():
        """Return the display name of the default search engine."""
        return "Gigablast"

    @staticmethod
    def search_url_parameters(search_text, sites=None):
        """Build the ``q=...`` query-string fragment for a search.

        Only ``:`` and ``+`` in ``search_text`` are percent-encoded here.
        The multi-site form contains literal spaces, which callers that
        fetch the URL are expected to encode themselves.
        """
        search_text = search_text.replace(":", "%3A").replace("+", "%2B")
        query = "q=" + search_text
        if not sites:
            return query
        if len(sites) == 1:
            return query + "+site%3A" + sites[0]
        site_filter = " OR ".join("site%3A" + site for site in sites)
        return query + " (" + site_filter + ")"

    @staticmethod
    def gigablast_search_url(search_text, sites=None):
        """Return the Gigablast search URL for ``search_text`` and ``sites``."""
        return ("http://www.gigablast.com/search?"
                + SearchEngine.search_url_parameters(search_text, sites))

    @staticmethod
    def google_search_url(search_text, sites=None):
        """Return the Google search URL for ``search_text`` and ``sites``."""
        return ("https://www.google.com/?"
                + SearchEngine.search_url_parameters(search_text, sites))

    @staticmethod
    def readable_query(search_text, sites=None):
        """Return a human-readable form of the query.

        Undoes the ``%3A``, ``%20`` and ``%2B`` escapes in ``search_text``
        and appends the ``site:`` restrictions in plain text.
        """
        search_text = (search_text.replace("%3A", ":")
                       .replace("%20", " ")
                       .replace("%2B", "+"))
        if not sites:
            return search_text
        if len(sites) == 1:
            return search_text + "+site:" + sites[0]
        site_filter = " OR ".join("site:" + site for site in sites)
        return search_text + "(" + site_filter + ")"

    @staticmethod
    def _parse_result_count(html_page):
        """Extract the result count from a Gigablast result page.

        Returns 0 when the page reports no results and -1 when the count
        cannot be located or is not a number.
        """
        # Expected fragments in the page:
        #   "Results <b>1</b> to <b>10</b> of about <b>5,278,952</b>"
        #   "</form>No results found for <b>"
        if "</form>No results found for <b>" in html_page:
            return 0
        str_before = "</b> of about <b>"
        start = html_page.find(str_before)
        if start < 0:
            # Marker missing: the page layout is not the expected one.
            return -1
        start += len(str_before)
        end = html_page.find("</b>", start)
        if end < 0:
            return -1
        try:
            # The count uses "," as thousands separator.
            return int(html_page[start:end].replace(",", ""))
        except ValueError:
            return -1

    @staticmethod
    def search_count_scraping(search_text, sites=None):
        """Return the number of Gigablast results by scraping the result page.

        Returns the count as an int, 0 when the page reports no results,
        and -1 when the request fails or the page cannot be parsed.
        """
        url = SearchEngine.gigablast_search_url(search_text, sites).replace(" ", "%20")
        try:
            response = urllib2.urlopen(url)
            try:
                html_page = response.read()
            finally:
                # Always release the connection, even if read() fails.
                response.close()
        except urllib2.URLError:
            # urllib2 signals network/HTTP failures with URLError
            # (HTTPError is a subclass of it).
            return -1
        return SearchEngine._parse_result_count(html_page)

    @staticmethod
    def search_count(search_text, sites=None):
        """Return the result count from the default engine (Gigablast scraping)."""
        return SearchEngine.search_count_scraping(search_text, sites)

    @staticmethod
    def search_url(search_text, sites=None):
        """Return the search URL for the default engine (Gigablast)."""
        return SearchEngine.gigablast_search_url(search_text, sites)

    @staticmethod
    def google_search_count(search_text, sites=None):
        '''
        Return the total number of results reported by the Google Custom
        Search API, summed over ``sites`` when any are given; -1 is
        returned when the API raises HttpError.

        https://developers.google.com/custom-search/json-api/v1/reference/cse/list
        exactTerms string Identifies a phrase that all documents in the search results must contain.
        excludeTerms string Identifies a word or phrase that should not appear in any documents in the search results.
        siteSearch string Specifies all search results should be pages from a given site.
        siteSearchFilter string Controls whether to include or exclude results from the site named in the siteSearch parameter.
        Acceptable values are: "e": exclude "i": include
        '''
        # Identifier of the custom search engine queried through the API.
        cx = '017576662512468239146:omuauf_lfve'
        try:
            service = build("customsearch", "v1", developerKey=OSSEval.google_developerKey)
            if not sites:
                res = service.cse().list(q=search_text, cx=cx).execute()
                return int(res['searchInformation']['totalResults'])
            total = 0
            # One request per site; the per-site totals are added up.
            for site in sites:
                res = service.cse().list(q=search_text, siteSearch=site, cx=cx).execute()
                total += int(res['searchInformation']['totalResults'])
            return total
        except HttpError:
            return -1

    @staticmethod
    def trends_img(project_name):
        """Return the HTML ``<script>`` tag embedding a Google Trends graph.

        NOTE(review): ``project_name`` is inserted into the markup as-is,
        without URL or HTML escaping; callers must pass a safe, already
        encoded value.
        """
        return "<script type=\"text/javascript\" src=\"//www.google.com/trends/embed.js?q=" + project_name + "&content=1&cid=TIMESERIES_GRAPH_0&export=5&w=50&h=40\"></script>"