--- a/OSSEval/OSSEval/utils.py
+++ b/OSSEval/OSSEval/utils.py
@@ -2,6 +2,7 @@
from lxml import html
import urllib2
from apiclient.discovery import build
+from apiclient.errors import HttpError
class TrivialJSONEncoder(JSONEncoder):
def default(self, o):
@@ -30,7 +31,7 @@
try:
return int(xmldoc.attributes[tag].firstChild.data)
except:
- return -1
+ return None
class SearchEngine():
@staticmethod
@@ -38,18 +39,39 @@
return "Google"
@staticmethod
- def search_url(search_text, site_search=""):
- if site_search == "":
+ def search_url(search_text, sites=()):
+ '''
+ Returns the Google search URL for search_text, limited to the given sites when any are passed.
+ '''
+ search_text = search_text.replace(":","%3A").replace("+", "%2B")
+ if len(sites) == 0:
return "https://www.google.com/?#q=" + search_text
else:
- return "https://www.google.com/?#q=" + search_text + "+site%3A" + site_search
+ if len(sites) == 1:
+ return "https://www.google.com/?#q=" + search_text + "+site%3A" + sites[0]
+ else:
+ # "+" stands for a space here, as in the one-site branch, so the URL contains no raw spaces.
+ site_terms = "+OR+".join(["site%3A" + site for site in sites])
+ return "https://www.google.com/?#q=" + search_text + "+(" + site_terms + ")"
@staticmethod
- def readable_query(search_text, site_search=""):
- return search_text.replace("%3A", ":").replace("%20", " ").replace("%2B", "+") + ("" if site_search=="" else " site:" + site_search)
+ def readable_query(search_text, sites=()):
+ '''
+ Returns search_text with %3A, %20 and %2B decoded, followed by the site: filter(s) when sites is not empty.
+ '''
+ search_text = search_text.replace("%3A", ":").replace("%20", " ").replace("%2B", "+")
+ if len(sites) == 0:
+ return search_text
+ elif len(sites) == 1:
+ # A plain space separator, exactly as in the removed line; "+" is only its URL-encoded form.
+ return search_text + " site:" + sites[0]
+ else:
+ # Same separator before the parenthesised OR-list of site: terms.
+ site_terms = " OR ".join(["site:" + site for site in sites])
+ return search_text + " (" + site_terms + ")"
@staticmethod
- def search_count(search_text, site_search=""):
+ def search_count(search_text, sites=()):
'''
https://developers.google.com/custom-search/json-api/v1/reference/cse/list
exactTerms string Identifies a phrase that all documents in the search results must contain.
@@ -58,10 +80,20 @@
siteSearchFilter string Controls whether to include or exclude results from the site named in the siteSearch parameter.
Acceptable values are: "e": exclude "i": include
'''
- service = build("customsearch", "v1", developerKey="AIzaSyCAova46cAkHga_SZWTBqROdjoz1KcTlw8")
- print search_text + " " + site_search
- if site_search == "":
- res = service.cse().list(q=search_text, cx='017576662512468239146:omuauf_lfve',).execute()
- else:
- res = service.cse().list(q=search_text, siteSearch=site_search, cx='017576662512468239146:omuauf_lfve',).execute()
- return int(res['searchInformation']['totalResults'])
+ try:
+ service = build("customsearch", "v1", developerKey="AIzaSyCAova46cAkHga_SZWTBqROdjoz1KcTlw8")  # NOTE(review): API key is committed in source; consider loading it from configuration
+ total = 0
+ if len(sites) == 0:
+ res = service.cse().list(q=search_text, cx='017576662512468239146:omuauf_lfve',).execute()
+ total = int(res['searchInformation']['totalResults'])
+ else:
+ for site in sites:  # one request per site; the per-site totals are added together
+ res = service.cse().list(q=search_text, siteSearch=site, cx='017576662512468239146:omuauf_lfve',).execute()
+ total += int(res['searchInformation']['totalResults'])
+ return total
+ except HttpError:
+ return None  # the request failed: report "no count available" explicitly instead of falling off the end
+ @staticmethod
+ def trends_img(project_name):  # Returns a <script> tag loading Google's trends/embed.js with q=project_name (cid=TIMESERIES_GRAPH_0).
+ return "<script type=\"text/javascript\" src=\"//www.google.com/trends/embed.js?q=" + project_name + "&content=1&cid=TIMESERIES_GRAPH_0&export=5&w=50&h=40\"></script>"  # NOTE(review): project_name is concatenated as-is (no URL/HTML escaping) - confirm callers only pass safe names
+