--- a/OSSEval/OSSEval/utils.py
+++ b/OSSEval/OSSEval/utils.py
@@ -12,11 +12,11 @@
from json import JSONEncoder
-from lxml import html
-import urllib2
from apiclient.discovery import build
from apiclient.errors import HttpError
import OSSEval
+import urllib2
+from lxml import html
class Configuration():
google_developerKey = OSSEval.google_developerKey
@@ -53,23 +53,44 @@
class SearchEngine():
@staticmethod
def search__engine_name():
- return "Google"
+ return "Gigablast"
@staticmethod
- def search_url(search_text, sites=[]):
+ def search_url_parameters(search_text, sites=[]):
search_text = search_text.replace(":","%3A").replace("+", "%2B")
if len(sites) == 0:
- return "https://www.google.com/?#q=" + search_text
+ return "q=" + search_text
else:
if len(sites) == 1:
- return "https://www.google.com/?#q=" + search_text + "+site%3A" + sites[0]
+ return "q=" + search_text + "+site%3A" + sites[0]
else:
- query_url = "https://www.google.com/?#q=" + search_text + " ("
+ query_url = "q=" + search_text + " ("
or_string = ""
for site in sites:
query_url += or_string + "site%3A" + site
or_string = " OR "
return query_url + ")"
+
+ @staticmethod
+ def gigablast_search_url(search_text, sites=[]):
+ return "http://www.gigablast.com/search?" + SearchEngine.search_url_parameters(search_text, sites)
+
+ @staticmethod
+    def google_search_url(search_text, sites=[]):
+        return "https://www.google.com/?#" + SearchEngine.search_url_parameters(search_text, sites)  # keep the "/?#q=" form this URL had before the query parameters were factored out
+# search_text = search_text.replace(":","%3A").replace("+", "%2B")
+# if len(sites) == 0:
+# return "https://www.google.com/?#q=" + search_text
+# else:
+# if len(sites) == 1:
+# return "https://www.google.com/?#q=" + search_text + "+site%3A" + sites[0]
+# else:
+# query_url = "https://www.google.com/?#q=" + search_text + " ("
+# or_string = ""
+# for site in sites:
+# query_url += or_string + "site%3A" + site
+# or_string = " OR "
+# return query_url + ")"
@staticmethod
def readable_query(search_text, sites=[]):
@@ -88,7 +109,33 @@
return query_url + ")"
@staticmethod
+    def search_count_scraping(search_text, sites=[]):
+        '''Scrape Gigablast's hit count for the query; returns an int: the count, 0 for no results, -1 on any failure.'''
+        try:
+            url = SearchEngine.gigablast_search_url(search_text, sites).replace(" ", "%20")
+            response = urllib2.urlopen(url)
+            try:
+                html_page = response.read()
+            finally:
+                response.close()
+            if "</form>No results found for <b>" in html_page:
+                return 0
+            # The results page reads: "Results <b>1</b> to <b>10</b> of about <b>5,278,952</b>"
+            count_text = html_page.split("</b> of about <b>", 1)[1].split("</b>", 1)[0]
+            return int(count_text.replace(",", ""))
+        except (IOError, IndexError, ValueError):
+            return -1  # IOError covers urllib2.URLError/HTTPError and socket errors; the others mean the page layout was not recognised
+
+ @staticmethod
def search_count(search_text, sites=[]):
+ return SearchEngine.search_count_scraping(search_text, sites)
+
+ @staticmethod
+ def search_url(search_text, sites=[]):
+ return SearchEngine.gigablast_search_url(search_text, sites)
+
+ @staticmethod
+ def google_search_count(search_text, sites=[]):
'''
https://developers.google.com/custom-search/json-api/v1/reference/cse/list
exactTerms string Identifies a phrase that all documents in the search results must contain.
@@ -109,9 +156,9 @@
total += int(res['searchInformation']['totalResults'])
return total
except HttpError as ex:
- pass
+ return -1
@staticmethod
def trends_img(project_name):
return "<script type=\"text/javascript\" src=\"//www.google.com/trends/embed.js?q=" + project_name + "&content=1&cid=TIMESERIES_GRAPH_0&export=5&w=50&h=40\"></script>"
-
+