|
a/OSSEval/OSSEval/utils.py |
|
b/OSSEval/OSSEval/utils.py |
1 |
from json import JSONEncoder
|
1 |
from json import JSONEncoder
|
2 |
from lxml import html
|
2 |
from lxml import html
|
3 |
import urllib2
|
3 |
import urllib2
|
4 |
from apiclient.discovery import build
|
4 |
from apiclient.discovery import build
|
|
|
5 |
from apiclient.errors import HttpError
|
5 |
|
6 |
|
6 |
class TrivialJSONEncoder(JSONEncoder):
    '''
    JSON encoder that serializes otherwise unsupported objects through
    their instance attribute dictionary.
    '''

    def default(self, o):
        '''
        Return a JSON-serializable stand-in for o.

        Objects exposing __dict__ are encoded as that dictionary. Anything
        else (for example a set or an instance of a slotted class) is handed
        to JSONEncoder.default, which raises TypeError as the json encoder
        protocol expects, rather than leaking an AttributeError.
        '''
        try:
            return o.__dict__
        except AttributeError:
            # No attribute dictionary: defer to the base class so the
            # caller sees the standard "not JSON serializable" TypeError.
            return JSONEncoder.default(self, o)
|
9 |
|
10 |
|
|
... |
|
... |
28 |
a natural number; if it's not there -1 is returned
|
29 |
a natural number; if it's not there -1 is returned
|
29 |
'''
|
30 |
'''
|
30 |
try:
|
31 |
try:
|
31 |
return int(xmldoc.attributes[tag].firstChild.data)
|
32 |
return int(xmldoc.attributes[tag].firstChild.data)
|
32 |
except:
|
33 |
except:
|
33 |
return -1
|
34 |
return None
|
34 |
|
35 |
|
35 |
class SearchEngine():
    '''
    Namespace of static helpers that build Google search links, render
    human-readable queries and fetch result counts through the Google
    Custom Search API.
    '''

    # Identifier of the Custom Search Engine used by search_count.
    _CSE_ID = '017576662512468239146:omuauf_lfve'
    # NOTE(review): this API key is committed to source control; consider
    # loading it from configuration and rotating it.
    _DEVELOPER_KEY = "AIzaSyCAova46cAkHga_SZWTBqROdjoz1KcTlw8"

    @staticmethod
    def _normalize_sites(sites):
        '''
        Return sites as a list.

        None and empty values become an empty list; a bare string is treated
        as a single site, so it is not iterated character by character.
        '''
        if not sites:
            return []
        if isinstance(sites, (str, type(u""))):
            return [sites]
        return list(sites)

    @staticmethod
    def search__engine_name():
        '''
        Return the display name of the search engine.
        '''
        return "Google"

    @staticmethod
    def search_url(search_text, sites=None):
        '''
        Build the URL of a Google search for search_text.

        search_text: the query; ":" and "+" are percent-encoded here.
        sites: optional list of sites (or a single site string) the search
               is restricted to. Several sites are combined with OR inside
               parentheses.

        Terms are separated with "+" in every branch so that the single-site
        and multi-site URLs are built consistently and contain no raw spaces.
        '''
        search_text = search_text.replace(":", "%3A").replace("+", "%2B")
        base_url = "https://www.google.com/?#q=" + search_text
        sites = SearchEngine._normalize_sites(sites)
        if not sites:
            return base_url
        if len(sites) == 1:
            return base_url + "+site%3A" + sites[0]
        return base_url + "+(" + "+OR+".join(["site%3A" + site for site in sites]) + ")"

    @staticmethod
    def readable_query(search_text, sites=None):
        '''
        Return the query as a human would type it in the search box.

        search_text: the query; "%3A", "%20" and "%2B" are decoded.
        sites: optional list of sites (or a single site string); rendered as
               " site:x" for one site or " (site:x OR site:y)" for several.
        '''
        search_text = search_text.replace("%3A", ":").replace("%20", " ").replace("%2B", "+")
        sites = SearchEngine._normalize_sites(sites)
        if not sites:
            return search_text
        if len(sites) == 1:
            # A space, not "+": this text is for display, and a literal "+"
            # is meaningful in the decoded query.
            return search_text + " site:" + sites[0]
        return search_text + " (" + " OR ".join(["site:" + site for site in sites]) + ")"

    @staticmethod
    def search_count(search_text, sites=None):
        '''
        Return the number of results reported for search_text, or None when
        the Custom Search API answers with an HTTP error.

        With no sites a single unrestricted query is issued; otherwise one
        query per site is issued and the totals are summed.

        https://developers.google.com/custom-search/json-api/v1/reference/cse/list
        exactTerms string Identifies a phrase that all documents in the search results must contain.
        excludeTerms string Identifies a word or phrase that should not appear in any documents in the search results.
        siteSearch string Specifies all search results should be pages from a given site.
        siteSearchFilter string Controls whether to include or exclude results from the site named in the siteSearch parameter.
        Acceptable values are: "e": exclude "i": include
        '''
        sites = SearchEngine._normalize_sites(sites)
        try:
            service = build("customsearch", "v1", developerKey=SearchEngine._DEVELOPER_KEY)
            if not sites:
                res = service.cse().list(q=search_text, cx=SearchEngine._CSE_ID).execute()
                return int(res['searchInformation']['totalResults'])
            total = 0
            for site in sites:
                res = service.cse().list(q=search_text, siteSearch=site, cx=SearchEngine._CSE_ID).execute()
                total += int(res['searchInformation']['totalResults'])
            return total
        except HttpError:
            # Best effort: an API failure is reported as "count unavailable"
            # (None) instead of propagating to the caller.
            return None

    @staticmethod
    def trends_img(project_name):
        '''
        Return the HTML script tag that embeds the Google Trends time series
        graph for project_name.
        '''
        # NOTE(review): project_name is inserted verbatim; presumably callers
        # pass a trusted, URL-safe name - confirm.
        return ("<script type=\"text/javascript\" src=\"//www.google.com/trends/embed.js?q="
                + project_name
                + "&content=1&cid=TIMESERIES_GRAPH_0&export=5&w=50&h=40\"></script>")
|
|
|
99 |
|