Switch to unified view

a/OSSEval/OSSEval/utils.py b/OSSEval/OSSEval/utils.py
...
...
10
# SOS Open Source Copyright 2012 Roberto Galoppini
10
# SOS Open Source Copyright 2012 Roberto Galoppini
11
# Author: Davide Galletti 
11
# Author: Davide Galletti 
12
12
13
13
14
from json import JSONEncoder
14
from json import JSONEncoder
15
from lxml import html
16
import urllib2
17
from apiclient.discovery import build
15
from apiclient.discovery import build
18
from apiclient.errors import HttpError
16
from apiclient.errors import HttpError
19
import OSSEval
17
import OSSEval
18
import urllib2
19
from lxml import html
20
20
21
class Configuration():
    # Holds settings shared by the helpers in this module.
    # Google developer key, copied from the OSSEval package when this class
    # body is executed.
    google_developerKey = OSSEval.google_developerKey
23
23
24
class TrivialJSONEncoder(JSONEncoder):
24
class TrivialJSONEncoder(JSONEncoder):
...
...
51
            return None
51
            return None
52
        
52
        
53
class SearchEngine():
53
class SearchEngine():
54
    @staticmethod
54
    @staticmethod
55
    def search__engine_name():
55
    def search__engine_name():
56
        return "Google"
56
        return "Gigablast"
57
    
57
    
58
    @staticmethod
58
    @staticmethod
59
    def search_url(search_text, sites=[]):
59
    def search_url_parameters(search_text, sites=[]):
60
        search_text = search_text.replace(":","%3A").replace("+", "%2B")
60
        search_text = search_text.replace(":","%3A").replace("+", "%2B")
61
        if len(sites) == 0:
61
        if len(sites) == 0:
62
            return "https://www.google.com/?#q=" + search_text
62
            return "q=" + search_text
63
        else:
63
        else:
64
            if len(sites) == 1:
64
            if len(sites) == 1:
65
                return "https://www.google.com/?#q=" + search_text + "+site%3A" + sites[0]
65
                return "q=" + search_text + "+site%3A" + sites[0]
66
            else:
66
            else:
67
                query_url = "https://www.google.com/?#q=" + search_text + " ("
67
                query_url = "q=" + search_text + " ("
68
                or_string = ""
68
                or_string = ""
69
                for site in sites:
69
                for site in sites:
70
                    query_url += or_string + "site%3A" + site
70
                    query_url += or_string + "site%3A" + site
71
                    or_string = " OR "
71
                    or_string = " OR "
72
                return query_url + ")"
72
                return query_url + ")"
73
74
    @staticmethod
75
    def gigablast_search_url(search_text, sites=[]):
76
        return "http://www.gigablast.com/search?" + SearchEngine.search_url_parameters(search_text, sites)
77
78
    @staticmethod
79
    def google_search_url(search_text, sites=[]):
80
        return "https://www.google.com/?" + SearchEngine.search_url_parameters(search_text, sites)
81
#         search_text = search_text.replace(":","%3A").replace("+", "%2B")
82
#         if len(sites) == 0:
83
#             return "https://www.google.com/?#q=" + search_text
84
#         else:
85
#             if len(sites) == 1:
86
#                 return "https://www.google.com/?#q=" + search_text + "+site%3A" + sites[0]
87
#             else:
88
#                 query_url = "https://www.google.com/?#q=" + search_text + " ("
89
#                 or_string = ""
90
#                 for site in sites:
91
#                     query_url += or_string + "site%3A" + site
92
#                     or_string = " OR "
93
#                 return query_url + ")"
73
94
74
    @staticmethod
95
    @staticmethod
75
    def readable_query(search_text, sites=[]):
96
    def readable_query(search_text, sites=[]):
76
        search_text = search_text.replace("%3A", ":").replace("%20", " ").replace("%2B", "+")
97
        search_text = search_text.replace("%3A", ":").replace("%20", " ").replace("%2B", "+")
77
        if len(sites) == 0:
98
        if len(sites) == 0:
...
...
86
                    query_url += or_string + "site:" + site
107
                    query_url += or_string + "site:" + site
87
                    or_string = " OR "
108
                    or_string = " OR "
88
                return query_url + ")"
109
                return query_url + ")"
89
110
90
    @staticmethod
111
    @staticmethod
    def search_count_scraping(search_text, sites=[]):
        '''
        Scrape the Gigablast results page and return the approximate hit count.

        search_text    query text; the URL is built by gigablast_search_url
                       and any remaining spaces are sent as %20
        sites          optional list of sites the query is restricted to

        Returns the hit count as an int, 0 when the page reports no results,
        and -1 when the page cannot be fetched or no count can be found in it.
        '''
        url = SearchEngine.gigablast_search_url(search_text, sites).replace(" ", "%20")
        try:
            response = urllib2.urlopen(url)
            try:
                html_page = response.read()
            finally:
                # Release the connection even if reading fails.
                response.close()
        except (urllib2.URLError, HttpError):
            # urllib2 reports network and HTTP failures as URLError (HTTPError
            # is a subclass of it); HttpError is still caught as before.
            return -1

        # An empty result page contains: "</form>No results found for <b>"
        if "</form>No results found for <b>" in html_page:
            return 0

        # A populated page contains:
        # "Results <b>1</b> to <b>10</b> of about <b>5,278,952</b>"
        str_before = "</b> of about <b>"
        str_after = "</b>"
        start = html_page.find(str_before)
        if start == -1:
            # Unexpected page layout: do not slice from a bogus offset.
            return -1
        start += len(str_before)
        end = html_page.find(str_after, start)
        if end == -1:
            return -1

        try:
            # Strip the thousands separators before converting.
            return int(html_page[start:end].replace(",", ""))
        except ValueError:
            return -1
128
129
    @staticmethod
    def search_count(search_text, sites=[]):
        '''
        Return the hit count for search_text by delegating to
        search_count_scraping.
        '''
        hit_count = SearchEngine.search_count_scraping(search_text, sites)
        return hit_count
132
133
    @staticmethod
134
    def search_url(search_text, sites=[]):
135
        return SearchEngine.gigablast_search_url(search_text, sites)
136
137
    @staticmethod
138
    def google_search_count(search_text, sites=[]):
92
        '''
139
        '''
93
        https://developers.google.com/custom-search/json-api/v1/reference/cse/list
140
        https://developers.google.com/custom-search/json-api/v1/reference/cse/list
94
        exactTerms        string     Identifies a phrase that all documents in the search results must contain.
141
        exactTerms        string     Identifies a phrase that all documents in the search results must contain.
95
        excludeTerms      string     Identifies a word or phrase that should not appear in any documents in the search results.
142
        excludeTerms      string     Identifies a word or phrase that should not appear in any documents in the search results.
96
        siteSearch        string     Specifies all search results should be pages from a given site.
143
        siteSearch        string     Specifies all search results should be pages from a given site.
...
...
107
                for site in sites:
154
                for site in sites:
108
                    res = service.cse().list(q=search_text, siteSearch=site, cx='017576662512468239146:omuauf_lfve',).execute()
155
                    res = service.cse().list(q=search_text, siteSearch=site, cx='017576662512468239146:omuauf_lfve',).execute()
109
                    total += int(res['searchInformation']['totalResults'])
156
                    total += int(res['searchInformation']['totalResults'])
110
            return total
157
            return total
111
        except HttpError as ex:
158
        except HttpError as ex:
112
            pass
159
            return -1
113
160
114
    @staticmethod
161
    @staticmethod
115
    def trends_img(project_name):
162
    def trends_img(project_name):
116
        return "<script type=\"text/javascript\" src=\"//www.google.com/trends/embed.js?q=" + project_name + "&content=1&cid=TIMESERIES_GRAPH_0&export=5&w=50&h=40\"></script>"
163
        return "<script type=\"text/javascript\" src=\"//www.google.com/trends/embed.js?q=" + project_name + "&content=1&cid=TIMESERIES_GRAPH_0&export=5&w=50&h=40\"></script>"
117
    
164