Parent: [2c9463] (diff)

Child: [c9d94b] (diff)

Download this file

utils.py    68 lines (59 with data), 2.6 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
from json import JSONEncoder
from lxml import html
import urllib2
from apiclient.discovery import build
class TrivialJSONEncoder(JSONEncoder):
    '''
    JSON encoder that serializes otherwise-unsupported objects through
    their instance attribute dictionary.
    '''

    def default(self, o):
        '''
        Return ``o.__dict__`` so that plain attribute-holding objects can be
        encoded as JSON objects.

        Objects that have no ``__dict__`` are handed to the base
        implementation, which raises TypeError (the error the json module
        expects from ``default`` for unserializable values) instead of
        leaking an AttributeError.
        '''
        try:
            return o.__dict__
        except AttributeError:
            return JSONEncoder.default(self, o)
class xmlMinidom():
    '''
    Best-effort accessors for DOM nodes (the class name suggests
    xml.dom.minidom objects - confirm against callers).

    Every accessor returns a fallback value instead of raising when the
    requested tag or attribute is missing or malformed.
    '''

    @staticmethod
    def getString(xmldoc, tag):
        '''
        Return the data of the first child of the first <tag> element found
        under xmldoc, or "" if it cannot be read.
        '''
        try:
            return xmldoc.getElementsByTagName(tag)[0].firstChild.data
        except Exception:
            # Deliberate best-effort: a missing element or empty node yields "".
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate.
            return ""

    @staticmethod
    def getStringAttribute(xmldoc, tag):
        '''
        Return the text of the attribute named tag on the node xmldoc,
        or "" if it cannot be read.
        '''
        try:
            return xmldoc.attributes[tag].firstChild.data
        except Exception:
            # Missing attribute (or a node without attributes) yields "".
            return ""

    @staticmethod
    def getNaturalAttribute(xmldoc, tag):
        '''
        Return the attribute named tag on the node xmldoc converted with
        int(); if it is not there or is not an integer, -1 is returned.
        '''
        try:
            return int(xmldoc.attributes[tag].firstChild.data)
        except Exception:
            # Covers both a missing attribute and a non-numeric value.
            return -1
class SearchEngine():
    '''
    Helpers for building Google search URLs and querying the Google Custom
    Search JSON API for result counts.
    '''

    # NOTE(security): these credentials are committed in source. They are kept
    # here unchanged so behavior is preserved, but they should be moved to
    # configuration / environment and the key rotated.
    _DEVELOPER_KEY = "AIzaSyCAova46cAkHga_SZWTBqROdjoz1KcTlw8"
    _ENGINE_ID = '017576662512468239146:omuauf_lfve'

    # Common prefix of every URL produced by search_url().
    _SEARCH_URL_PREFIX = "https://www.google.com/?#q="

    @staticmethod
    def search__engine_name():
        '''
        Return the display name of the search engine.
        '''
        return "Google"

    # Alias without the doubled underscore; the original name is kept so
    # existing callers continue to work.
    search_engine_name = search__engine_name

    @staticmethod
    def search_url(search_text, site_search=""):
        '''
        Return the Google URL for search_text, optionally restricted to
        site_search through a "site:" term.

        Both arguments are concatenated as given; no URL encoding is applied
        here (readable_query decodes percent-escapes, so search_text is
        presumably already encoded by the caller - confirm).
        '''
        url = SearchEngine._SEARCH_URL_PREFIX + search_text
        if site_search != "":
            url += "+site%3A" + site_search
        return url

    @staticmethod
    def readable_query(search_text, site_search=""):
        '''
        Return a human-readable form of the query: the %3A, %20 and %2B
        escapes in search_text are replaced by ":", " " and "+", and
        " site:<site_search>" is appended when site_search is not empty.
        '''
        readable = search_text.replace("%3A", ":").replace("%20", " ").replace("%2B", "+")
        if site_search != "":
            readable += " site:" + site_search
        return readable

    @staticmethod
    def search_count(search_text, site_search=""):
        '''
        Return the total number of results the Custom Search API reports for
        search_text, optionally restricted to site_search.

        https://developers.google.com/custom-search/json-api/v1/reference/cse/list
        exactTerms string Identifies a phrase that all documents in the search results must contain.
        excludeTerms string Identifies a word or phrase that should not appear in any documents in the search results.
        siteSearch string Specifies all search results should be pages from a given site.
        siteSearchFilter string Controls whether to include or exclude results from the site named in the siteSearch parameter.
        Acceptable values are: "e": exclude "i": include

        Performs a network request; errors raised by the API client propagate.
        '''
        service = build("customsearch", "v1", developerKey=SearchEngine._DEVELOPER_KEY)
        # Single-argument parenthesised form prints identically on Python 2
        # and is also valid syntax on Python 3.
        print(search_text + " " + site_search)
        # Build the request once; siteSearch is only sent when requested.
        params = {"q": search_text, "cx": SearchEngine._ENGINE_ID}
        if site_search != "":
            params["siteSearch"] = site_search
        res = service.cse().list(**params).execute()
        return int(res['searchInformation']['totalResults'])