Parent: [7f6519] (diff)

Child: [c22082] (diff)

Download this file

utils.py    100 lines (90 with data), 3.9 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
from json import JSONEncoder
from lxml import html
import urllib2
from apiclient.discovery import build
from apiclient.errors import HttpError
class TrivialJSONEncoder(JSONEncoder):
    """JSON encoder that serializes otherwise-unsupported objects through
    their instance attribute dictionary (``__dict__``)."""

    def default(self, o):
        """Return a JSON-serializable stand-in for ``o``.

        Called by the encoder only for objects it cannot serialize natively.

        :param o: the object the encoder could not handle on its own.
        :returns: ``o.__dict__`` (the live attribute dict, not a copy).
        :raises TypeError: if ``o`` has no ``__dict__`` (for example a set or
            an instance of a ``__slots__`` class).
        """
        try:
            return o.__dict__
        except AttributeError:
            # No attribute dict to fall back on: defer to the base class so
            # the caller gets the TypeError the json module documents for
            # unserializable objects, rather than a stray AttributeError.
            return JSONEncoder.default(self, o)
class xmlMinidom():
    """Lenient accessors for DOM nodes (presumably ``xml.dom.minidom``
    objects, judging by the class name -- confirm against callers).

    Every helper is best-effort: any lookup failure yields a fallback value
    instead of an exception.
    """

    @staticmethod
    def getString(xmldoc, tag):
        """Return the text data of the first child of the first ``tag`` element.

        :param xmldoc: node exposing ``getElementsByTagName``.
        :param tag: element name to look up.
        :returns: the ``data`` of the element's first child, or ``""`` when
            the element is missing, empty, or the lookup fails in any way.
        """
        try:
            return xmldoc.getElementsByTagName(tag)[0].firstChild.data
        except Exception:
            # Deliberately broad (best-effort accessor), but no longer a bare
            # ``except:`` so KeyboardInterrupt / SystemExit still propagate.
            return ""

    @staticmethod
    def getStringAttribute(xmldoc, tag):
        """Return the value of attribute ``tag`` on ``xmldoc`` as a string.

        :param xmldoc: node exposing an ``attributes`` mapping.
        :param tag: attribute name to look up.
        :returns: the attribute's text data, or ``""`` if it cannot be read.
        """
        try:
            return xmldoc.attributes[tag].firstChild.data
        except Exception:
            # Best-effort: a missing attribute or unexpected node shape
            # yields "".
            return ""

    @staticmethod
    def getNaturalAttribute(xmldoc, tag):
        """Return the value of attribute ``tag`` on ``xmldoc`` as an ``int``.

        The attribute is expected to hold a natural number, but the value is
        not range-checked: whatever ``int()`` accepts is returned.

        :param xmldoc: node exposing an ``attributes`` mapping.
        :param tag: attribute name to look up.
        :returns: the parsed integer, or ``None`` if the attribute is missing
            or is not a valid integer. (``None`` -- not -1 -- is the
            fallback value.)
        """
        try:
            return int(xmldoc.attributes[tag].firstChild.data)
        except Exception:
            # Best-effort: missing attribute or non-numeric text yields None.
            return None
class SearchEngine():
    """Static helpers for building Google search links, human-readable query
    strings, result counts (via the Custom Search API) and a Trends embed
    snippet."""

    # Prefix shared by every URL produced by ``search_url``.
    _SEARCH_BASE_URL = "https://www.google.com/?#q="

    # NOTE(review): credentials are hard-coded in source control. Consider
    # loading them from configuration / environment and rotating this key.
    _DEVELOPER_KEY = "AIzaSyCAova46cAkHga_SZWTBqROdjoz1KcTlw8"
    _CSE_ID = '017576662512468239146:omuauf_lfve'

    @staticmethod
    def search__engine_name():
        """Return the display name of the search engine in use."""
        return "Google"

    @staticmethod
    def search_url(search_text, sites=None):
        """Build a Google search URL for ``search_text``.

        Only ``:`` and ``+`` in ``search_text`` are percent-encoded; every
        other character (including spaces) is passed through unchanged.

        :param search_text: the query text.
        :param sites: optional sequence of site names to restrict the search
            to; ``None`` or an empty sequence means no restriction.
        :returns: the URL string. One site is appended as ``+site%3A<site>``;
            several sites are appended as a parenthesised ``OR`` list.
        """
        search_text = search_text.replace(":", "%3A").replace("+", "%2B")
        url = SearchEngine._SEARCH_BASE_URL + search_text
        if not sites:
            return url
        if len(sites) == 1:
            return url + "+site%3A" + sites[0]
        return url + " (" + " OR ".join("site%3A" + site for site in sites) + ")"

    @staticmethod
    def readable_query(search_text, sites=None):
        """Return a human-readable rendering of a query.

        Decodes ``%3A``, ``%20`` and ``%2B`` in ``search_text`` back to
        ``:``, space and ``+``.

        :param search_text: the (partially percent-encoded) query text.
        :param sites: optional sequence of site names; ``None`` or an empty
            sequence means no restriction.
        :returns: the decoded text, followed by ``+site:<site>`` for a single
            site or ``(site:a OR site:b ...)`` for several.
        """
        search_text = (search_text.replace("%3A", ":")
                       .replace("%20", " ")
                       .replace("%2B", "+"))
        if not sites:
            return search_text
        if len(sites) == 1:
            return search_text + "+site:" + sites[0]
        return search_text + "(" + " OR ".join("site:" + site for site in sites) + ")"

    @staticmethod
    def _total_results(service, search_text, **extra):
        """Run one Custom Search query and return its reported total as an int."""
        res = service.cse().list(q=search_text, cx=SearchEngine._CSE_ID, **extra).execute()
        return int(res['searchInformation']['totalResults'])

    @staticmethod
    def search_count(search_text, sites=None):
        """Return the number of results Google reports for ``search_text``.

        With no sites a single unrestricted query is issued; otherwise one
        query per site is issued (using ``siteSearch``) and the totals are
        summed.

        API reference:
        https://developers.google.com/custom-search/json-api/v1/reference/cse/list
          exactTerms       string  Identifies a phrase that all documents in
                                   the search results must contain.
          excludeTerms     string  Identifies a word or phrase that should not
                                   appear in any documents in the search results.
          siteSearch       string  Specifies all search results should be
                                   pages from a given site.
          siteSearchFilter string  Controls whether to include or exclude
                                   results from the site named in siteSearch.
                                   Acceptable values: "e" exclude, "i" include.

        :param search_text: the query text.
        :param sites: optional sequence of site names; ``None`` or an empty
            sequence means no restriction.
        :returns: the summed result count as an ``int``, or ``None`` if the
            API call raised ``HttpError``.
        """
        try:
            service = build("customsearch", "v1",
                            developerKey=SearchEngine._DEVELOPER_KEY)
            if not sites:
                return SearchEngine._total_results(service, search_text)
            return sum(SearchEngine._total_results(service, search_text, siteSearch=site)
                       for site in sites)
        except HttpError:
            # Best-effort lookup: an API failure yields no count rather than
            # an exception, as callers have always received.
            return None

    @staticmethod
    def trends_img(project_name):
        """Return the ``<script>`` tag embedding a Google Trends time-series
        graph for ``project_name``.

        NOTE(review): ``project_name`` is inserted into the markup verbatim,
        with no URL or HTML escaping -- confirm callers only pass trusted
        values.
        """
        return ('<script type="text/javascript" '
                'src="//www.google.com/trends/embed.js?q=' + project_name +
                '&content=1&cid=TIMESERIES_GRAPH_0&export=5&w=50&h=40"></script>')