Switch to unified view

a b/OSSEval/IMDBMovieApp/imdb.py
1
# This Source Code Form of OSSEval is subject to the terms of the GNU AFFERO
2
# GENERAL PUBLIC LICENSE, v. 3.0. If a copy of the AGPL was not
3
# distributed with this file, You can obtain one at http://www.gnu.org/licenses/agpl.txt
4
#
5
# OSSEval is powered by the SOS Open Source AGPL edition.
6
#  The AGPL requires that you do not remove the SOS Open Source attribution and copyright 
7
#  notices from the user interface (see section 5.d below).
8
9
# OSSEval Copyright 2014 Bitergium SLL
10
# SOS Open Source Copyright 2012 Roberto Galoppini
11
# Author: Davide Galletti 
12
13
from xml.dom import minidom
14
from OSSEval.utils import TrivialJSONEncoder 
15
import json
16
import urllib2
17
from lxml import html
18
from IMDBMovieApp.models import IMDBMovie
19
20
class IMDBProxy():
    """Static helpers for looking up movies on IMDB.

    The class holds no state; it is only used as a namespace.
    """

    @staticmethod
    def search(title):
        """Search IMDB by title and return the matching movies.

        The IMDB "find" endpoint is asked for JSON. When the answer is not
        valid JSON it is treated as the HTML page of a single movie (IMDB may
        send that page directly when exactly one title matches).

        Args:
            title: text to search for; words may be separated by spaces.

        Returns:
            A list of newly constructed IMDBMovie objects with ``imdb_id``,
            ``title`` and ``description`` set. The list is empty when nothing
            usable was found.

        Raises:
            Whatever ``urllib2.urlopen`` raises on network/HTTP errors.
            KeyError: if a JSON result lacks ``id``, ``title`` or
                ``description``.
        """
        # Local import keeps this block self-contained (Python 2 stdlib).
        from urllib import quote_plus

        print("IMDB searching for " + title)

        # Python 2's quote_plus() fails on non-ASCII unicode objects, so
        # anything that is not already a byte string is encoded first.
        query = title if isinstance(title, str) else title.encode('utf-8')

        # s=tt            restricts the search to titles
        # q=berlin+wall   searches both words
        # quote_plus() turns spaces into "+" and escapes characters such as
        # "&" or "#" that would otherwise corrupt the query string.
        url_imdb = "http://www.imdb.com/xml/find?json=1&s=tt&q=" + quote_plus(query)

        response = urllib2.urlopen(url_imdb)
        try:
            imdb_body = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()

        try:
            decoded = json.loads(imdb_body)
        except ValueError:
            # Not JSON: it might be an HTML page with just one movie.
            return IMDBProxy._movie_from_html(imdb_body)
        return IMDBProxy._movies_from_json(decoded)

    @staticmethod
    def _movies_from_json(decoded):
        """Build IMDBMovie objects from the decoded JSON answer of the find API."""
        if not isinstance(decoded, dict):
            return []
        movie_list = []
        # Result buckets are concatenated in this fixed order.
        for key in ('title_exact', 'title_substring', 'title_approx', 'title_popular'):
            for movie in decoded.get(key) or []:
                m = IMDBMovie()
                m.imdb_id = movie['id']
                m.title = movie['title']
                m.description = movie['description']
                movie_list.append(m)
        return movie_list

    @staticmethod
    def _movie_from_html(page):
        """Build a one-element movie list from the Open Graph tags of a movie page.

        Returns an empty list when the page is empty or carries no
        ``og:url`` / ``og:title`` metadata.
        """
        if not page.strip():
            # lxml refuses to parse an empty document.
            return []
        tree = html.fromstring(page)

        def og_content(name):
            # Value of <meta property="og:NAME" content="..."/>, or None.
            found = tree.xpath('//meta[@property="og:%s"]' % name)
            return found[0].get('content') if found else None

        # <meta property="og:url" content="http://www.imdb.com/title/tt0072684/" />
        url = og_content('url')
        movie_title = og_content('title')
        if not url or not movie_title:
            return []

        m = IMDBMovie()
        # The id is the last path segment, with or without a trailing slash.
        m.imdb_id = url.rstrip("/").rsplit("/", 1)[-1]
        m.title = movie_title
        m.description = og_content('description') or ""
        return [m]

    @staticmethod
    def getMovieInfo(project_identifier):
        """Fetch a project's XML from openhub.net and report whether it succeeded.

        Args:
            project_identifier: identifier appended to the Open Hub project URL.

        Returns:
            ``{'id': project_identifier}`` when the ``status`` element of the
            answer is ``'success'``, otherwise an empty dict.
        """
        # Original author's open question: why does this exist alongside
        # IMDBMovie.getInstanceInfo?
        # NOTE(review): this queries Open Hub, not IMDB, and relies on
        # Configuration, UrllibHelper and xmlElements, none of which are
        # imported in the visible part of this module -- confirm where they
        # come from (calling this as-is would presumably raise NameError).
        url_ohloh = "http://www.openhub.net/projects/" + project_identifier + ".xml?api_key=" + Configuration.api_key
        xml = UrllibHelper.urlopen(url_ohloh)
        xmldoc = minidom.parseString(xml)
        info = {}

        status = xmlElements(xmldoc.getElementsByTagName('status')).firstValue()
        if status == 'success':
            info['id'] = project_identifier

        return info
79
80