OSSEval / Code / [487474] /OSSEval/IMDBMovieApp/imdb.py

[487474]: OSSEval / IMDBMovieApp / imdb.py History

imdb.py 81 lines (70 with data), 3.3 kB

# This Source Code Form of OSSEval is subject to the terms of the GNU AFFERO
# GENERAL PUBLIC LICENSE, v. 3.0. If a copy of the AGPL was not
# distributed with this file, You can obtain one at http://www.gnu.org/licenses/agpl.txt
#
# OSSeval is powered by the SOS Open Source AGPL edition.
#  The AGPL requires that you do not remove the SOS Open Source attribution and copyright 
#  notices from the user interface (see section 5.d below).

# OSSEval Copyright 2014 Bitergium SLL
# SOS Open Source Copyright 2012 Roberto Galoppini
# Author: Davide Galletti 

import json
import urllib
import urllib2
from xml.dom import minidom

from lxml import html

from IMDBMovieApp.models import IMDBMovie
from OSSEval.utils import TrivialJSONEncoder

class IMDBProxy():
    """Proxy used to look movies up on IMDB.

    ``search`` queries IMDB's find endpoint and turns the answer into
    ``IMDBMovie`` objects. ``getMovieInfo`` requests a project document from
    an Open Hub URL and reports whether the lookup succeeded.
    """

    # Result groups the find endpoint may return, in the order they are merged.
    _RESULT_KEYS = ('title_exact', 'title_substring', 'title_approx', 'title_popular')

    @staticmethod
    def search(title):
        """Search IMDB for movies whose title matches ``title``.

        The endpoint is asked for JSON, but when the search matches exactly
        one movie it might answer with that movie's HTML page instead; in
        that case the movie is read from the page's Open Graph meta tags.

        Args:
            title: the text to search for.

        Returns:
            A list of ``IMDBMovie`` objects with ``imdb_id``, ``title`` and
            ``description`` set. The list is empty when the response holds
            no results or is not a recognisable movie page.

        Raises:
            KeyError: if a JSON result lacks ``id``, ``title`` or
                ``description``.
            Any exception raised by ``urllib2.urlopen`` on a failed request.
        """
        print("IMDB searching for " + title)
        # Python 2's quote_plus cannot quote non-ASCII unicode objects, so
        # anything that is not already a byte string is UTF-8 encoded first.
        if not isinstance(title, str):
            title = title.encode('utf-8')
        # s=tt restricts the search to titles
        # q=berlin+wall searches both words; quote_plus also escapes
        # characters such as '&' or '#' that would otherwise break the query.
        url_imdb = "http://www.imdb.com/xml/find?json=1&s=tt&q=" + urllib.quote_plus(title)
        response = urllib2.urlopen(url_imdb)
        try:
            payload = response.read()
        finally:
            # Release the connection even if reading fails.
            response.close()

        try:
            decoded = json.loads(payload)
        except ValueError:
            # Not JSON: it might be an html page with just one movie.
            return IMDBProxy._movies_from_html(payload)
        return IMDBProxy._movies_from_json(decoded)

    @staticmethod
    def _movies_from_json(decoded):
        """Build the movie list from the decoded JSON search response."""
        movie_list = []
        if not isinstance(decoded, dict):
            # Valid JSON but not the expected object: nothing to report.
            return movie_list
        for key in IMDBProxy._RESULT_KEYS:
            for movie in decoded.get(key, []):
                m = IMDBMovie()
                m.imdb_id = movie['id']
                m.title = movie['title']
                m.description = movie['description']
                movie_list.append(m)
        return movie_list

    @staticmethod
    def _movies_from_html(imdb_html):
        """Build the movie list from a single movie's HTML page.

        Returns an empty list when the page carries no ``og:url`` meta tag,
        i.e. when it does not look like a movie page.
        """
        movie_list = []
        if not imdb_html.strip():
            # lxml refuses to parse an empty document.
            return movie_list
        tree = html.fromstring(imdb_html)
        # <meta property="og:url" content="http://www.imdb.com/title/tt0072684/" />
        url = IMDBProxy._meta_content(tree, 'og:url')
        if url is None:
            return movie_list
        m = IMDBMovie()
        m.imdb_id = IMDBProxy._imdb_id_from_url(url)
        m.title = IMDBProxy._meta_content(tree, 'og:title', '')
        m.description = IMDBProxy._meta_content(tree, 'og:description', '')
        movie_list.append(m)
        return movie_list

    @staticmethod
    def _meta_content(tree, prop, default=None):
        """Return the ``content`` of the first ``<meta property=prop>`` tag, or ``default``."""
        nodes = tree.xpath('//meta[@property="%s"]' % prop)
        if not nodes:
            return default
        return nodes[0].get('content', default)

    @staticmethod
    def _imdb_id_from_url(url):
        """Return the last path segment of ``url``, with or without a trailing slash."""
        trimmed = url.rstrip("/")
        return trimmed[trimmed.rfind("/") + 1:]

    @staticmethod
    def getMovieInfo(project_identifier):
        """Fetch the Open Hub project document for ``project_identifier``.

        Args:
            project_identifier: identifier inserted into the Open Hub
                project URL.

        Returns:
            A dict holding ``{'id': project_identifier}`` when the response's
            ``status`` element is ``'success'``, otherwise an empty dict.
        """
        ### why do I have this and also IMDBMovie.getInstanceInfo ?
        # NOTE(review): Configuration, UrllibHelper and xmlElements are not
        # imported in the visible part of this file, and the URL points at
        # Open Hub rather than IMDB -- presumably copied from another proxy;
        # confirm whether this method is still used.
        url_ohloh = "http://www.openhub.net/projects/" + project_identifier + ".xml?api_key=" + Configuration.api_key
        xml = UrllibHelper.urlopen(url_ohloh)
        xmldoc = minidom.parseString(xml)
        info = {}
        
        status = xmlElements(xmldoc.getElementsByTagName('status')).firstValue()
        if status == 'success':
            info['id'] = project_identifier
                
        return info