OSSEval / Code / [487474] /OSSEval/IMDBMovieApp/models.py

[487474]: OSSEval / IMDBMovieApp / models.py History

models.py 57 lines (47 with data), 2.2 kB

# This Source Code Form of OSSEval is subject to the terms of the GNU AFFERO
# GENERAL PUBLIC LICENSE, v. 3.0. If a copy of the AGPL was not
# distributed with this file, You can obtain one at http://www.gnu.org/licenses/agpl.txt
#
# OSSEval is powered by the SOS Open Source AGPL edition.
#  The AGPL requires that you do not remove the SOS Open Source attribution and copyright 
#  notices from the user interface (see section 5.d below).

# OSSEval Copyright 2014 Bitergium SLL
# SOS Open Source Copyright 2012 Roberto Galoppini
# Author: Davide Galletti 


from contextlib import closing
import urllib2
from xml.sax import saxutils

from django.db import models
from lxml import html

from OSSEval.utils import xmlMinidom

class IMDBMovie(models.Model):
    """
    A movie identified by its IMDb id and bound one-to-one to an
    ``analysis.Instance``.

    The ``instance`` relation is the primary key of this model and is
    reachable from the ``Instance`` side through the ``imdb_movie``
    related name.
    """
    imdb_id = models.CharField(max_length=20)
    title = models.CharField(max_length=200)
    description = models.TextField(blank=True)
    instance = models.OneToOneField('analysis.Instance', related_name='imdb_movie', primary_key=True)

    def __str__(self):
        """Return the movie title followed by the name of its instance."""
        return self.title + " - " + self.instance.name

    def to_xml(self):
        """
        Serialize the movie as one XML element named after the class.

        Only the title is exported, as the ``Title`` attribute; the element
        has no children.
        """
        tag = self.__class__.__name__
        # Titles are free text: escape markup characters and the attribute
        # delimiter so that the produced element is always well-formed.
        title = saxutils.escape(self.title, {'"': '&quot;'})
        str_xml = ""
        return '<' + tag + ' Title="' + title + '">' + str_xml + "</" + tag + ">"

    def from_xml(self, xmldoc, instance, insert = True):
        """
        Load the title from ``xmldoc``, attach ``instance`` and save.

        ``xmldoc`` is handed to ``xmlMinidom.getStringAttribute`` to read the
        ``Title`` attribute. ``insert`` is accepted but not used by this
        method.
        """
        self.title = xmlMinidom.getStringAttribute(xmldoc, 'Title')
        self.instance = instance
        self.save()

    def _og_content(self, og_property, default):
        """
        Return the ``content`` of the Open Graph ``<meta>`` tag named
        ``og_property`` in ``self.tree``, or ``default`` when the tag or its
        ``content`` attribute is missing.
        """
        nodes = self.tree.xpath('//meta[@property="%s"]' % og_property)
        if not nodes:
            return default
        content = nodes[0].get('content')
        if content is None:
            return default
        return content

    def getInstanceInfo(self):
        '''
        Part of the required interface

        Download the IMDb page of this movie, refresh ``title`` and
        ``description`` from its Open Graph meta tags and return them in a
        dictionary with the keys ``'title'`` and ``'description'``.

        The parsed page is kept in ``self.tree``. The model is not saved
        here. Errors raised by ``urllib2.urlopen`` are not caught.
        '''
        i = {}
        # The IMDb id (e.g. tt2357377) addresses the movie page
        # http://www.imdb.com/title/tt2357377/
        url_imdb = "http://www.imdb.com/title/" + self.imdb_id + "/"
        # closing() releases the connection even when read() fails; the
        # urllib2 response object is not a context manager by itself.
        with closing(urllib2.urlopen(url_imdb)) as response:
            html_page = response.read()
        self.tree = html.fromstring(html_page)
        # Keep the current values when the page lacks a meta tag instead of
        # failing with IndexError halfway through the update.
        self.title = self._og_content('og:title', self.title)
        self.description = self._og_content('og:description', self.description)
        i['title'] = self.title
        i['description'] = self.description
        ### etc....
        return i