from time import mktime
from datetime import datetime
from HTMLParser import HTMLParser
import feedparser
import html2text
from bson import ObjectId
import base
from allura.command import base as allura_base
from ming.orm import session
from pylons import c
from allura import model as M
from forgeblog import model as BM
from forgeblog import version
from forgeblog.main import ForgeBlogApp
from allura.lib import exceptions
# Module-level html2text configuration, applied once at import time.
# Per html2text's documented settings, a BODY_WIDTH of 0 turns off hard
# wrapping of the generated text, so converted feed content keeps its own
# line structure.
html2text.BODY_WIDTH = 0
class MDHTMLParser(HTMLParser):
    """Re-serialize HTML while wrapping text in ``[plain]...[/plain]`` markers.

    The parser rebuilds the markup it is fed into ``result_doc``.  Tags,
    comments and declarations are copied through; every non-blank line of
    text (and every entity / character reference) is enclosed in the custom
    ``[plain]`` / ``[/plain]`` markers.  Presumably the markers keep a later
    markdown pass from interpreting the text -- confirm against the markdown
    extension that consumes them.

    Usage: ``feed()`` the HTML, call ``close()``, then read ``result_doc``.
    ``close()`` must be called first because a marker may still be open.
    """

    # Elements that are emitted self-closed (``<br/>``) and whose end tags
    # are dropped.
    NO_END_TAGS = ("area", "base", "basefont", "br", "col", "frame",
                   "hr", "img", "input", "link", "meta", "param")
    CUSTTAG_OPEN = u"[plain]"
    CUSTTAG_CLOSE = u"[/plain]"

    def __init__(self):
        # HTMLParser is an old-style class on Python 2, so no super() here.
        HTMLParser.__init__(self)
        self.result_doc = u""
        # True while a [plain] marker has been written but not yet closed.
        self.custom_tag_opened = False

    def _open_custom_tag(self):
        """Write the opening marker unless one is already open."""
        if not self.custom_tag_opened:
            self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_OPEN)
            self.custom_tag_opened = True

    def _close_custom_tag(self):
        """Write the closing marker if one is currently open."""
        if self.custom_tag_opened:
            self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_CLOSE)
            self.custom_tag_opened = False

    def _format_attr(self, name, value):
        """Return ``name``/``value`` serialized as one attribute.

        The result carries a leading space.  The quote character is chosen
        so that it does not clash with the value; a value containing both
        quote characters is double-quoted with ``"`` escaped as ``&quot;``.
        A ``None`` value (attribute given without ``=``) is emitted as the
        bare attribute name.
        """
        if value is None:
            return u" %s" % name
        if '"' not in value:
            return u' %s="%s"' % (name, value)
        if "'" not in value:
            return u" %s='%s'" % (name, value)
        return u' %s="%s"' % (name, value.replace('"', u"&quot;"))

    def handle_starttag(self, tag, attrs):
        """Copy a start tag to the output, closing any open text marker."""
        self._close_custom_tag()
        pieces = [u"<%s" % tag]
        for name, value in attrs:
            pieces.append(self._format_attr(name, value))
        pieces.append(u"/>" if tag in self.NO_END_TAGS else u">")
        self.result_doc = u"%s%s" % (self.result_doc, u"".join(pieces))

    def handle_endtag(self, tag):
        """Copy an end tag to the output; end tags of void elements are dropped."""
        if tag in self.NO_END_TAGS:
            return
        self._close_custom_tag()
        self.result_doc = u"%s</%s>" % (self.result_doc, tag)

    def handle_data(self, data):
        """Append text, wrapping each non-blank line in the custom markers."""
        pieces = []
        for line in data.splitlines(True):
            if line.isspace():
                # Whitespace-only lines are passed through unwrapped.
                pieces.append(line)
                continue
            # The marker may already be open from a previous, incomplete
            # data block; only open it when necessary.
            if not self.custom_tag_opened:
                pieces.append(self.CUSTTAG_OPEN)
                self.custom_tag_opened = True
            # The closing marker has to go before the EOL, so strip it first.
            pieces.append(line.rstrip('\r\n'))
            if line.endswith(('\r', '\n')):
                pieces.append(self.CUSTTAG_CLOSE + u'\n')
                self.custom_tag_opened = False
            # else: no EOL may mean an incomplete data block; leave the
            # marker open for the next handler (or close()) to clean up.
        self.result_doc += u"".join(pieces)

    def handle_comment(self, data):
        """Copy a comment to the output, closing any open text marker."""
        self._close_custom_tag()
        self.result_doc = u"%s<!-- %s -->" % (self.result_doc, data)

    def handle_entityref(self, name):
        """Emit a named reference (``&name;``) inside the text markers."""
        self._open_custom_tag()
        self.result_doc = u"%s&%s;" % (self.result_doc, name)

    def handle_charref(self, name):
        """Emit a numeric reference (``&#NNN;`` / ``&#xHH;``) inside the markers.

        ``name`` arrives without the leading ``&#``, so the ``#`` has to be
        put back; omitting it would yield an invalid reference like ``&160;``.
        """
        self._open_custom_tag()
        self.result_doc = u"%s&#%s;" % (self.result_doc, name)

    def handle_decl(self, data):
        """Copy a declaration (``<!...>``) to the output."""
        self._close_custom_tag()
        self.result_doc = u"%s<!%s>" % (self.result_doc, data)

    def close(self):
        """Finish parsing and close a text marker that is still open."""
        HTMLParser.close(self)
        self._close_custom_tag()
class RssFeedsCommand(base.BlogCommand):
    """Command that imports entries of external feeds as blog posts.

    Options:
        -a / --appid     restrict the run to one app config id
                         (default: every ``Globals`` record with feeds)
        -u / --username  user looked up and stored in ``c.user``
                         (default: ``root``)
    """

    summary = 'Rss feed client'
    parser = base.BlogCommand.standard_parser(verbose=True)
    parser.add_option('-a', '--appid', dest='appid', default='',
                      help='application id')
    parser.add_option('-u', '--username', dest='username', default='root',
                      help='poster username')

    def command(self):
        """Entry point: set up, collect the feeds, then process each one."""
        self.basic_setup()
        c.user = M.User.query.get(username=self.options.username)
        self.prepare_feeds()
        for appid, feed_urls in self.feed_dict.items():
            for feed_url in feed_urls:
                self.process_feed(appid, feed_url)

    def prepare_feeds(self):
        """Populate ``self.feed_dict`` as ``{app_config_id: external_feeds}``.

        With ``--appid`` only that app's ``Globals`` record is considered;
        otherwise all records are.  Records without feeds are left out.

        Raises:
            exceptions.NoSuchGlobalsError: ``--appid`` was given but no
                matching ``Globals`` record exists.
        """
        if self.options.appid != '':
            gl_app = BM.Globals.query.get(
                app_config_id=ObjectId(self.options.appid))
            if not gl_app:
                raise exceptions.NoSuchGlobalsError("The globals %s " \
                    "could not be found in the database" % self.options.appid)
            candidates = [gl_app]
        else:
            candidates = BM.Globals.query.find().all()

        feed_dict = {}
        for gl_app in candidates:
            # Truthiness also skips a missing (None) feed list instead of
            # failing in len().
            if gl_app.external_feeds:
                feed_dict[gl_app.app_config_id] = gl_app.external_feeds
        self.feed_dict = feed_dict

    def _entry_content(self, e):
        """Return the post text for feed entry ``e`` (without the link suffix).

        HTML content parts are run through ``MDHTMLParser`` and then
        ``html2text``; every other part is wrapped in ``[plain]`` markers
        verbatim.  Entries without ``content`` fall back to their summary,
        then subtitle, then title.
        """
        if 'content' not in e:
            fallback = getattr(e, 'summary',
                               getattr(e, 'subtitle',
                                       getattr(e, 'title')))
            return '[plain]%s[/plain]' % fallback

        content = u''
        for ct in e.content:
            if ct.type != 'text/html':
                content += '[plain]%s[/plain]' % ct.value
                continue
            md_parser = MDHTMLParser()
            md_parser.feed(ct.value)
            md_parser.close()  # must happen before result_doc is read
            content += html2text.html2text(md_parser.result_doc,
                                           baseurl=e.link)
        return content

    def process_feed(self, appid, feed_url):
        """Fetch ``feed_url`` and create a blog post for each new entry.

        Returns without doing anything when no ``AppConfig`` with id
        ``appid`` exists, and after logging an error when feedparser flags
        the feed as ill-formed (``bozo``).  An entry is treated as already
        imported when a post with the same base slug exists for this app.
        """
        appconf = M.AppConfig.query.get(_id=appid)
        if not appconf:
            return
        c.project = appconf.project
        c.app = ForgeBlogApp(c.project, appconf)

        allura_base.log.info("Get feed: %s", feed_url)
        f = feedparser.parse(feed_url)
        if f.bozo:
            # Not inside an ``except`` block, so ``log.exception`` would only
            # append a meaningless "None" traceback.
            allura_base.log.error("%s: %s", feed_url, f.bozo_exception)
            return

        for e in f.entries:
            title = e.title
            content = self._entry_content(e)
            content += u' [link](%s)' % e.link
            # feedparser's ``*_parsed`` tuples are already UTC.  Passing them
            # through ``mktime`` (which assumes local time) shifted every
            # timestamp by the host's UTC offset, so build the datetime
            # directly from the tuple's date/time fields.
            updated = datetime(*e.updated_parsed[:6])
            base_slug = BM.BlogPost.make_base_slug(title, updated)
            existing = BM.BlogPost.query.find(
                dict(slug=base_slug, app_config_id=appid)).count()
            if existing == 0:
                post = BM.BlogPost(title=title, text=content,
                                   timestamp=updated,
                                   app_config_id=appid,
                                   tool_version={'blog': version.__version__},
                                   state='published')
                post.neighborhood_id = c.project.neighborhood_id
                post.make_slug()
                post.commit()

        session(BM.BlogPost).flush()