opensourceprojects.eu / osp-allura / Diff of /ForgeBlog/forgeblog/command/rssfeeds.py

Diff of /ForgeBlog/forgeblog/command/rssfeeds.py [e1b98e] .. [7375db]

Switch to side-by-side view

--- a/ForgeBlog/forgeblog/command/rssfeeds.py
+++ b/ForgeBlog/forgeblog/command/rssfeeds.py
@@ -1,6 +1,5 @@
 from time import mktime
 from datetime import datetime
-from HTMLParser import HTMLParser
 
 import feedparser
 import html2text
@@ -20,104 +19,6 @@
 from allura.lib.decorators import exceptionless
 
 html2text.BODY_WIDTH = 0
-
-class MDHTMLParser(HTMLParser):
-    def __init__(self):
-        HTMLParser.__init__(self)
-        self.NO_END_TAGS = ["area", "base", "basefont", "br", "col", "frame",
-                            "hr", "img", "input", "link", "meta", "param"]
-        self.CUSTTAG_OPEN = u"[plain]"
-        self.CUSTTAG_CLOSE = u"[/plain]"
-        self.result_doc = u""
-        self.custom_tag_opened = False
-
-    def handle_starttag(self, tag, attrs):
-        if self.custom_tag_opened:
-            self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_CLOSE)
-            self.custom_tag_opened = False
-
-        tag_text = u"<%s" % tag
-        for attr in attrs:
-            if attr[1].find('"'):
-                tag_text = u"%s %s='%s'" % (tag_text, attr[0], attr[1])
-            else:
-                tag_text = u'%s %s="%s"' % (tag_text, attr[0], attr[1])
-        if tag not in self.NO_END_TAGS:
-            tag_text = tag_text + ">"
-        else:
-            tag_text = tag_text + "/>"
-        self.result_doc = u"%s%s" % (self.result_doc, tag_text)
-
-    def handle_endtag(self, tag):
-        if tag not in self.NO_END_TAGS:
-            if self.custom_tag_opened:
-                self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_CLOSE)
-                self.custom_tag_opened = False
-
-            self.result_doc = u"%s</%s>" % (self.result_doc, tag)
-
-    def handle_data(self, data):
-        res_data = ''
-
-        for line in data.splitlines(True):
-            # pre-emptive special case
-            if not line or line.isspace():
-                # don't wrap all whitespace lines
-                res_data += line
-                continue
-
-            # open custom tag
-            if not self.custom_tag_opened:
-                res_data += self.CUSTTAG_OPEN
-                self.custom_tag_opened = True
-            # else: cust tag might be open already from previous incomplete data block
-
-            # data
-            res_data += line.rstrip('\r\n')  # strip EOL (add close tag before)
-
-            # close custom tag
-            if line.endswith(('\r','\n')):
-                res_data += self.CUSTTAG_CLOSE + '\n'
-                self.custom_tag_opened = False
-            # else: no EOL could mean we're dealing with incomplete data block;
-                # leave it open for next handle_data, handle_starttag, or handle_endtag to clean up
-
-        self.result_doc += res_data
-
-    def handle_comment(self, data):
-        if self.custom_tag_opened:
-            self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_CLOSE)
-            self.custom_tag_opened = False
-
-        self.result_doc = u"%s<!-- %s -->" % (self.result_doc, data)
-
-    def handle_entityref(self, name):
-        if not self.custom_tag_opened:
-            self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_OPEN)
-            self.custom_tag_opened = True
-
-        self.result_doc = u"%s&%s;" % (self.result_doc, name)
-
-    def handle_charref(self, name):
-        if not self.custom_tag_opened:
-            self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_OPEN)
-            self.custom_tag_opened = True
-
-        self.result_doc = u"%s&%s;" % (self.result_doc, name)
-
-    def handle_decl(self, data):
-        if self.custom_tag_opened:
-            self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_CLOSE)
-            self.custom_tag_opened = False
-
-        self.result_doc = u"%s<!%s>" % (self.result_doc, data)
-
-    def close(self):
-        HTMLParser.close(self)
-
-        if self.custom_tag_opened:
-            self.result_doc = u"%s%s" % (self.result_doc, self.CUSTTAG_CLOSE)
-            self.custom_tag_opened = False
 
 
 class RssFeedsCommand(base.BlogCommand):
@@ -189,18 +90,17 @@
             content = u''
             for ct in e.content:
                 if ct.type != 'text/html':
-                    content += '[plain]%s[/plain]' % ct.value
+                    content += html2text.escape_md_section(ct.value, snob=True)
                 else:
-                    parser = MDHTMLParser()
-                    parser.feed(ct.value)
-                    parser.close() # must be before using the result_doc
-                    markdown_content = html2text.html2text(parser.result_doc, baseurl=e.link)
-
+                    html2md = html2text.HTML2Text(baseurl=e.link)
+                    html2md.escape_snob = True
+                    markdown_content = html2md.handle(ct.value)
                     content += markdown_content
         else:
-            content = '[plain]%s[/plain]' % getattr(e, 'summary',
-                                                getattr(e, 'subtitle',
-                                                    getattr(e, 'title')))
+            content = html2text.escape_md_section(getattr(e, 'summary',
+                                                    getattr(e, 'subtitle',
+                                                      getattr(e, 'title'))),
+                                                  snob=True)
 
         content += u' [link](%s)' % e.link
         updated = datetime.utcfromtimestamp(mktime(e.updated_parsed))