Skip to content

Commit

Permalink
Fixed parsing messages with deeply nested tags (issue #38).
Browse files Browse the repository at this point in the history
  • Loading branch information
suurjaak committed Jun 30, 2015
1 parent 0758711 commit 3d1d179
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 37 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
CHANGELOG
=========

3.5a, 2015-06-15
3.5c, 2015-06-30
------------------
- added emoticons to chat statistics;
- added shared image download for HTML export;
- added chat and author filters to command-line export;
- applying or resetting message filter will scroll to last selection;
- added support for copying selected list items to clipboard;
- stopped caching messages on export to avoid memory shortage;
- fixed parsing messages with deeply nested HTML (issue #38);
- fixed a potential error with unexpected data in quoted messages;
- fixed a potential error message on filtering chat messages;
- made database comparison report window retain scroll position at the bottom;
Expand Down
6 changes: 3 additions & 3 deletions skyperious/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
@author Erki Suurjaak
@created 26.11.2011
@modified 15.06.2015
@modified 30.06.2015
------------------------------------------------------------------------------
"""
from ConfigParser import RawConfigParser
Expand All @@ -23,8 +23,8 @@

"""Program title, version number and version date."""
Title = "Skyperious"
Version = "3.5a"
VersionDate = "15.06.2015"
Version = "3.5c"
VersionDate = "30.06.2015"

if getattr(sys, "frozen", False):
# Running as a pyinstaller executable
Expand Down
50 changes: 17 additions & 33 deletions skyperious/skypedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
@author Erki Suurjaak
@created 26.11.2011
@modified 26.06.2015
@modified 30.06.2015
------------------------------------------------------------------------------
"""
import cgi
Expand Down Expand Up @@ -1604,7 +1604,9 @@ def parse_message_dom(self, message, options):
body = self.EMOTICON_RGX.sub(self.EMOTICON_REPL, body)
dom = self.make_xml(body, message)

if MESSAGE_TYPE_SMS == message["type"]:
if MESSAGE_TYPE_SMS == message["type"] \
or (MESSAGE_TYPE_INFO == message["type"]
and "<sms" in message["body_xml"]):
# SMS body can be plaintext, or can be XML. Relevant tags:
# <sms alt="It's hammer time."><status>6</status>
# <failurereason>0</failurereason><targets>
Expand Down Expand Up @@ -1993,37 +1995,19 @@ def dom_to_html(self, dom, output, message):

def dom_to_text(self, dom):
    """
    Returns a plaintext representation of the message DOM.

    Recurses into child elements, so that the text, the children and the
    tail of every element are emitted in document order, at any nesting
    depth, and each element is emitted exactly once.

    @param   dom  element exposing the ElementTree interface
                  (tag, text, tail, get(), iteration over children)
    @return       text of the element, its descendants, and its tail
    """
    text, tail = dom.text or "", dom.tail or ""
    if "quote" == dom.tag:
        # Opening mark only; the closing one comes from the quotefrom branch
        text = "\"" + text
    elif "quotefrom" == dom.tag:
        text = "\"\r\n%s\r\n" % text
    elif "msgstatus" == dom.tag:
        text = "[%s]\r\n" % text.strip()
    elif dom.tag in ("i", "b", "s"): # italic bold strikethrough
        pre = post = {"i": "_", "b": "*", "s": "~"}[dom.tag]
        # Non-empty raw_pre/raw_post attributes override the default wrapper
        pre = dom.get("raw_pre") or pre
        post = dom.get("raw_post") or post
        # Closing wrapper goes in front of the tail, i.e. after all children
        text, tail = pre + text, post + tail
    return text + "".join(self.dom_to_text(x) for x in dom) + tail


def sanitize(self, dom, known_tags):
Expand Down

0 comments on commit 3d1d179

Please sign in to comment.