# HTML messages contain some code-formatting characters
FORMATTING_CHARS = ["\n", "\t"]


def sanitize_message(message: str) -> str:
    """Return *message* with every entry of ``FORMATTING_CHARS`` removed.

    Args:
        message: The message text to clean.

    Returns:
        A copy of *message* without any newline or tab characters.
    """
    return remove_all(message, FORMATTING_CHARS)


def remove_all(s: str, chars) -> str:
    """Return *s* with every occurrence of each entry in *chars* deleted.

    Args:
        s: The string to clean.
        chars: An iterable of strings to strip out of *s*. Entries are
            normally single characters, but longer substrings are accepted.

    Returns:
        The cleaned string; *s* itself is never modified.
    """
    # Materialize once so that one-shot iterables (generators) can be
    # inspected and then consumed.
    targets = tuple(chars)

    # Deleting a set of single characters is order-independent, so it can be
    # done in one pass instead of one full scan of the string per character.
    if all(len(target) == 1 for target in targets):
        return s.translate(dict.fromkeys(map(ord, targets)))

    # Multi-character (or empty) entries keep the sequential semantics, where
    # an earlier removal can affect what a later entry matches.
    for target in targets:
        s = s.replace(target, "")
    return s