Remove characters that only format the underlying HTML code (newline and tab) from the message

This commit is contained in:
2021-04-21 12:55:46 +02:00
parent 123362f4db
commit f974b7288a

View File

@ -34,6 +34,11 @@ http = urllib3.PoolManager(cert_reqs="CERT_NONE")
# HTML messages contain some code-formatting characters
FORMATTING_CHARS = ["\n", "\t"]
class ELogScraper:
def __init__(self, url, output_folder=".", attachment_subfolder="attachments"):
@ -77,6 +82,7 @@ class ELogScraper:
def get_entry(self, index):
message, attributes, attachments = self.elog_read(index)
message = sanitize_message(message)
attributes = sanitize_attributes(index, attributes)
attachments = sanitize_attachments(attachments, self.url)
fns = self.fd.get(attachments)
@ -108,6 +114,14 @@ def retry(func):
return res
return wrapper
def sanitize_message(message):
    """Return *message* with every FORMATTING_CHARS entry stripped out.

    The actual removal is delegated to ``remove_all``; this function only
    binds the module-level list of formatting characters to it.
    """
    cleaned = remove_all(message, FORMATTING_CHARS)
    return cleaned
def remove_all(s, chars):
    """Return *s* with all occurrences of each entry in *chars* deleted.

    Entries are removed one after another, in iteration order, so the
    removal of a later entry operates on the string left over by the
    earlier ones.
    """
    result = s
    for unwanted in chars:
        # Replacing with the empty string deletes every occurrence.
        result = result.replace(unwanted, "")
    return result
def sanitize_attributes(i, attributes):
mid = attributes.pop("$@MID@$")
mid = int(mid)