Remove characters that only format the underlying HTML code (newline and tab) from the message
This commit is contained in:
14
elogdump.py
14
elogdump.py
@ -34,6 +34,11 @@ http = urllib3.PoolManager(cert_reqs="CERT_NONE")
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# HTML messages contain characters that only format the HTML source code (newline, tab)
|
||||||
|
FORMATTING_CHARS = ["\n", "\t"]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class ELogScraper:
|
class ELogScraper:
|
||||||
|
|
||||||
def __init__(self, url, output_folder=".", attachment_subfolder="attachments"):
|
def __init__(self, url, output_folder=".", attachment_subfolder="attachments"):
|
||||||
@ -77,6 +82,7 @@ class ELogScraper:
|
|||||||
|
|
||||||
def get_entry(self, index):
|
def get_entry(self, index):
|
||||||
message, attributes, attachments = self.elog_read(index)
|
message, attributes, attachments = self.elog_read(index)
|
||||||
|
message = sanitize_message(message)
|
||||||
attributes = sanitize_attributes(index, attributes)
|
attributes = sanitize_attributes(index, attributes)
|
||||||
attachments = sanitize_attachments(attachments, self.url)
|
attachments = sanitize_attachments(attachments, self.url)
|
||||||
fns = self.fd.get(attachments)
|
fns = self.fd.get(attachments)
|
||||||
@ -108,6 +114,14 @@ def retry(func):
|
|||||||
return res
|
return res
|
||||||
return wrapper
|
return wrapper
|
||||||
|
|
||||||
|
def sanitize_message(message):
    """Return ``message`` with the source-formatting characters stripped.

    Every entry of the module-level ``FORMATTING_CHARS`` list is deleted
    from the message text via ``remove_all``.
    """
    cleaned = remove_all(message, FORMATTING_CHARS)
    return cleaned
|
||||||
|
|
||||||
|
def remove_all(s, chars):
    """Return ``s`` with every occurrence of each entry in ``chars`` deleted.

    The entries are removed one after another, in the order in which
    ``chars`` yields them; an empty ``chars`` leaves ``s`` unchanged.
    """
    stripped = s
    for unwanted in chars:
        # Replacing with the empty string deletes all occurrences at once.
        stripped = stripped.replace(unwanted, "")
    return stripped
|
||||||
|
|
||||||
def sanitize_attributes(i, attributes):
|
def sanitize_attributes(i, attributes):
|
||||||
mid = attributes.pop("$@MID@$")
|
mid = attributes.pop("$@MID@$")
|
||||||
mid = int(mid)
|
mid = int(mid)
|
||||||
|
Reference in New Issue
Block a user