Remove characters that only format the underlying HTML code (newline and tab) from the message
This commit is contained in:
14
elogdump.py
14
elogdump.py
@ -34,6 +34,11 @@ http = urllib3.PoolManager(cert_reqs="CERT_NONE")
|
||||
|
||||
|
||||
|
||||
# Characters that only format the HTML source of a message (newline, tab); they are stripped from the message text
|
||||
FORMATTING_CHARS = ["\n", "\t"]
|
||||
|
||||
|
||||
|
||||
class ELogScraper:
|
||||
|
||||
def __init__(self, url, output_folder=".", attachment_subfolder="attachments"):
|
||||
@ -77,6 +82,7 @@ class ELogScraper:
|
||||
|
||||
def get_entry(self, index):
|
||||
message, attributes, attachments = self.elog_read(index)
|
||||
message = sanitize_message(message)
|
||||
attributes = sanitize_attributes(index, attributes)
|
||||
attachments = sanitize_attachments(attachments, self.url)
|
||||
fns = self.fd.get(attachments)
|
||||
@ -108,6 +114,14 @@ def retry(func):
|
||||
return res
|
||||
return wrapper
|
||||
|
||||
def sanitize_message(message):
    """Return ``message`` with every entry of ``FORMATTING_CHARS`` removed.

    The characters are deleted outright (not replaced by a space), via
    ``remove_all``.
    """
    cleaned = remove_all(message, FORMATTING_CHARS)
    return cleaned
|
||||
|
||||
def remove_all(s, chars):
    """Return ``s`` with every occurrence of each entry in ``chars`` deleted.

    Entries are processed one after another in iteration order, each pass
    working on the result of the previous one. An entry may be longer than a
    single character; it is then removed as a substring.
    """
    stripped = s
    for unwanted in chars:
        # Replacing with the empty string deletes all occurrences at once.
        stripped = stripped.replace(unwanted, "")
    return stripped
|
||||
|
||||
def sanitize_attributes(i, attributes):
|
||||
mid = attributes.pop("$@MID@$")
|
||||
mid = int(mid)
|
||||
|
Reference in New Issue
Block a user