From 6b59fe16cea10c4e7a55e457d2cb55be95834fbd Mon Sep 17 00:00:00 2001 From: chrin Date: Tue, 27 Feb 2024 15:40:00 +0100 Subject: [PATCH] added 3rd party packages, elog, bigtree --- packages/EGG-INFO/PKG-INFO | 111 ++ packages/EGG-INFO/SOURCES.txt | 8 + packages/EGG-INFO/dependency_links.txt | 1 + packages/EGG-INFO/top_level.txt | 1 + packages/EGG-INFO/zip-safe | 1 + packages/elog.pth | 1 + packages/elog/__init__.py | 13 + packages/elog/logbook.py | 571 ++++++ packages/elog/logbook_exceptions.py | 28 + python310/packages/EGG-INFO/PKG-INFO | 111 ++ python310/packages/EGG-INFO/SOURCES.txt | 8 + .../packages/EGG-INFO/dependency_links.txt | 1 + python310/packages/EGG-INFO/top_level.txt | 1 + python310/packages/EGG-INFO/zip-safe | 1 + python310/packages/bigtree/__init__.py | 78 + .../packages/bigtree/binarytree/__init__.py | 0 .../packages/bigtree/binarytree/construct.py | 53 + python310/packages/bigtree/dag/__init__.py | 0 python310/packages/bigtree/dag/construct.py | 206 ++ python310/packages/bigtree/dag/export.py | 298 +++ python310/packages/bigtree/globals.py | 3 + python310/packages/bigtree/node/__init__.py | 0 python310/packages/bigtree/node/basenode.py | 780 ++++++++ python310/packages/bigtree/node/binarynode.py | 418 +++++ python310/packages/bigtree/node/dagnode.py | 672 +++++++ python310/packages/bigtree/node/node.py | 261 +++ python310/packages/bigtree/py.typed | 0 python310/packages/bigtree/tree/__init__.py | 0 python310/packages/bigtree/tree/construct.py | 1327 +++++++++++++ python310/packages/bigtree/tree/export.py | 1660 +++++++++++++++++ python310/packages/bigtree/tree/helper.py | 415 +++++ python310/packages/bigtree/tree/modify.py | 1356 ++++++++++++++ python310/packages/bigtree/tree/search.py | 479 +++++ python310/packages/bigtree/utils/__init__.py | 0 .../packages/bigtree/utils/assertions.py | 53 + python310/packages/bigtree/utils/constants.py | 165 ++ .../packages/bigtree/utils/exceptions.py | 126 ++ python310/packages/bigtree/utils/groot.py | 19 + 
python310/packages/bigtree/utils/iterators.py | 587 ++++++ python310/packages/bigtree/utils/plot.py | 354 ++++ .../packages/bigtree/workflows/__init__.py | 0 .../bigtree/workflows/app_calendar.py | 200 ++ .../packages/bigtree/workflows/app_todo.py | 261 +++ python310/packages/elog.pth | 1 + python310/packages/elog/__init__.py | 13 + python310/packages/elog/logbook.py | 571 ++++++ python310/packages/elog/logbook_exceptions.py | 28 + python37/packages/bigtree/__init__.py | 68 + .../packages/bigtree/binarytree/construct.py | 50 + python37/packages/bigtree/dag/__init__.py | 0 python37/packages/bigtree/dag/construct.py | 186 ++ python37/packages/bigtree/dag/export.py | 269 +++ python37/packages/bigtree/node/__init__.py | 0 python37/packages/bigtree/node/basenode.py | 696 +++++++ python37/packages/bigtree/node/binarynode.py | 395 ++++ python37/packages/bigtree/node/dagnode.py | 570 ++++++ python37/packages/bigtree/node/node.py | 204 ++ python37/packages/bigtree/tree/__init__.py | 0 python37/packages/bigtree/tree/construct.py | 914 +++++++++ python37/packages/bigtree/tree/export.py | 831 +++++++++ python37/packages/bigtree/tree/helper.py | 201 ++ python37/packages/bigtree/tree/modify.py | 856 +++++++++ python37/packages/bigtree/tree/search.py | 316 ++++ python37/packages/bigtree/utils/__init__.py | 0 python37/packages/bigtree/utils/exceptions.py | 32 + python37/packages/bigtree/utils/iterators.py | 371 ++++ .../packages/bigtree/workflows/__init__.py | 0 .../packages/bigtree/workflows/app_todo.py | 249 +++ python37/packages/bigtree_info | Bin 0 -> 40960 bytes 69 files changed, 17449 insertions(+) create mode 100644 packages/EGG-INFO/PKG-INFO create mode 100644 packages/EGG-INFO/SOURCES.txt create mode 100644 packages/EGG-INFO/dependency_links.txt create mode 100644 packages/EGG-INFO/top_level.txt create mode 100644 packages/EGG-INFO/zip-safe create mode 100644 packages/elog.pth create mode 100644 packages/elog/__init__.py create mode 100644 packages/elog/logbook.py create 
mode 100644 packages/elog/logbook_exceptions.py create mode 100644 python310/packages/EGG-INFO/PKG-INFO create mode 100644 python310/packages/EGG-INFO/SOURCES.txt create mode 100644 python310/packages/EGG-INFO/dependency_links.txt create mode 100644 python310/packages/EGG-INFO/top_level.txt create mode 100644 python310/packages/EGG-INFO/zip-safe create mode 100644 python310/packages/bigtree/__init__.py create mode 100644 python310/packages/bigtree/binarytree/__init__.py create mode 100644 python310/packages/bigtree/binarytree/construct.py create mode 100644 python310/packages/bigtree/dag/__init__.py create mode 100644 python310/packages/bigtree/dag/construct.py create mode 100644 python310/packages/bigtree/dag/export.py create mode 100644 python310/packages/bigtree/globals.py create mode 100644 python310/packages/bigtree/node/__init__.py create mode 100644 python310/packages/bigtree/node/basenode.py create mode 100644 python310/packages/bigtree/node/binarynode.py create mode 100644 python310/packages/bigtree/node/dagnode.py create mode 100644 python310/packages/bigtree/node/node.py create mode 100644 python310/packages/bigtree/py.typed create mode 100644 python310/packages/bigtree/tree/__init__.py create mode 100644 python310/packages/bigtree/tree/construct.py create mode 100644 python310/packages/bigtree/tree/export.py create mode 100644 python310/packages/bigtree/tree/helper.py create mode 100644 python310/packages/bigtree/tree/modify.py create mode 100644 python310/packages/bigtree/tree/search.py create mode 100644 python310/packages/bigtree/utils/__init__.py create mode 100644 python310/packages/bigtree/utils/assertions.py create mode 100644 python310/packages/bigtree/utils/constants.py create mode 100644 python310/packages/bigtree/utils/exceptions.py create mode 100644 python310/packages/bigtree/utils/groot.py create mode 100644 python310/packages/bigtree/utils/iterators.py create mode 100644 python310/packages/bigtree/utils/plot.py create mode 100644 
python310/packages/bigtree/workflows/__init__.py create mode 100644 python310/packages/bigtree/workflows/app_calendar.py create mode 100644 python310/packages/bigtree/workflows/app_todo.py create mode 100644 python310/packages/elog.pth create mode 100644 python310/packages/elog/__init__.py create mode 100644 python310/packages/elog/logbook.py create mode 100644 python310/packages/elog/logbook_exceptions.py create mode 100644 python37/packages/bigtree/__init__.py create mode 100644 python37/packages/bigtree/binarytree/construct.py create mode 100644 python37/packages/bigtree/dag/__init__.py create mode 100644 python37/packages/bigtree/dag/construct.py create mode 100644 python37/packages/bigtree/dag/export.py create mode 100644 python37/packages/bigtree/node/__init__.py create mode 100644 python37/packages/bigtree/node/basenode.py create mode 100644 python37/packages/bigtree/node/binarynode.py create mode 100644 python37/packages/bigtree/node/dagnode.py create mode 100644 python37/packages/bigtree/node/node.py create mode 100644 python37/packages/bigtree/tree/__init__.py create mode 100644 python37/packages/bigtree/tree/construct.py create mode 100644 python37/packages/bigtree/tree/export.py create mode 100644 python37/packages/bigtree/tree/helper.py create mode 100644 python37/packages/bigtree/tree/modify.py create mode 100644 python37/packages/bigtree/tree/search.py create mode 100644 python37/packages/bigtree/utils/__init__.py create mode 100644 python37/packages/bigtree/utils/exceptions.py create mode 100644 python37/packages/bigtree/utils/iterators.py create mode 100644 python37/packages/bigtree/workflows/__init__.py create mode 100644 python37/packages/bigtree/workflows/app_todo.py create mode 100644 python37/packages/bigtree_info diff --git a/packages/EGG-INFO/PKG-INFO b/packages/EGG-INFO/PKG-INFO new file mode 100644 index 0000000..39ef272 --- /dev/null +++ b/packages/EGG-INFO/PKG-INFO @@ -0,0 +1,111 @@ +Metadata-Version: 1.0 +Name: elog +Version: 1.3.4 
+Summary: Python library to access Elog. +Home-page: https://github.com/paulscherrerinstitute/py_elog +Author: Paul Scherrer Institute (PSI) +Author-email: UNKNOWN +License: UNKNOWN +Description: [![Build Status](https://travis-ci.org/paulscherrerinstitute/py_elog.svg?branch=master)](https://travis-ci.org/paulscherrerinstitute/py_elog) [![Build status](https://ci.appveyor.com/api/projects/status/glo428gqw951y512?svg=true)](https://ci.appveyor.com/project/simongregorebner/py-elog) + + # Overview + This Python module provides a native interface to [electronic logbooks](https://midas.psi.ch/elog/). It is compatible with Python versions 3.5 and higher. + + # Usage + + For accessing a logbook at ```http[s]://<hostname>:<port>/[<subdir>/]<logbook>/[<msg_id>]``` a logbook handle must be retrieved. + + ```python + import elog + + # Open GFA SwissFEL test logbook + logbook = elog.open('https://elog-gfa.psi.ch/SwissFEL+test/') + + # Constructor using detailed arguments + # Open demo logbook on local host: http://localhost:8080/demo/ + logbook = elog.open('localhost', 'demo', port=8080, use_ssl=False) + ``` + + Once you have hold of the logbook handle, one of its public methods can be used to read, create, reply to, edit or delete a message. 
+ + ## Get Existing Message Ids + Get all the existing message ids of a logbook + + ```python + message_ids = logbook.get_message_ids() + ``` + + To get the id of the last inserted message + ```python + last_message_id = logbook.get_last_message_id() + ``` + + ## Read Message + + ```python + # Read message with message ID = 23 + message, attributes, attachments = logbook.read(23) + ``` + + ## Create Message + + ```python + # Create new message with some text, attributes (dict of attributes + kwargs) and attachments + new_msg_id = logbook.post('This is message text', attributes=dict_of_attributes, attachments=list_of_attachments, attribute_as_param='value') + ``` + + Which attributes are required is determined by the configuration of the elog server (keyword `Required Attributes`). + If the configuration looks like this: + + ``` + Required Attributes = Author, Type + ``` + + You have to provide author and type when posting a message. + + In case the type needs to be specified, the supported keywords can as well be found in the elog configuration with the key `Options Type`. + + If the config looks like this: + ``` + Options Type = Routine, Software Installation, Problem Fixed, Configuration, Other + ``` + + A working create call would look like this: + + ```python + new_msg_id = logbook.post('This is message text', author='me', type='Routine') + ``` + + + + ## Reply to Message + + ```python + # Reply to message with ID=23 + new_msg_id = logbook.post('This is a reply', msg_id=23, reply=True, attributes=dict_of_attributes, attachments=list_of_attachments, attribute_as_param='value') + ``` + + ## Edit Message + + ```python + # Edit message with ID=23. 
Changed message text, some attributes (dict of edited attributes + kwargs) and new attachments + edited_msg_id = logbook.post('This is new message text', msg_id=23, attributes=dict_of_changed_attributes, attachments=list_of_new_attachments, attribute_as_param='new value') + ``` + + ## Delete Message (and all its replies) + + ```python + # Delete message with ID=23. All its replies will also be deleted. + logbook.delete(23) + ``` + + __Note:__ Due to the way elog implements delete this function is only supported on English logbooks. + + # Installation + The Elog module only depends on the `passlib` and `requests` libraries, used for password encryption and http(s) communication. It is packaged as an [anaconda package](https://anaconda.org/paulscherrerinstitute/elog) and can be installed as follows: + + ```bash + conda install -c paulscherrerinstitute elog + ``` +Keywords: elog,electronic,logbook +Platform: UNKNOWN diff --git a/packages/EGG-INFO/SOURCES.txt b/packages/EGG-INFO/SOURCES.txt new file mode 100644 index 0000000..f64fe82 --- /dev/null +++ b/packages/EGG-INFO/SOURCES.txt @@ -0,0 +1,8 @@ +setup.py +elog/__init__.py +elog/logbook.py +elog/logbook_exceptions.py +elog.egg-info/PKG-INFO +elog.egg-info/SOURCES.txt +elog.egg-info/dependency_links.txt +elog.egg-info/top_level.txt \ No newline at end of file diff --git a/packages/EGG-INFO/dependency_links.txt b/packages/EGG-INFO/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/packages/EGG-INFO/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/packages/EGG-INFO/top_level.txt b/packages/EGG-INFO/top_level.txt new file mode 100644 index 0000000..e729a95 --- /dev/null +++ b/packages/EGG-INFO/top_level.txt @@ -0,0 +1 @@ +elog diff --git a/packages/EGG-INFO/zip-safe b/packages/EGG-INFO/zip-safe new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/packages/EGG-INFO/zip-safe @@ -0,0 +1 @@ + diff --git a/packages/elog.pth b/packages/elog.pth new file mode 100644 index 
0000000..92dcaa9 --- /dev/null +++ b/packages/elog.pth @@ -0,0 +1 @@ +./elog-1.3.4-py3.7.egg diff --git a/packages/elog/__init__.py b/packages/elog/__init__.py new file mode 100644 index 0000000..f037dec --- /dev/null +++ b/packages/elog/__init__.py @@ -0,0 +1,13 @@ +from elog.logbook import Logbook +from elog.logbook import LogbookError, LogbookAuthenticationError, LogbookServerProblem, LogbookMessageRejected, \ + LogbookInvalidMessageID, LogbookInvalidAttachmentType + + +def open(*args, **kwargs): + """ + Will return a Logbook object. All arguments are passed to the logbook constructor. + :param args: + :param kwargs: + :return: Logbook() instance + """ + return Logbook(*args, **kwargs) diff --git a/packages/elog/logbook.py b/packages/elog/logbook.py new file mode 100644 index 0000000..c2bfe26 --- /dev/null +++ b/packages/elog/logbook.py @@ -0,0 +1,571 @@ +import requests +import urllib.parse +import os +import builtins +import re +from elog.logbook_exceptions import * +from datetime import datetime + + +class Logbook(object): + """ + Logbook provides methods to interface with logbook on location: "server:port/subdir/logbook". User can create, + edit, delete logbook messages. + """ + + def __init__(self, hostname, logbook='', port=None, user=None, password=None, subdir='', use_ssl=True, + encrypt_pwd=True): + """ + :param hostname: elog server hostname. If whole url is specified here, it will be parsed and arguments: + "logbook, port, subdir, use_ssl" will be overwritten by parsed values. 
+ :param logbook: name of the logbook on the elog server + :param port: elog server port (if not specified will default to '80' if use_ssl=False or '443' if use_ssl=True + :param user: username (if authentication needed) + :param password: password (if authentication needed) Password will be encrypted with sha256 unless + encrypt_pwd=False (default: True) + :param subdir: subdirectory of logbooks locations + :param use_ssl: connect using ssl (ignored if url starts with 'http://'' or 'https://'? + :param encrypt_pwd: To avoid exposing password in the code, this flag can be set to False and password + will then be handled as it is (user needs to provide sha256 encrypted password with + salt= '' and rounds=5000) + :return: + """ + hostname = hostname.strip() + + # parse url to see if some parameters are defined with url + parsed_url = urllib.parse.urlsplit(hostname) + + # ---- handle SSL ----- + # hostname must be modified according to use_ssl flag. If hostname starts with https:// or http:// + # the use_ssl flag is ignored + url_scheme = parsed_url.scheme + if url_scheme == 'http': + use_ssl = False + + elif url_scheme == 'https': + use_ssl = True + + elif not url_scheme: + # add http or https + if use_ssl: + url_scheme = 'https' + else: + url_scheme = 'http' + + # ---- handle port ----- + # 1) by default use port defined in the url + # 2) remove any 'default' ports such as 80 for http and 443 for https + # 3) if port not defined in url and not 'default' add it to netloc + + netloc = parsed_url.netloc + if netloc == "" and "localhost" in hostname: + netloc = 'localhost' + netloc_split = netloc.split(':') + if len(netloc_split) > 1: + # port defined in url --> remove if needed + port = netloc_split[1] + if (port == 80 and not use_ssl) or (port == 443 and use_ssl): + netloc = netloc_split[0] + + else: + # add port info if needed + if port is not None and not (port == 80 and not use_ssl) and not (port == 443 and use_ssl): + netloc += ':{}'.format(port) + + # ---- handle 
subdir and logbook ----- + # parsed_url.path = /// + + # Remove last '/' for easier parsing + url_path = parsed_url.path + if url_path.endswith('/'): + url_path = url_path[:-1] + + splitted_path = url_path.split('/') + if url_path and len(splitted_path) > 1: + # If here ... then at least some part of path is defined. + + # If logbook defined --> treat path current path as subdir and add logbook at the end + # to define the full path. Else treat existing path as /. + # Put first and last '/' back on its place + if logbook: + url_path += '/{}'.format(logbook) + else: + logbook = splitted_path[-1] + + else: + # There is nothing. Use arguments. + url_path = subdir + '/' + logbook + + # urllib.parse.quote replaces special characters with %xx escapes + # self._logbook_path = urllib.parse.quote('/' + url_path + '/').replace('//', '/') + self._logbook_path = ('/' + url_path + '/').replace('//', '/') + + self._url = url_scheme + '://' + netloc + self._logbook_path + self.logbook = logbook + self._user = user + self._password = _handle_pswd(password, encrypt_pwd) + + def post(self, message, msg_id=None, reply=False, attributes=None, attachments=None, encoding=None, + **kwargs): + """ + Posts message to the logbook. If msg_id is not specified new message will be created, otherwise existing + message will be edited, or a reply (if reply=True) to it will be created. This method returns the msg_id + of the newly created message. + + :param message: string with message text + :param msg_id: ID number of message to edit or reply. If not specified new message is created. + :param reply: If 'True' reply to existing message is created instead of editing it + :param attributes: Dictionary of attributes. 
Following attributes are used internally by the elog and will be + ignored: Text, Date, Encoding, Reply to, In reply to, Locked by, Attachment + :param attachments: list of: + - file like objects which read() will return bytes (if file_like_object.name is not + defined, default name "attachment" will be used. + - paths to the files + All items will be appended as attachment to the elog entry. In case of unknown + attachment an exception LogbookInvalidAttachment will be raised. + :param encoding: Defines encoding of the message. Can be: 'plain' -> plain text, 'html'->html-text, + 'ELCode' --> elog formatting syntax + :param kwargs: Anything in the kwargs will be interpreted as attribute. e.g.: logbook.post('Test text', + Author='Rok Vintar), "Author" will be sent as an attribute. If named same as one of the + attributes defined in "attributes", kwargs will have priority. + + :return: msg_id + """ + + attributes = attributes or {} + attributes = {**attributes, **kwargs} # kwargs as attributes with higher priority + + attachments = attachments or [] + + if encoding is not None: + if encoding not in ['plain', 'HTML', 'ELCode']: + raise LogbookMessageRejected('Invalid message encoding. Valid options: plain, HTML, ELCode.') + attributes['Encoding'] = encoding + + attributes_to_edit = dict() + if msg_id: + # Message exists, we can continue + if reply: + # Verify that there is a message on the server, otherwise do not reply to it! 
+ self._check_if_message_on_server(msg_id) # raises exception in case of none existing message + + attributes['reply_to'] = str(msg_id) + + else: # Edit existing + attributes['edit_id'] = str(msg_id) + attributes['skiplock'] = '1' + + # Handle existing attachments + msg_to_edit, attributes_to_edit, attach_to_edit = self.read(msg_id) + + i = 0 + for attachment in attach_to_edit: + if attachment: + # Existing attachments must be passed as regular arguments attachment with value= file name + # Read message returnes full urls to existing attachments: + # :[][/// + attributes['attachment' + str(i)] = os.path.basename(attachment) + i += 1 + + for attribute, data in attributes.items(): + new_data = attributes.get(attribute) + if new_data is not None: + attributes_to_edit[attribute] = new_data + else: + # As we create a new message, specify creation time if not already specified in attributes + if 'When' not in attributes: + attributes['When'] = int(datetime.now().timestamp()) + + if not attributes_to_edit: + attributes_to_edit = attributes + # Remove any attributes that should not be sent + _remove_reserved_attributes(attributes_to_edit) + + if attachments: + files_to_attach, objects_to_close = self._prepare_attachments(attachments) + else: + objects_to_close = list() + files_to_attach = list() + + # Make requests module think that Text is a "file". This is the only way to force requests to send data as + # multipart/form-data even if there are no attachments. 
Elog understands only multipart/form-data + files_to_attach.append(('Text', ('', message))) + + # Base attributes are common to all messages + self._add_base_msg_attributes(attributes_to_edit) + + # Keys in attributes cannot have certain characters like whitespaces or dashes for the http request + attributes_to_edit = _replace_special_characters_in_attribute_keys(attributes_to_edit) + + try: + response = requests.post(self._url, data=attributes_to_edit, files=files_to_attach, allow_redirects=False, + verify=False) + # Validate response. Any problems will raise an Exception. + resp_message, resp_headers, resp_msg_id = _validate_response(response) + + # Close file like objects that were opened by the elog (if path + for file_like_object in objects_to_close: + if hasattr(file_like_object, 'close'): + file_like_object.close() + + except requests.RequestException as e: + # Check if message on server. + self._check_if_message_on_server(msg_id) # raises exceptions if no message or no response from server + + # If here: message is on server but cannot be downloaded (should never happen) + raise LogbookServerProblem('Cannot access logbook server to post a message, ' + 'because of:\n' + + '{0}'.format(e)) + + # Any error before here should raise an exception, but check again for nay case. 
+ if not resp_msg_id or resp_msg_id < 1: + raise LogbookInvalidMessageID('Invalid message ID: ' + str(resp_msg_id) + ' returned') + return resp_msg_id + + def read(self, msg_id): + """ + Reads message from the logbook server and returns tuple of (message, attributes, attachments) where: + message: string with message body + attributes: dictionary of all attributes returned by the logbook + attachments: list of urls to attachments on the logbook server + + :param msg_id: ID of the message to be read + :return: message, attributes, attachments + """ + + request_headers = dict() + if self._user or self._password: + request_headers['Cookie'] = self._make_user_and_pswd_cookie() + + try: + self._check_if_message_on_server(msg_id) # raises exceptions if no message or no response from server + response = requests.get(self._url + str(msg_id) + '?cmd=download', headers=request_headers, + allow_redirects=False, verify=False) + + # Validate response. If problems Exception will be thrown. + resp_message, resp_headers, resp_msg_id = _validate_response(response) + + except requests.RequestException as e: + # If here: message is on server but cannot be downloaded (should never happen) + raise LogbookServerProblem('Cannot access logbook server to read the message with ID: ' + str(msg_id) + + 'because of:\n' + '{0}'.format(e)) + + # Parse message to separate message body, attributes and attachments + attributes = dict() + attachments = list() + + returned_msg = resp_message.decode('utf-8', 'ignore').splitlines() + delimiter_idx = returned_msg.index('========================================') + + message = '\n'.join(returned_msg[delimiter_idx + 1:]) + for line in returned_msg[0:delimiter_idx]: + line = line.split(': ') + data = ''.join(line[1:]) + if line[0] == 'Attachment': + attachments = data.split(',') + # Here are only attachment names, make a full url out of it, so they could be + # recognisable by others, and downloaded if needed + attachments = [self._url + '{0}'.format(i) 
for i in attachments] + else: + attributes[line[0]] = data + + return message, attributes, attachments + + def delete(self, msg_id): + """ + Deletes message thread (!!!message + all replies!!!) from logbook. + It also deletes all of attachments of corresponding messages from the server. + + :param msg_id: message to be deleted + :return: + """ + + request_headers = dict() + if self._user or self._password: + request_headers['Cookie'] = self._make_user_and_pswd_cookie() + + try: + self._check_if_message_on_server(msg_id) # check if something to delete + + response = requests.get(self._url + str(msg_id) + '?cmd=Delete&confirm=Yes', headers=request_headers, + allow_redirects=False, verify=False) + + _validate_response(response) # raises exception if any other error identified + + except requests.RequestException as e: + # If here: message is on server but cannot be downloaded (should never happen) + raise LogbookServerProblem('Cannot access logbook server to delete the message with ID: ' + str(msg_id) + + 'because of:\n' + '{0}'.format(e)) + + # Additional validation: If successfully deleted then status_code = 302. In case command was not executed at + # all (not English language --> no download command supported) status_code = 200 and the content is just a + # html page of this whole message. + if response.status_code == 200: + raise LogbookServerProblem('Cannot process delete command (only logbooks in English supported).') + + def search(self, search_term, n_results = 20, scope="subtext"): + """ + Searches the logbook and returns the message ids. + + """ + request_headers = dict() + if self._user or self._password: + request_headers['Cookie'] = self._make_user_and_pswd_cookie() + + # Putting n_results = 0 crashes the elog. also in the web-gui. 
+ n_results = 1 if n_results < 1 else n_results + + params = { + "mode": "full", + "reverse": "1", + "npp": n_results, + scope: search_term + } + + try: + response = requests.get(self._url, params=params, headers=request_headers, + allow_redirects=False, verify=False) + + # Validate response. If problems Exception will be thrown. + _validate_response(response) + resp_message = response + + except requests.RequestException as e: + # If here: message is on server but cannot be downloaded (should never happen) + raise LogbookServerProblem('Cannot access logbook server to read message ids ' + 'because of:\n' + '{0}'.format(e)) + + from lxml import html + tree = html.fromstring(resp_message.content) + message_ids = tree.xpath('(//tr/td[@class="list1" or @class="list2"][1])/a/@href') + message_ids = [int(m.split("/")[-1]) for m in message_ids] + return message_ids + + + def get_last_message_id(self): + ids = self.get_message_ids() + if len(ids) > 0: + return ids[0] + else: + return None + + def get_message_ids(self): + request_headers = dict() + if self._user or self._password: + request_headers['Cookie'] = self._make_user_and_pswd_cookie() + + try: + response = requests.get(self._url + 'page', headers=request_headers, + allow_redirects=False, verify=False) + + # Validate response. If problems Exception will be thrown. + _validate_response(response) + resp_message = response + + except requests.RequestException as e: + # If here: message is on server but cannot be downloaded (should never happen) + raise LogbookServerProblem('Cannot access logbook server to read message ids ' + 'because of:\n' + '{0}'.format(e)) + + from lxml import html + tree = html.fromstring(resp_message.content) + message_ids = tree.xpath('(//tr/td[@class="list1" or @class="list2"][1])/a/@href') + message_ids = [int(m.split("/")[-1]) for m in message_ids] + return message_ids + + def _check_if_message_on_server(self, msg_id): + """Try to load page for specific message. 
If there is a htm tag like then there is no + such message. + + :param msg_id: ID of message to be checked + :return: + """ + + request_headers = dict() + if self._user or self._password: + request_headers['Cookie'] = self._make_user_and_pswd_cookie() + try: + response = requests.get(self._url + str(msg_id), headers=request_headers, allow_redirects=False, + verify=False) + + # If there is no message code 200 will be returned (OK) and _validate_response will not recognise it + # but there will be some error in the html code. + resp_message, resp_headers, resp_msg_id = _validate_response(response) + # If there is no message, code 200 will be returned (OK) but there will be some error indication in + # the html code. + if re.findall('.*?', + resp_message.decode('utf-8', 'ignore'), + flags=re.DOTALL): + raise LogbookInvalidMessageID('Message with ID: ' + str(msg_id) + ' does not exist on logbook.') + + except requests.RequestException as e: + raise LogbookServerProblem('No response from the logbook server.\nDetails: ' + '{0}'.format(e)) + + def _add_base_msg_attributes(self, data): + """ + Adds base message attributes which are used by all messages. + :param data: dict of current attributes + :return: content string + """ + data['cmd'] = 'Submit' + data['exp'] = self.logbook + if self._user: + data['unm'] = self._user + if self._password: + data['upwd'] = self._password + + def _prepare_attachments(self, files): + """ + Parses attachments to content objects. Attachments can be: + - file like objects: must have method read() which returns bytes. If it has attribute .name it will be used + for attachment name, otherwise generic attribute name will be used. + - path to the file on disk + + Note that if attachment is is an url pointing to the existing Logbook server it will be ignored and no + exceptions will be raised. This can happen if attachments returned with read_method are resend. 
+ + :param files: list of file like objects or paths + :return: content string + """ + prepared = list() + i = 0 + objects_to_close = list() # objects that are created (opened) by elog must be later closed + for file_obj in files: + if hasattr(file_obj, 'read'): + i += 1 + attribute_name = 'attfile' + str(i) + + filename = attribute_name # If file like object has no name specified use this one + candidate_filename = os.path.basename(file_obj.name) + + if filename: # use only if not empty string + filename = candidate_filename + + elif isinstance(file_obj, str): + # Check if it is: + # - a path to the file --> open file and append + # - an url pointing to the existing Logbook server --> ignore + + filename = "" + attribute_name = "" + + if os.path.isfile(file_obj): + i += 1 + attribute_name = 'attfile' + str(i) + + file_obj = builtins.open(file_obj, 'rb') + filename = os.path.basename(file_obj.name) + + objects_to_close.append(file_obj) + + elif not file_obj.startswith(self._url): + raise LogbookInvalidAttachmentType('Invalid type of attachment: \"' + file_obj + '\".') + else: + raise LogbookInvalidAttachmentType('Invalid type of attachment[' + str(i) + '].') + + prepared.append((attribute_name, (filename, file_obj))) + + return prepared, objects_to_close + + def _make_user_and_pswd_cookie(self): + """ + prepares user name and password cookie. It is sent in header when posting a message. + :return: user name and password value for the Cookie header + """ + cookie = '' + if self._user: + cookie += 'unm=' + self._user + ';' + if self._password: + cookie += 'upwd=' + self._password + ';' + + return cookie + + +def _remove_reserved_attributes(attributes): + """ + Removes elog reserved attributes (from the attributes dict) that can not be sent. + + :param attributes: dictionary of attributes to be cleaned. 
+ :return: + """ + + if attributes: + attributes.get('$@MID@$', None) + attributes.pop('Date', None) + attributes.pop('Attachment', None) + attributes.pop('Text', None) # Remove this one because it will be send attachment like + + +def _replace_special_characters_in_attribute_keys(attributes): + """ + Replaces special characters in elog attribute keys by underscore, otherwise attribute values will be erased in + the http request. This is using the same replacement elog itself is using to handle these cases + + :param attributes: dictionary of attributes to be cleaned. + :return: attributes with replaced keys + """ + return {re.sub('[^0-9a-zA-Z]', '_', key): value for key, value in attributes.items()} + + +def _validate_response(response): + """ Validate response of the request.""" + + msg_id = None + + if response.status_code not in [200, 302]: + # 200 --> OK; 302 --> Found + # Html page is returned with error description (handling errors same way as on original client. Looks + # like there is no other way. + + err = re.findall('.*?', + response.content.decode('utf-8', 'ignore'), + flags=re.DOTALL) + + if len(err) > 0: + # Remove html tags + # If part of the message has: Please go back... remove this part since it is an instruction for + # the user when using browser. 
+ err = re.sub('(?:<.*?>)', '', err[0]) + if err: + raise LogbookMessageRejected('Rejected because of: ' + err) + else: + raise LogbookMessageRejected('Rejected because of unknown error.') + + # Other unknown errors + raise LogbookMessageRejected('Rejected because of unknown error.') + else: + location = response.headers.get('Location') + if location is not None: + if 'has moved' in location: + raise LogbookServerProblem('Logbook server has moved to another location.') + elif 'fail' in location: + raise LogbookAuthenticationError('Invalid username or password.') + else: + # returned locations is something like: '/// + # with urllib.parse.urlparse returns attribute path=// + msg_id = int(urllib.parse.urlsplit(location).path.split('/')[-1]) + + if b'form name=form1' in response.content or b'type=password' in response.content: + # Not to smart to check this way, but no other indication of this kind of error. + # C client does it the same way + raise LogbookAuthenticationError('Invalid username or password.') + + return response.content, response.headers, msg_id + + +def _handle_pswd(password, encrypt=True): + """ + Takes password string and returns password as needed by elog. If encrypt=True then password will be + sha256 encrypted (salt='', rounds=5000). Before returning password, any trailing $5$$ will be removed + independent off encrypt flag. + + :param password: password string + :param encrypt: encrypt password? 
+ :return: elog prepared password + """ + if encrypt and password: + from passlib.hash import sha256_crypt + return sha256_crypt.encrypt(password, salt='', rounds=5000)[4:] + elif password and password.startswith('$5$$'): + return password[4:] + else: + return password diff --git a/packages/elog/logbook_exceptions.py b/packages/elog/logbook_exceptions.py new file mode 100644 index 0000000..64bea05 --- /dev/null +++ b/packages/elog/logbook_exceptions.py @@ -0,0 +1,28 @@ +class LogbookError(Exception): + """ Parent logbook exception.""" + pass + + +class LogbookAuthenticationError(LogbookError): + """ Raise when problem with username and password.""" + pass + + +class LogbookServerProblem(LogbookError): + """ Raise when problem accessing logbook server.""" + pass + + +class LogbookMessageRejected(LogbookError): + """ Raised when manipulating/creating message was rejected by the server or there was problem composing message.""" + pass + + +class LogbookInvalidMessageID(LogbookMessageRejected): + """ Raised when there is no message with specified ID on the server.""" + pass + + +class LogbookInvalidAttachmentType(LogbookMessageRejected): + """ Raised when passed attachment has invalid type.""" + pass diff --git a/python310/packages/EGG-INFO/PKG-INFO b/python310/packages/EGG-INFO/PKG-INFO new file mode 100644 index 0000000..39ef272 --- /dev/null +++ b/python310/packages/EGG-INFO/PKG-INFO @@ -0,0 +1,111 @@ +Metadata-Version: 1.0 +Name: elog +Version: 1.3.4 +Summary: Python library to access Elog. 
+Home-page: https://github.com/paulscherrerinstitute/py_elog +Author: Paul Scherrer Institute (PSI) +Author-email: UNKNOWN +License: UNKNOWN +Description: [![Build Status](https://travis-ci.org/paulscherrerinstitute/py_elog.svg?branch=master)](https://travis-ci.org/paulscherrerinstitute/py_elog) [![Build status](https://ci.appveyor.com/api/projects/status/glo428gqw951y512?svg=true)](https://ci.appveyor.com/project/simongregorebner/py-elog) + + # Overview + This Python module provides a native interface to [electronic logbooks](https://midas.psi.ch/elog/). It is compatible with Python versions 3.5 and higher. + + # Usage + + For accessing a logbook at ```http[s]://<hostname>:<port>/[<subdir>/]<logbook>/[<msg_id>]``` a logbook handle must be retrieved. + + ```python + import elog + + # Open GFA SwissFEL test logbook + logbook = elog.open('https://elog-gfa.psi.ch/SwissFEL+test/') + + # Constructor using detailed arguments + # Open demo logbook on local host: http://localhost:8080/demo/ + logbook = elog.open('localhost', 'demo', port=8080, use_ssl=False) + ``` + + Once you have hold of the logbook handle, one of its public methods can be used to read, create, reply to, edit or delete a message. + + ## Get Existing Message Ids + Get all the existing message ids of a logbook + + ```python + message_ids = logbook.get_message_ids() + ``` + + To get the id of the last inserted message + ```python + last_message_id = logbook.get_last_message_id() + ``` + + ## Read Message + + ```python + # Read message with message ID = 23 + message, attributes, attachments = logbook.read(23) + ``` + + ## Create Message + + ```python + # Create new message with some text, attributes (dict of attributes + kwargs) and attachments + new_msg_id = logbook.post('This is message text', attributes=dict_of_attributes, attachments=list_of_attachments, attribute_as_param='value') + ``` + + Which attributes are required is determined by the configuration of the elog server (keyword `Required Attributes`).
+ If the configuration looks like this: + + ``` + Required Attributes = Author, Type + ``` + + You have to provide author and type when posting a message. + + In case the type needs to be specified, the supported keywords can also be found in the elog configuration with the key `Options Type`. + + If the config looks like this: + ``` + Options Type = Routine, Software Installation, Problem Fixed, Configuration, Other + ``` + + A working create call would look like this: + + ```python + new_msg_id = logbook.post('This is message text', author='me', type='Routine') + ``` + + + + ## Reply to Message + + ```python + # Reply to message with ID=23 + new_msg_id = logbook.post('This is a reply', msg_id=23, reply=True, attributes=dict_of_attributes, attachments=list_of_attachments, attribute_as_param='value') + ``` + + ## Edit Message + + ```python + # Edit message with ID=23. Changed message text, some attributes (dict of edited attributes + kwargs) and new attachments + edited_msg_id = logbook.post('This is new message text', msg_id=23, attributes=dict_of_changed_attributes, attachments=list_of_new_attachments, attribute_as_param='new value') + ``` + + ## Delete Message (and all its replies) + + ```python + # Delete message with ID=23. All its replies will also be deleted. + logbook.delete(23) + ``` + + __Note:__ Due to the way elog implements delete, this function is only supported on English logbooks. + + # Installation + The Elog module only depends on the `passlib` and `requests` libraries, used for password encryption and http(s) communication.
It is packed as [anaconda package](https://anaconda.org/paulscherrerinstitute/elog) and can be installed as follows: + + ```bash + conda install -c paulscherrerinstitute elog + ``` +Keywords: elog,electronic,logbook +Platform: UNKNOWN diff --git a/python310/packages/EGG-INFO/SOURCES.txt b/python310/packages/EGG-INFO/SOURCES.txt new file mode 100644 index 0000000..f64fe82 --- /dev/null +++ b/python310/packages/EGG-INFO/SOURCES.txt @@ -0,0 +1,8 @@ +setup.py +elog/__init__.py +elog/logbook.py +elog/logbook_exceptions.py +elog.egg-info/PKG-INFO +elog.egg-info/SOURCES.txt +elog.egg-info/dependency_links.txt +elog.egg-info/top_level.txt \ No newline at end of file diff --git a/python310/packages/EGG-INFO/dependency_links.txt b/python310/packages/EGG-INFO/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/python310/packages/EGG-INFO/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/python310/packages/EGG-INFO/top_level.txt b/python310/packages/EGG-INFO/top_level.txt new file mode 100644 index 0000000..e729a95 --- /dev/null +++ b/python310/packages/EGG-INFO/top_level.txt @@ -0,0 +1 @@ +elog diff --git a/python310/packages/EGG-INFO/zip-safe b/python310/packages/EGG-INFO/zip-safe new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/python310/packages/EGG-INFO/zip-safe @@ -0,0 +1 @@ + diff --git a/python310/packages/bigtree/__init__.py b/python310/packages/bigtree/__init__.py new file mode 100644 index 0000000..455ffc1 --- /dev/null +++ b/python310/packages/bigtree/__init__.py @@ -0,0 +1,78 @@ +__version__ = "0.16.2" + +from bigtree.binarytree.construct import list_to_binarytree +from bigtree.dag.construct import dataframe_to_dag, dict_to_dag, list_to_dag +from bigtree.dag.export import dag_to_dataframe, dag_to_dict, dag_to_dot, dag_to_list +from bigtree.node.basenode import BaseNode +from bigtree.node.binarynode import BinaryNode +from bigtree.node.dagnode import DAGNode +from bigtree.node.node import Node +from 
bigtree.tree.construct import ( + add_dataframe_to_tree_by_name, + add_dataframe_to_tree_by_path, + add_dict_to_tree_by_name, + add_dict_to_tree_by_path, + add_path_to_tree, + dataframe_to_tree, + dataframe_to_tree_by_relation, + dict_to_tree, + list_to_tree, + list_to_tree_by_relation, + nested_dict_to_tree, + newick_to_tree, + str_to_tree, +) +from bigtree.tree.export import ( + hprint_tree, + hyield_tree, + print_tree, + tree_to_dataframe, + tree_to_dict, + tree_to_dot, + tree_to_mermaid, + tree_to_nested_dict, + tree_to_newick, + tree_to_pillow, + yield_tree, +) +from bigtree.tree.helper import clone_tree, get_subtree, get_tree_diff, prune_tree +from bigtree.tree.modify import ( + copy_and_replace_nodes_from_tree_to_tree, + copy_nodes, + copy_nodes_from_tree_to_tree, + copy_or_shift_logic, + replace_logic, + shift_and_replace_nodes, + shift_nodes, +) +from bigtree.tree.search import ( + find, + find_attr, + find_attrs, + find_child, + find_child_by_name, + find_children, + find_full_path, + find_name, + find_names, + find_path, + find_paths, + find_relative_path, + findall, +) +from bigtree.utils.groot import speak_like_groot, whoami +from bigtree.utils.iterators import ( + dag_iterator, + inorder_iter, + levelorder_iter, + levelordergroup_iter, + postorder_iter, + preorder_iter, + zigzag_iter, + zigzaggroup_iter, +) +from bigtree.utils.plot import reingold_tilford +from bigtree.workflows.app_calendar import Calendar +from bigtree.workflows.app_todo import AppToDo + +sphinx_versions = ["latest", "0.16.2", "0.15.7", "0.14.8"] diff --git a/python310/packages/bigtree/binarytree/__init__.py b/python310/packages/bigtree/binarytree/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python310/packages/bigtree/binarytree/construct.py b/python310/packages/bigtree/binarytree/construct.py new file mode 100644 index 0000000..8ba4cb1 --- /dev/null +++ b/python310/packages/bigtree/binarytree/construct.py @@ -0,0 +1,53 @@ +from typing import List, Type + 
def list_to_binarytree(
    heapq_list: List[int], node_type: Type[BinaryNode] = BinaryNode
) -> BinaryNode:
    """Build a binary tree from numbers (int or float) laid out in heapq order.

    The first element becomes the root. Every later element at position ``idx`` is attached to the node
    that was created for position ``(idx - 1) // 2``.

    Examples:
        >>> from bigtree import list_to_binarytree, tree_to_dot
        >>> nums_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        >>> root = list_to_binarytree(nums_list)
        >>> root.show()
        1
        ├── 2
        │   ├── 4
        │   │   ├── 8
        │   │   └── 9
        │   └── 5
        │       └── 10
        └── 3
            ├── 6
            └── 7
        >>> graph = tree_to_dot(root, node_colour="gold")
        >>> graph.write_png("assets/construct_binarytree.png")

        ![Sample Binary Tree](https://github.com/kayjan/bigtree/raw/master/assets/construct_binarytree.png)

    Args:
        heapq_list (List[int]): list containing integer node names, ordered in heapq fashion
        node_type (Type[BinaryNode]): node type of tree to be created, defaults to ``BinaryNode``

    Returns:
        (BinaryNode)

    Raises:
        ValueError: if `heapq_list` is empty
    """
    if len(heapq_list) == 0:
        raise ValueError("Input list does not contain any data, check `heapq_list`")

    # created[i] is the node built from heapq_list[i]
    created = [node_type(heapq_list[0])]
    for position, value in enumerate(heapq_list):
        if position == 0:
            # Root has already been created above
            continue
        # Odd and even positions both resolve to the same parent slot
        parent_position = (position - 1) // 2
        created.append(node_type(value, parent=created[parent_position]))  # type: ignore
    return created[0]
@optional_dependencies_pandas
def dict_to_dag(
    relation_attrs: Dict[str, Any],
    parent_key: str = "parents",
    node_type: Type[DAGNode] = DAGNode,
) -> DAGNode:
    """Construct DAG from nested dictionary, ``key``: child name, ``value``: dictionary of parent names, attribute
    name, and attribute value.
    Note that node names must be unique.

    The dictionary is converted to a pandas DataFrame and handed to `dataframe_to_dag`, so pandas is required;
    the function is decorated with `optional_dependencies_pandas` exactly like `list_to_dag` and
    `dataframe_to_dag`.

    Examples:
        >>> from bigtree import dict_to_dag, dag_iterator
        >>> relation_dict = {
        ...     "a": {"step": 1},
        ...     "b": {"step": 1},
        ...     "c": {"parents": ["a", "b"], "step": 2},
        ...     "d": {"parents": ["a", "c"], "step": 2},
        ...     "e": {"parents": ["d"], "step": 3},
        ... }
        >>> dag = dict_to_dag(relation_dict, parent_key="parents")
        >>> [(parent.node_name, child.node_name) for parent, child in dag_iterator(dag)]
        [('a', 'd'), ('c', 'd'), ('d', 'e'), ('a', 'c'), ('b', 'c')]

    Args:
        relation_attrs (Dict[str, Any]): dictionary containing node, node parents, and node attribute information,
            key: child name, value: dictionary of parent names, node attribute, and attribute value
        parent_key (str): key of dictionary to retrieve list of parents name, defaults to 'parents'
        node_type (Type[DAGNode]): node type of DAG to be created, defaults to ``DAGNode``

    Returns:
        (DAGNode)

    Raises:
        ValueError: if `relation_attrs` is empty, or if `parent_key` is not a key of any nested dictionary
    """
    if not len(relation_attrs):
        raise ValueError("Dictionary does not contain any data, check `relation_attrs`")

    # Convert dictionary to dataframe: transpose so that every child name becomes a row,
    # then move the child names from the index into the `_tmp_child` column
    data = pd.DataFrame(relation_attrs).T.rename_axis("_tmp_child").reset_index()
    if parent_key not in data:
        raise ValueError(
            f"Parent key {parent_key} not in dictionary, check `relation_attrs` and `parent_key`"
        )

    # Expand each list of parents into one row per (child, parent) pair
    data = data.explode(parent_key)
    return dataframe_to_dag(
        data,
        child_col="_tmp_child",
        parent_col=parent_key,
        node_type=node_type,
    )
["d", "a", 2], + ... ["d", "c", 2], + ... ["e", "d", 3], + ... ], + ... columns=["child", "parent", "step"] + ... ) + >>> dag = dataframe_to_dag(relation_data) + >>> [(parent.node_name, child.node_name) for parent, child in dag_iterator(dag)] + [('a', 'd'), ('c', 'd'), ('d', 'e'), ('a', 'c'), ('b', 'c')] + + Args: + data (pd.DataFrame): data containing path and node attribute information + child_col (str): column of data containing child name information, defaults to '' + if not set, it will take the first column of data + parent_col (str): column of data containing parent name information, defaults to '' + if not set, it will take the second column of data + attribute_cols (List[str]): columns of data containing child node attribute information, + if not set, it will take all columns of data except `child_col` and `parent_col` + node_type (Type[DAGNode]): node type of DAG to be created, defaults to ``DAGNode`` + + Returns: + (DAGNode) + """ + data = data.copy() + + if not len(data.columns): + raise ValueError("Data does not contain any columns, check `data`") + if not len(data): + raise ValueError("Data does not contain any rows, check `data`") + + if not child_col: + child_col = data.columns[0] + elif child_col not in data.columns: + raise ValueError(f"Child column not in data, check `child_col`: {child_col}") + if not parent_col: + parent_col = data.columns[1] + elif parent_col not in data.columns: + raise ValueError(f"Parent column not in data, check `parent_col`: {parent_col}") + if not len(attribute_cols): + attribute_cols = list(data.columns) + attribute_cols.remove(child_col) + attribute_cols.remove(parent_col) + elif any([col not in data.columns for col in attribute_cols]): + raise ValueError( + f"One or more attribute column(s) not in data, check `attribute_cols`: {attribute_cols}" + ) + + data_check = data.copy()[[child_col, parent_col] + attribute_cols].drop_duplicates( + subset=[child_col] + attribute_cols + ) + _duplicate_check = ( + 
def dag_to_list(
    dag: T,
) -> List[Tuple[str, str]]:
    """Export DAG as a list of ``(parent name, child name)`` tuples.

    One tuple is produced per parent-child pair yielded by `dag_iterator`, in iteration order.

    Examples:
        >>> from bigtree import DAGNode, dag_to_list
        >>> a = DAGNode("a", step=1)
        >>> b = DAGNode("b", step=1)
        >>> c = DAGNode("c", step=2, parents=[a, b])
        >>> d = DAGNode("d", step=2, parents=[a, c])
        >>> e = DAGNode("e", step=3, parents=[d])
        >>> dag_to_list(a)
        [('a', 'c'), ('a', 'd'), ('b', 'c'), ('c', 'd'), ('d', 'e')]

    Args:
        dag (DAGNode): DAG to be exported

    Returns:
        (List[Tuple[str, str]])
    """
    return [
        (upstream.node_name, downstream.node_name)
        for upstream, downstream in dag_iterator(dag)
    ]
@optional_dependencies_pandas
def dag_to_dataframe(
    dag: T,
    name_col: str = "name",
    parent_col: str = "parent",
    attr_dict: Dict[str, str] = {},
    all_attrs: bool = False,
) -> pd.DataFrame:
    """Export DAG to pandas DataFrame, one row per parent-child relation.

    A copy of the DAG is traversed with `dag_iterator`. Each pair contributes a row for the child; when the
    parent is a root it additionally contributes a row with an empty parent. Identical rows are dropped
    before the frame is returned.

    Examples:
        >>> from bigtree import DAGNode, dag_to_dataframe
        >>> a = DAGNode("a", step=1)
        >>> b = DAGNode("b", step=1)
        >>> c = DAGNode("c", step=2, parents=[a, b])
        >>> d = DAGNode("d", step=2, parents=[a, c])
        >>> e = DAGNode("e", step=3, parents=[d])
        >>> dag_to_dataframe(a, name_col="name", parent_col="parent", attr_dict={"step": "step no."})
          name parent  step no.
        0    a   None         1
        1    c      a         2
        2    d      a         2
        3    b   None         1
        4    c      b         2
        5    d      c         2
        6    e      d         3

    Args:
        dag (DAGNode): DAG to be exported
        name_col (str): column name for `node.node_name`, defaults to 'name'
        parent_col (str): column name for `node.parent.node_name`, defaults to 'parent'
        attr_dict (Dict[str, str]): dictionary mapping node attributes to column name,
            key: node attributes, value: corresponding column in dataframe, optional
        all_attrs (bool): indicator whether to retrieve all `Node` attributes, defaults to False

    Returns:
        (pd.DataFrame)
    """
    dag = dag.copy()

    def _build_row(node: Any, parent_name: Any) -> Dict[str, Any]:
        """Assemble the record of one node: name, parent name, then the requested attributes."""
        record = {name_col: node.node_name, parent_col: parent_name}
        if all_attrs:
            record.update(
                node.describe(exclude_attributes=["name"], exclude_prefix="_")
            )
        else:
            for attr_name, column_name in attr_dict.items():
                record[column_name] = node.get_attr(attr_name)
        return record

    rows: List[Dict[str, Any]] = []
    for parent_node, child_node in dag_iterator(dag):
        if parent_node.is_root:
            # Root nodes never appear as a child, so record them here with no parent
            rows.append(_build_row(parent_node, None))
        rows.append(_build_row(child_node, parent_node.node_name))

    frame = pd.DataFrame(rows)
    return frame.drop_duplicates().reset_index(drop=True)
+ + Examples: + >>> from bigtree import DAGNode, dag_to_dot + >>> a = DAGNode("a", step=1) + >>> b = DAGNode("b", step=1) + >>> c = DAGNode("c", step=2, parents=[a, b]) + >>> d = DAGNode("d", step=2, parents=[a, c]) + >>> e = DAGNode("e", step=3, parents=[d]) + >>> dag_graph = dag_to_dot(a) + + Display image directly without saving (requires IPython) + + >>> from IPython.display import Image, display + >>> plt = Image(dag_graph.create_png()) + >>> display(plt) + + + Export to image, dot file, etc. + + >>> dag_graph.write_png("assets/docstr/tree_dag.png") + >>> dag_graph.write_dot("assets/docstr/tree_dag.dot") + + Export to string + + >>> dag_graph.to_string() + 'strict digraph G {\nrankdir=TB;\nc [label=c];\na [label=a];\na -> c;\nd [label=d];\na [label=a];\na -> d;\nc [label=c];\nb [label=b];\nb -> c;\nd [label=d];\nc [label=c];\nc -> d;\ne [label=e];\nd [label=d];\nd -> e;\n}\n' + + Args: + dag (Union[DAGNode, List[DAGNode]]): DAG or list of DAGs to be exported + rankdir (str): set direction of graph layout, defaults to 'TB', can be 'BT, 'LR', 'RL' + bg_colour (str): background color of image, defaults to '' + node_colour (str): fill colour of nodes, defaults to '' + node_shape (str): shape of nodes, defaults to None + Possible node_shape include "circle", "square", "diamond", "triangle" + edge_colour (str): colour of edges, defaults to '' + node_attr (str): node attribute for style, overrides node_colour, defaults to '' + Possible node attributes include {"style": "filled", "fillcolor": "gold"} + edge_attr (str): edge attribute for style, overrides edge_colour, defaults to '' + Possible edge attributes include {"style": "bold", "label": "edge label", "color": "black"} + + Returns: + (pydot.Dot) + """ + # Get style + if bg_colour: + graph_style = dict(bgcolor=bg_colour) + else: + graph_style = dict() + + if node_colour: + node_style = dict(style="filled", fillcolor=node_colour) + else: + node_style = dict() + + if node_shape: + node_style["shape"] = node_shape + 
+ if edge_colour: + edge_style = dict(color=edge_colour) + else: + edge_style = dict() + + _graph = pydot.Dot( + graph_type="digraph", strict=True, rankdir=rankdir, **graph_style + ) + + if not isinstance(dag, list): + dag = [dag] + + for _dag in dag: + if not isinstance(_dag, DAGNode): + raise TypeError( + "Tree should be of type `DAGNode`, or inherit from `DAGNode`" + ) + _dag = _dag.copy() + + for parent_node, child_node in dag_iterator(_dag): + _node_style = node_style.copy() + _edge_style = edge_style.copy() + + child_name = child_node.name + if node_attr and child_node.get_attr(node_attr): + _node_style.update(child_node.get_attr(node_attr)) + if edge_attr and child_node.get_attr(edge_attr): + _edge_style.update(child_node.get_attr(edge_attr)) + pydot_child = pydot.Node(name=child_name, label=child_name, **_node_style) + _graph.add_node(pydot_child) + + parent_name = parent_node.name + parent_node_style = node_style.copy() + if node_attr and parent_node.get_attr(node_attr): + parent_node_style.update(parent_node.get_attr(node_attr)) + pydot_parent = pydot.Node( + name=parent_name, label=parent_name, **parent_node_style + ) + _graph.add_node(pydot_parent) + + edge = pydot.Edge(parent_name, child_name, **_edge_style) + _graph.add_edge(edge) + + return _graph diff --git a/python310/packages/bigtree/globals.py b/python310/packages/bigtree/globals.py new file mode 100644 index 0000000..52e7977 --- /dev/null +++ b/python310/packages/bigtree/globals.py @@ -0,0 +1,3 @@ +import os + +ASSERTIONS: bool = bool(os.environ.get("BIGTREE_CONF_ASSERTIONS", True)) diff --git a/python310/packages/bigtree/node/__init__.py b/python310/packages/bigtree/node/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python310/packages/bigtree/node/basenode.py b/python310/packages/bigtree/node/basenode.py new file mode 100644 index 0000000..5f4c420 --- /dev/null +++ b/python310/packages/bigtree/node/basenode.py @@ -0,0 +1,780 @@ +from __future__ import annotations + 
class BaseNode:
    """
    BaseNode extends any Python class to a tree node.
    Nodes can have attributes if they are initialized from `Node`, *dictionary*, or *pandas DataFrame*.

    Nodes can be linked to each other with `parent` and `children` setter methods,
    or using bitshift operator with the convention `parent_node >> child_node` or `child_node << parent_node`.

    Examples:
        >>> from bigtree import Node, print_tree
        >>> root = Node("a", age=90)
        >>> b = Node("b", age=65)
        >>> c = Node("c", age=60)
        >>> d = Node("d", age=40)
        >>> root.children = [b, c]
        >>> d.parent = b
        >>> print_tree(root, attr_list=["age"])
        a [age=90]
        ├── b [age=65]
        │   └── d [age=40]
        └── c [age=60]

        >>> from bigtree import Node
        >>> root = Node("a", age=90)
        >>> b = Node("b", age=65)
        >>> c = Node("c", age=60)
        >>> d = Node("d", age=40)
        >>> root >> b
        >>> root >> c
        >>> d << b
        >>> print_tree(root, attr_list=["age"])
        a [age=90]
        ├── b [age=65]
        │   └── d [age=40]
        └── c [age=60]

        Directly passing `parent` argument.

        >>> from bigtree import Node
        >>> root = Node("a")
        >>> b = Node("b", parent=root)
        >>> c = Node("c", parent=root)
        >>> d = Node("d", parent=b)

        Directly passing `children` argument.

        >>> from bigtree import Node
        >>> d = Node("d")
        >>> c = Node("c")
        >>> b = Node("b", children=[d])
        >>> a = Node("a", children=[b, c])

    **BaseNode Creation**

    Node can be created by instantiating a `BaseNode` class or by using a *dictionary*.
    If node is created with dictionary, all keys of dictionary will be stored as class attributes.

        >>> from bigtree import Node
        >>> root = Node.from_dict({"name": "a", "age": 90})

    **BaseNode Attributes**

    These are node attributes that have getter and/or setter methods.

    Get and set other `BaseNode`

    1. ``parent``: Get/set parent node
    2. ``children``: Get/set child nodes

    Get other `BaseNode`

    1. ``ancestors``: Get ancestors of node excluding self, iterator
    2. ``descendants``: Get descendants of node excluding self, iterator
    3. ``leaves``: Get all leaf node(s) from self, iterator
    4. ``siblings``: Get siblings of self
    5. ``left_sibling``: Get sibling left of self
    6. ``right_sibling``: Get sibling right of self

    Get `BaseNode` configuration

    1. ``node_path``: Get tuple of nodes from root
    2. ``is_root``: Get indicator if self is root node
    3. ``is_leaf``: Get indicator if self is leaf node
    4. ``root``: Get root node of tree
    5. ``depth``: Get depth of self
    6. ``max_depth``: Get maximum depth from root to leaf node

    **BaseNode Methods**

    These are methods available to be performed on `BaseNode`.

    Constructor methods

    1. ``from_dict()``: Create BaseNode from dictionary

    `BaseNode` methods

    1. ``describe()``: Get node information sorted by attributes, return list of tuples
    2. ``get_attr(attr_name: str)``: Get value of node attribute
    3. ``set_attrs(attrs: dict)``: Set node attribute name(s) and value(s)
    4. ``go_to(node: Self)``: Get a path from own node to another node from same tree
    5. ``append(node: Self)``: Add child to node
    6. ``extend(nodes: List[Self])``: Add multiple children to node
    7. ``copy()``: Deep copy self
    8. ``sort()``: Sort child nodes

    ----

    """

    def __init__(
        self,
        parent: Optional[T] = None,
        children: Optional[List[T]] = None,
        **kwargs: Any,
    ):
        """Create a node, optionally linking it to a parent and to children

        Args:
            parent (Optional[Self]): parent node, defaults to None
            children (Optional[List[Self]]): child nodes, defaults to None (no children)
            **kwargs (Any): stored verbatim as instance attributes

        Raises:
            AttributeError: if `parents` is passed (likely a typo of `parent`)
        """
        # Reject the `parents` typo BEFORE touching the tree, so that a failed
        # construction never leaves a half-built node linked to `parent`/`children`.
        if "parents" in kwargs:
            raise AttributeError(
                "Attempting to set `parents` attribute, do you mean `parent`?"
            )
        self.__parent: Optional[T] = None
        self.__children: List[T] = []
        if children is None:
            children = []
        self.parent = parent
        self.children = children  # type: ignore
        self.__dict__.update(**kwargs)

    @staticmethod
    def __check_parent_type(new_parent: T) -> None:
        """Check parent type

        Args:
            new_parent (Self): parent node

        Raises:
            TypeError: if `new_parent` is neither a BaseNode nor None
        """
        if not (isinstance(new_parent, BaseNode) or new_parent is None):
            raise TypeError(
                f"Expect parent to be BaseNode type or NoneType, received input type {type(new_parent)}"
            )

    def __check_parent_loop(self, new_parent: T) -> None:
        """Check that attaching self under `new_parent` does not create a loop

        Args:
            new_parent (Self): parent node

        Raises:
            LoopError: if `new_parent` is self or has self among its ancestors
        """
        if new_parent is None:
            return
        if new_parent is self:
            raise LoopError("Error setting parent: Node cannot be parent of itself")
        if any(ancestor is self for ancestor in new_parent.ancestors):
            raise LoopError("Error setting parent: Node cannot be ancestor of itself")

    @property
    def parent(self: T) -> Optional[T]:
        """Get parent node

        Returns:
            (Optional[Self])
        """
        return self.__parent

    @parent.setter
    def parent(self: T, new_parent: T) -> None:
        """Set parent node, detaching self from its current parent first

        The assignment is rolled back if any step (including the
        pre/post-assign hooks' try-guarded part) raises.

        Args:
            new_parent (Self): parent node

        Raises:
            TreeError: if the assignment fails; the original error is chained
        """
        if ASSERTIONS:
            self.__check_parent_type(new_parent)
            self.__check_parent_loop(new_parent)

        current_parent = self.parent
        current_child_idx = None
        # Tracks whether self was actually appended to new_parent, so rollback
        # only undoes what really happened instead of raising ValueError.
        attached = False

        # Assign new parent - rollback if error
        self.__pre_assign_parent(new_parent)
        try:
            # Remove self from old parent
            if current_parent is not None:
                if not any(
                    child is self for child in current_parent.children
                ):  # pragma: no cover
                    raise CorruptedTreeError(
                        "Error setting parent: Node does not exist as children of its parent"
                    )
                current_child_idx = current_parent.__children.index(self)
                current_parent.__children.remove(self)

            # Assign self to new parent
            self.__parent = new_parent
            if new_parent is not None:
                new_parent.__children.append(self)
                attached = True

            self.__post_assign_parent(new_parent)

        except Exception as exc_info:
            # Remove self from new parent (only if it was attached)
            if attached:
                new_parent.__children.remove(self)

            # Reassign self to old parent, at its original position
            self.__parent = current_parent
            if current_child_idx is not None:
                current_parent.__children.insert(current_child_idx, self)
            raise TreeError(exc_info) from exc_info

    def __pre_assign_parent(self, new_parent: T) -> None:
        """Custom method to check before attaching parent
        Can be overridden with `_BaseNode__pre_assign_parent()`

        Args:
            new_parent (Self): new parent to be added
        """
        pass

    def __post_assign_parent(self, new_parent: T) -> None:
        """Custom method to check after attaching parent
        Can be overridden with `_BaseNode__post_assign_parent()`

        Args:
            new_parent (Self): new parent to be added
        """
        pass

    @property
    def parents(self) -> None:
        """Do not allow `parents` attribute to be accessed

        Raises:
            AttributeError: No such attribute
        """
        raise AttributeError(
            "Attempting to access `parents` attribute, do you mean `parent`?"
        )

    @parents.setter
    def parents(self, new_parent: T) -> None:
        """Do not allow `parents` attribute to be set

        Args:
            new_parent (Self): parent node

        Raises:
            AttributeError: No such attribute
        """
        raise AttributeError(
            "Attempting to set `parents` attribute, do you mean `parent`?"
        )

    def __check_children_type(
        self: T, new_children: List[T] | Tuple[T] | Set[T]
    ) -> None:
        """Check that the children container is a list, tuple or set

        Args:
            new_children (Iterable[Self]): child node

        Raises:
            TypeError: if `new_children` is not a list, tuple or set
        """
        if not isinstance(new_children, (list, tuple, set)):
            raise TypeError(
                f"Expect children to be List or Tuple or Set type, received input type {type(new_children)}"
            )

    def __check_children_loop(self: T, new_children: Iterable[T]) -> None:
        """Check each child for type, loops and duplicates

        Args:
            new_children (Iterable[Self]): child node

        Raises:
            TypeError: if a child is not a BaseNode
            LoopError: if a child is self or one of self's ancestors
            TreeError: if the same child appears more than once
        """
        # Identity sets make both checks O(1) per child instead of re-walking
        # the ancestors / scanning a list for every child.
        ancestor_ids = {id(ancestor) for ancestor in self.ancestors}
        seen_children = set()
        for new_child in new_children:
            # Check type
            if not isinstance(new_child, BaseNode):
                raise TypeError(
                    f"Expect children to be BaseNode type, received input type {type(new_child)}"
                )

            # Check for loop and tree structure
            if new_child is self:
                raise LoopError("Error setting child: Node cannot be child of itself")
            if id(new_child) in ancestor_ids:
                raise LoopError(
                    "Error setting child: Node cannot be ancestor of itself"
                )

            # Check for duplicate children
            if id(new_child) in seen_children:
                raise TreeError(
                    "Error setting child: Node cannot be added multiple times as a child"
                )
            seen_children.add(id(new_child))

    @property
    def children(self: T) -> Tuple[T, ...]:
        """Get child nodes

        Returns:
            (Tuple[Self, ...])
        """
        return tuple(self.__children)

    @children.setter
    def children(self: T, new_children: List[T] | Tuple[T] | Set[T]) -> None:
        """Set child nodes, replacing any existing children

        Existing children are orphaned, new children are detached from their
        previous parents. Everything is rolled back if a step raises.

        Args:
            new_children (List[Self]): child node

        Raises:
            TreeError: if the assignment fails; the original error is chained
        """
        if ASSERTIONS:
            self.__check_children_type(new_children)
            self.__check_children_loop(new_children)
        new_children = list(new_children)

        # Snapshot (child, index under previous parent, previous parent) so the
        # previous state can be restored; a list avoids requiring hashable nodes.
        previous_links = [
            (new_child, new_child.parent.__children.index(new_child), new_child.parent)
            for new_child in new_children
            if new_child.parent is not None
        ]
        current_new_orphan = [
            new_child for new_child in new_children if new_child.parent is None
        ]
        current_children = list(self.children)

        # Assign new children - rollback if error
        self.__pre_assign_children(new_children)
        try:
            # Remove old children from self
            del self.children

            # Assign new children to self
            self.__children = new_children
            for new_child in new_children:
                if new_child.parent is not None:
                    new_child.parent.__children.remove(new_child)
                new_child.__parent = self
            self.__post_assign_children(new_children)
        except Exception as exc_info:
            # Reassign new children to their original parent. Restoring in
            # ascending index order puts siblings back in their original order.
            for child, child_idx, parent in sorted(
                previous_links, key=lambda link: link[1]
            ):
                child.__parent = parent
                if not any(existing is child for existing in parent.__children):
                    parent.__children.insert(child_idx, child)
            for child in current_new_orphan:
                child.__parent = None

            # Reassign old children to self
            self.__children = current_children
            for child in current_children:
                child.__parent = self
            raise TreeError(exc_info) from exc_info

    @children.deleter
    def children(self) -> None:
        """Delete child node(s)"""
        for child in self.children:
            child.parent.__children.remove(child)  # type: ignore
            child.__parent = None

    def __pre_assign_children(self: T, new_children: Iterable[T]) -> None:
        """Custom method to check before attaching children
        Can be overridden with `_BaseNode__pre_assign_children()`

        Args:
            new_children (Iterable[Self]): new children to be added
        """
        pass

    def __post_assign_children(self: T, new_children: Iterable[T]) -> None:
        """Custom method to check after attaching children
        Can be overridden with `_BaseNode__post_assign_children()`

        Args:
            new_children (Iterable[Self]): new children to be added
        """
        pass

    @property
    def ancestors(self: T) -> Iterable[T]:
        """Get iterator to yield all ancestors of self, does not include self

        Returns:
            (Iterable[Self])
        """
        node = self.parent
        while node is not None:
            yield node
            node = node.parent

    @property
    def descendants(self: T) -> Iterable[T]:
        """Get iterator to yield all descendants of self, does not include self

        Returns:
            (Iterable[Self])
        """
        # Identity, not equality: only the starting node itself is excluded.
        yield from preorder_iter(self, filter_condition=lambda _node: _node is not self)

    @property
    def leaves(self: T) -> Iterable[T]:
        """Get iterator to yield all leaf nodes from self

        Returns:
            (Iterable[Self])
        """
        yield from preorder_iter(self, filter_condition=lambda _node: _node.is_leaf)

    @property
    def siblings(self: T) -> Iterable[T]:
        """Get siblings of self

        Returns:
            (Iterable[Self])
        """
        if self.parent is None:
            return ()
        return tuple(child for child in self.parent.children if child is not self)

    @property
    def left_sibling(self: T) -> Optional[T]:
        """Get sibling left of self

        Returns:
            (Optional[Self]): None if self is root or is the first child
        """
        if self.parent is not None:
            children = self.parent.children
            child_idx = children.index(self)
            if child_idx:
                return children[child_idx - 1]
        return None

    @property
    def right_sibling(self: T) -> Optional[T]:
        """Get sibling right of self

        Returns:
            (Optional[Self]): None if self is root or is the last child
        """
        if self.parent is not None:
            children = self.parent.children
            child_idx = children.index(self)
            if child_idx + 1 < len(children):
                return children[child_idx + 1]
        return None

    @property
    def node_path(self: T) -> Iterable[T]:
        """Get tuple of nodes starting from root

        Always a tuple (also for the root itself), built iteratively so deep
        trees do not hit the interpreter recursion limit.

        Returns:
            (Iterable[Self])
        """
        path = [self]
        path.extend(self.ancestors)
        path.reverse()
        return tuple(path)

    @property
    def is_root(self) -> bool:
        """Get indicator if self is root node

        Returns:
            (bool)
        """
        return self.parent is None

    @property
    def is_leaf(self) -> bool:
        """Get indicator if self is leaf node

        Returns:
            (bool)
        """
        return not self.children

    @property
    def root(self: T) -> T:
        """Get root node of tree

        Returns:
            (Self)
        """
        node = self
        while node.parent is not None:
            node = node.parent
        return node

    @property
    def depth(self) -> int:
        """Get depth of self, indexing starts from 1

        Returns:
            (int)
        """
        return 1 + sum(1 for _ in self.ancestors)

    @property
    def max_depth(self) -> int:
        """Get maximum depth from root to leaf node

        Returns:
            (int)
        """
        root = self.root
        return max([root.depth] + [node.depth for node in root.descendants])

    @classmethod
    def from_dict(cls, input_dict: Dict[str, Any]) -> BaseNode:
        """Construct node from dictionary, all keys of dictionary will be stored as class attributes
        Input dictionary must have key `name` if not `Node` will not have any name

        Examples:
            >>> from bigtree import Node
            >>> a = Node.from_dict({"name": "a", "age": 90})

        Args:
            input_dict (Dict[str, Any]): dictionary with node information, key: attribute name, value: attribute value

        Returns:
            (BaseNode)
        """
        return cls(**input_dict)

    def describe(
        self, exclude_attributes: List[str] = [], exclude_prefix: str = ""
    ) -> List[Tuple[str, Any]]:
        """Get node information sorted by attribute name, returns list of tuples

        Examples:
            >>> from bigtree.node.node import Node
            >>> a = Node('a', age=90)
            >>> a.describe()
            [('_BaseNode__children', []), ('_BaseNode__parent', None), ('_sep', '/'), ('age', 90), ('name', 'a')]
            >>> a.describe(exclude_prefix="_")
            [('age', 90), ('name', 'a')]
            >>> a.describe(exclude_prefix="_", exclude_attributes=["name"])
            [('age', 90)]

        Args:
            exclude_attributes (List[str]): list of attributes to exclude
            exclude_prefix (str): prefix of attributes to exclude

        Returns:
            (List[Tuple[str, Any]])
        """
        # NOTE: the list default is kept for signature compatibility; it is only
        # read here, never mutated.
        return [
            item
            for item in sorted(self.__dict__.items(), key=lambda item: item[0])
            if item[0] not in exclude_attributes
            and not (exclude_prefix and item[0].startswith(exclude_prefix))
        ]

    def get_attr(self, attr_name: str, default_value: Any = None) -> Any:
        """Get value of node attribute
        Returns default value if attribute name does not exist

        Examples:
            >>> from bigtree.node.node import Node
            >>> a = Node('a', age=90)
            >>> a.get_attr("age")
            90

        Args:
            attr_name (str): attribute name
            default_value (Any): default value if attribute does not exist, defaults to None

        Returns:
            (Any)
        """
        try:
            return getattr(self, attr_name)
        except AttributeError:
            return default_value

    def set_attrs(self, attrs: Dict[str, Any]) -> None:
        """Set node attributes

        Examples:
            >>> from bigtree.node.node import Node
            >>> a = Node('a')
            >>> a.set_attrs({"age": 90})
            >>> a
            Node(/a, age=90)

        Args:
            attrs (Dict[str, Any]): attribute dictionary,
                key: attribute name, value: attribute value
        """
        self.__dict__.update(attrs)

    def go_to(self: T, node: T) -> Iterable[T]:
        """Get path from current node to specified node from same tree

        Examples:
            >>> from bigtree import Node, print_tree
            >>> a = Node(name="a")
            >>> b = Node(name="b", parent=a)
            >>> c = Node(name="c", parent=a)
            >>> d = Node(name="d", parent=b)
            >>> e = Node(name="e", parent=b)
            >>> f = Node(name="f", parent=c)
            >>> g = Node(name="g", parent=e)
            >>> h = Node(name="h", parent=e)
            >>> print_tree(a)
            a
            ├── b
            │   ├── d
            │   └── e
            │       ├── g
            │       └── h
            └── c
                └── f
            >>> d.go_to(d)
            [Node(/a/b/d, )]
            >>> d.go_to(g)
            [Node(/a/b/d, ), Node(/a/b, ), Node(/a/b/e, ), Node(/a/b/e/g, )]
            >>> d.go_to(f)
            [Node(/a/b/d, ), Node(/a/b, ), Node(/a, ), Node(/a/c, ), Node(/a/c/f, )]

        Args:
            node (Self): node to travel to from current node, inclusive of start and end node

        Returns:
            (Iterable[Self])

        Raises:
            TypeError: if `node` is not a BaseNode
            TreeError: if `node` belongs to a different tree
        """
        if not isinstance(node, BaseNode):
            raise TypeError(
                f"Expect node to be BaseNode type, received input type {type(node)}"
            )
        if self.root != node.root:
            raise TreeError(
                f"Nodes are not from the same tree. Check {self} and {node}"
            )
        if self == node:
            return [self]
        # self_path walks upwards from self; node_path walks downwards to node.
        self_path = [self] + list(self.ancestors)
        node_path = ([node] + list(node.ancestors))[::-1]
        common_nodes = set(self_path).intersection(node_path)
        # The common node closest to self is the turning point of the path.
        self_min_index = min(self_path.index(_node) for _node in common_nodes)
        min_common_node = self_path[self_min_index]
        node_min_index = node_path.index(min_common_node)
        return self_path[:self_min_index] + node_path[node_min_index:]

    def append(self: T, other: T) -> None:
        """Add other as child of self

        Args:
            other (Self): other node, child to be added
        """
        other.parent = self

    def extend(self: T, others: List[T]) -> None:
        """Add others as children of self

        Args:
            others (Self): other nodes, children to be added
        """
        for child in others:
            child.parent = self

    def copy(self: T) -> T:
        """Deep copy self; clone self

        Examples:
            >>> from bigtree.node.node import Node
            >>> a = Node('a')
            >>> a_copy = a.copy()

        Returns:
            (Self)
        """
        return copy.deepcopy(self)

    def sort(self: T, **kwargs: Any) -> None:
        """Sort children, possible keyword arguments include ``key=lambda node: node.name``, ``reverse=True``

        Examples:
            >>> from bigtree import Node, print_tree
            >>> a = Node('a')
            >>> c = Node("c", parent=a)
            >>> b = Node("b", parent=a)
            >>> print_tree(a)
            a
            ├── c
            └── b
            >>> a.sort(key=lambda node: node.name)
            >>> print_tree(a)
            a
            ├── b
            └── c
        """
        children = list(self.children)
        children.sort(**kwargs)
        self.__children = children

    def __copy__(self: T) -> T:
        """Shallow copy self

        The copy shares its attribute values (including the parent reference
        and the children list) with the original.

        Examples:
            >>> import copy
            >>> from bigtree.node.node import Node
            >>> a = Node('a')
            >>> a_copy = copy.copy(a)

        Returns:
            (Self)
        """
        obj: T = type(self).__new__(self.__class__)
        obj.__dict__.update(self.__dict__)
        return obj

    def __repr__(self) -> str:
        """Print format of BaseNode

        Returns:
            (str)
        """
        class_name = self.__class__.__name__
        node_dict = self.describe(exclude_prefix="_")
        node_description = ", ".join([f"{k}={v}" for k, v in node_dict])
        return f"{class_name}({node_description})"

    def __rshift__(self: T, other: T) -> None:
        """Set children using >> bitshift operator for self >> children (other)

        Args:
            other (Self): other node, children
        """
        other.parent = self

    def __lshift__(self: T, other: T) -> None:
        """Set parent using << bitshift operator for self << parent (other)

        Args:
            other (Self): other node, parent
        """
        self.parent = other

    def __iter__(self) -> Generator[T, None, None]:
        """Iterate through child nodes

        Returns:
            (Self): child node
        """
        yield from self.children  # type: ignore

    def __contains__(self, other_node: T) -> bool:
        """Check if child node exists

        Args:
            other_node (T): child node

        Returns:
            (bool)
        """
        return other_node in self.children
class BinaryNode(Node):
    """
    BinaryNode is an extension of Node, and is able to extend to any Python class for Binary Tree implementation.
    Nodes can have attributes if they are initialized from `BinaryNode`, *dictionary*, or *pandas DataFrame*.

    BinaryNode can be linked to each other with `children`, `left`, or `right` setter methods.
    If initialized with `children`, it must be length 2, denoting left and right child.

    Examples:
        >>> from bigtree import BinaryNode, print_tree
        >>> a = BinaryNode(1)
        >>> b = BinaryNode(2)
        >>> c = BinaryNode(3)
        >>> d = BinaryNode(4)
        >>> a.children = [b, c]
        >>> b.right = d
        >>> print_tree(a)
        1
        ├── 2
        │   └── 4
        └── 3

        Directly passing `left`, `right`, or `children` argument.

        >>> from bigtree import BinaryNode
        >>> d = BinaryNode(4)
        >>> c = BinaryNode(3)
        >>> b = BinaryNode(2, right=d)
        >>> a = BinaryNode(1, children=[b, c])

    **BinaryNode Creation**

    Node can be created by instantiating a `BinaryNode` class or by using a *dictionary*.
    If node is created with dictionary, all keys of dictionary will be stored as class attributes.

        >>> from bigtree import BinaryNode
        >>> a = BinaryNode.from_dict({"name": "1"})
        >>> a
        BinaryNode(name=1, val=1)

    **BinaryNode Attributes**

    These are node attributes that have getter and/or setter methods.

    Get `BinaryNode` configuration

    1. ``left``: Get left children
    2. ``right``: Get right children

    ----

    """

    def __init__(
        self,
        name: Union[str, int] = "",
        left: Optional[T] = None,
        right: Optional[T] = None,
        parent: Optional[T] = None,
        children: Optional[List[Optional[T]]] = None,
        **kwargs: Any,
    ):
        """Create a binary node, optionally linking parent and left/right children

        Args:
            name (Union[str, int]): node name; stored as str in `name` and, when
                it parses as an integer, as int in `val` (otherwise as str)
            left (Optional[Self]): left child
            right (Optional[Self]): right child
            parent (Optional[Self]): parent node
            children (Optional[List[Optional[Self]]]): `[left, right]`, must have length 2 if given
            **kwargs (Any): stored verbatim as instance attributes

        Raises:
            AttributeError: if `parents` is passed (likely a typo of `parent`)
            ValueError: if `children` is not of length 2 or disagrees with `left`/`right`
        """
        # Reject the `parents` typo BEFORE touching the tree, so that a failed
        # construction never leaves a half-built node linked to other nodes.
        if "parents" in kwargs:
            raise AttributeError(
                "Attempting to set `parents` attribute, do you mean `parent`?"
            )
        try:
            self.val: Union[str, int] = int(name)
        except ValueError:
            self.val = str(name)
        self.name = str(name)
        self._sep = "/"
        self.__parent: Optional[T] = None
        self.__children: List[Optional[T]] = [None, None]
        if children:
            if len(children) != 2:
                raise ValueError("Children input must have length 2")
            if left is not None and left != children[0]:
                raise ValueError(
                    f"Error setting child: Attempting to set both left and children with mismatched values\n"
                    f"Check left {left} and children {children}"
                )
            if right is not None and right != children[1]:
                raise ValueError(
                    f"Error setting child: Attempting to set both right and children with mismatched values\n"
                    f"Check right {right} and children {children}"
                )
        else:
            children = [left, right]
        self.parent = parent
        self.children = children  # type: ignore
        self.__dict__.update(**kwargs)

    @property
    def left(self: T) -> Optional[T]:
        """Get left children

        Returns:
            (Optional[Self])
        """
        return self.__children[0]

    @left.setter
    def left(self: T, left_child: Optional[T]) -> None:
        """Set left children

        Args:
            left_child (Optional[Self]): left child
        """
        self.children = [left_child, self.right]  # type: ignore

    @property
    def right(self: T) -> Optional[T]:
        """Get right children

        Returns:
            (Optional[Self])
        """
        return self.__children[1]

    @right.setter
    def right(self: T, right_child: Optional[T]) -> None:
        """Set right children

        Args:
            right_child (Optional[Self]): right child
        """
        self.children = [self.left, right_child]  # type: ignore

    @staticmethod
    def __check_parent_type(new_parent: Optional[T]) -> None:
        """Check parent type

        Args:
            new_parent (Optional[Self]): parent node

        Raises:
            TypeError: if `new_parent` is neither a BinaryNode nor None
        """
        if not (isinstance(new_parent, BinaryNode) or new_parent is None):
            raise TypeError(
                f"Expect parent to be BinaryNode type or NoneType, received input type {type(new_parent)}"
            )

    @property
    def parent(self: T) -> Optional[T]:
        """Get parent node

        Returns:
            (Optional[Self])
        """
        return self.__parent

    @parent.setter
    def parent(self: T, new_parent: Optional[T]) -> None:
        """Set parent node; self takes the first free slot (left, then right)

        The assignment is rolled back if any step raises.

        Args:
            new_parent (Optional[Self]): parent node

        Raises:
            TreeError: if the parent already has 2 children or the assignment
                fails otherwise; the original error is chained
        """
        if ASSERTIONS:
            self.__check_parent_type(new_parent)
            self._BaseNode__check_parent_loop(new_parent)  # type: ignore

        current_parent = self.parent
        current_child_idx = None

        # Assign new parent - rollback if error
        self.__pre_assign_parent(new_parent)
        try:
            # Remove self from old parent (slot is emptied, list keeps length 2)
            if current_parent is not None:
                if not any(
                    child is self for child in current_parent.children
                ):  # pragma: no cover
                    raise CorruptedTreeError(
                        "Error setting parent: Node does not exist as children of its parent"
                    )
                current_child_idx = current_parent.__children.index(self)
                current_parent.__children[current_child_idx] = None

            # Assign self to new parent
            self.__parent = new_parent
            if new_parent is not None:
                for child_idx, child in enumerate(new_parent.__children):
                    if child is None:
                        new_parent.__children[child_idx] = self
                        break
                else:
                    raise TreeError(f"Parent {new_parent} already has 2 children")

            self.__post_assign_parent(new_parent)

        except Exception as exc_info:
            # Remove self from new parent
            if new_parent is not None and self in new_parent.__children:
                child_idx = new_parent.__children.index(self)
                new_parent.__children[child_idx] = None

            # Reassign self to old parent
            self.__parent = current_parent
            if current_child_idx is not None:
                current_parent.__children[current_child_idx] = self
            raise TreeError(exc_info) from exc_info

    def __pre_assign_parent(self: T, new_parent: Optional[T]) -> None:
        """Custom method to check before attaching parent
        Can be overridden with `_BinaryNode__pre_assign_parent()`

        Args:
            new_parent (Optional[Self]): new parent to be added
        """
        pass

    def __post_assign_parent(self: T, new_parent: Optional[T]) -> None:
        """Custom method to check after attaching parent
        Can be overridden with `_BinaryNode__post_assign_parent()`

        Args:
            new_parent (Optional[Self]): new parent to be added
        """
        pass

    def __check_children_type(
        self: T, new_children: List[Optional[T]]
    ) -> List[Optional[T]]:
        """Check child count and normalise the input to a fresh 2-slot list

        Returning a new list keeps the node's internal storage mutable (tuple
        input) and independent of the caller's object (list input).

        Args:
            new_children (List[Optional[Self]]): child node

        Returns:
            (List[Optional[Self]])

        Raises:
            ValueError: if the input is non-empty and not of length 2
        """
        if not len(new_children):
            return [None, None]
        if len(new_children) != 2:
            raise ValueError("Children input must have length 2")
        return list(new_children)

    def __check_children_loop(self: T, new_children: List[Optional[T]]) -> None:
        """Check each child for type, loops and duplicates

        Args:
            new_children (List[Optional[Self]]): child node

        Raises:
            TypeError: if a child is neither a BinaryNode nor None
            LoopError: if a child is self or one of self's ancestors
            TreeError: if the same child appears more than once
        """
        ancestor_ids = {id(ancestor) for ancestor in self.ancestors}
        seen_children = set()
        for new_child in new_children:
            # Empty slot, nothing to check
            if new_child is None:
                continue

            # Check type
            if not isinstance(new_child, BinaryNode):
                raise TypeError(
                    f"Expect children to be BinaryNode type or NoneType, received input type {type(new_child)}"
                )

            # Check for loop and tree structure
            if new_child is self:
                raise LoopError("Error setting child: Node cannot be child of itself")
            if id(new_child) in ancestor_ids:
                raise LoopError(
                    "Error setting child: Node cannot be ancestor of itself"
                )

            # Check for duplicate children
            if id(new_child) in seen_children:
                raise TreeError(
                    "Error setting child: Node cannot be added multiple times as a child"
                )
            seen_children.add(id(new_child))

    @property
    def children(self: T) -> Tuple[T, ...]:
        """Get child nodes

        Returns:
            (Tuple[Optional[Self]])
        """
        return tuple(self.__children)

    @children.setter
    def children(self: T, _new_children: List[Optional[T]]) -> None:
        """Set child nodes as `[left, right]`, replacing any existing children

        Existing children are orphaned, new children are detached from their
        previous parents. Everything is rolled back if a step raises.

        Args:
            _new_children (List[Optional[Self]]): child node

        Raises:
            TreeError: if the assignment fails; the original error is chained
        """
        self._BaseNode__check_children_type(_new_children)  # type: ignore
        new_children = self.__check_children_type(_new_children)
        if ASSERTIONS:
            self.__check_children_loop(new_children)

        # Snapshot (child, slot under previous parent, previous parent) so the
        # previous state can be restored on failure.
        previous_links = [
            (new_child, new_child.parent.__children.index(new_child), new_child.parent)
            for new_child in new_children
            if new_child is not None and new_child.parent is not None
        ]
        current_new_orphan = [
            new_child
            for new_child in new_children
            if new_child is not None and new_child.parent is None
        ]
        current_children = list(self.children)

        # Assign new children - rollback if error
        self.__pre_assign_children(new_children)
        try:
            # Remove old children from self
            del self.children

            # Assign new children to self
            self.__children = new_children
            for new_child in new_children:
                if new_child is not None:
                    if new_child.parent is not None:
                        child_idx = new_child.parent.__children.index(new_child)
                        new_child.parent.__children[child_idx] = None
                    new_child.__parent = self
            self.__post_assign_children(new_children)
        except Exception as exc_info:
            # Reassign new children to their original parent
            for child, child_idx, parent in previous_links:
                child.__parent = parent
                parent.__children[child_idx] = child
            for child in current_new_orphan:
                child.__parent = None

            # Reassign old children to self
            self.__children = current_children
            for child in current_children:
                if child is not None:
                    child.__parent = self
            raise TreeError(exc_info) from exc_info

    @children.deleter
    def children(self) -> None:
        """Delete child node(s)

        Slots are emptied in place so the children list keeps its fixed
        `[left, right]` shape and `left`/`right` stay readable afterwards.
        """
        for slot, child in enumerate(self.__children):
            if child is not None:
                child.__parent = None
                self.__children[slot] = None

    def __pre_assign_children(self: T, new_children: List[Optional[T]]) -> None:
        """Custom method to check before attaching children
        Can be overridden with `_BinaryNode__pre_assign_children()`

        Args:
            new_children (List[Optional[Self]]): new children to be added
        """
        pass

    def __post_assign_children(self: T, new_children: List[Optional[T]]) -> None:
        """Custom method to check after attaching children
        Can be overridden with `_BinaryNode__post_assign_children()`

        Args:
            new_children (List[Optional[Self]]): new children to be added
        """
        pass

    @property
    def is_leaf(self) -> bool:
        """Get indicator if self is leaf node

        Returns:
            (bool)
        """
        return all(child is None for child in self.children)

    def sort(self, **kwargs: Any) -> None:
        """Sort children, possible keyword arguments include ``key=lambda node: node.val``, ``reverse=True``

        Only has an effect when both children are present.

        Examples:
            >>> from bigtree import BinaryNode, print_tree
            >>> a = BinaryNode(1)
            >>> c = BinaryNode(3, parent=a)
            >>> b = BinaryNode(2, parent=a)
            >>> print_tree(a)
            1
            ├── 3
            └── 2
            >>> a.sort(key=lambda node: node.val)
            >>> print_tree(a)
            1
            ├── 2
            └── 3
        """
        children = [child for child in self.children if child is not None]
        if len(children) == 2:
            children.sort(**kwargs)
            self.__children = children  # type: ignore

    def __repr__(self) -> str:
        """Print format of BinaryNode

        Returns:
            (str)
        """
        class_name = self.__class__.__name__
        node_dict = self.describe(exclude_prefix="_", exclude_attributes=[])
        node_description = ", ".join([f"{k}={v}" for k, v in node_dict])
        return f"{class_name}({node_description})"
+ + Examples: + >>> from bigtree import DAGNode + >>> a = DAGNode("a") + >>> b = DAGNode("b") + >>> c = DAGNode("c") + >>> d = DAGNode("d") + >>> c.parents = [a, b] + >>> c.children = [d] + + >>> from bigtree import DAGNode + >>> a = DAGNode("a") + >>> b = DAGNode("b") + >>> c = DAGNode("c") + >>> d = DAGNode("d") + >>> a >> c + >>> b >> c + >>> d << c + + Directly passing `parents` argument. + + >>> from bigtree import DAGNode + >>> a = DAGNode("a") + >>> b = DAGNode("b") + >>> c = DAGNode("c", parents=[a, b]) + >>> d = DAGNode("d", parents=[c]) + + Directly passing `children` argument. + + >>> from bigtree import DAGNode + >>> d = DAGNode("d") + >>> c = DAGNode("c", children=[d]) + >>> b = DAGNode("b", children=[c]) + >>> a = DAGNode("a", children=[c]) + + **DAGNode Creation** + + Node can be created by instantiating a `DAGNode` class or by using a *dictionary*. + If node is created with dictionary, all keys of dictionary will be stored as class attributes. + + >>> from bigtree import DAGNode + >>> a = DAGNode.from_dict({"name": "a", "age": 90}) + + **DAGNode Attributes** + + These are node attributes that have getter and/or setter methods. + + Get and set other `DAGNode` + + 1. ``parents``: Get/set parent nodes + 2. ``children``: Get/set child nodes + + Get other `DAGNode` + + 1. ``ancestors``: Get ancestors of node excluding self, iterator + 2. ``descendants``: Get descendants of node excluding self, iterator + 3. ``siblings``: Get siblings of self + + Get `DAGNode` configuration + + 1. ``node_name``: Get node name, without accessing `name` directly + 2. ``is_root``: Get indicator if self is root node + 3. ``is_leaf``: Get indicator if self is leaf node + + **DAGNode Methods** + + These are methods available to be performed on `DAGNode`. + + Constructor methods + + 1. ``from_dict()``: Create DAGNode from dictionary + + `DAGNode` methods + + 1. ``describe()``: Get node information sorted by attributes, return list of tuples + 2. 
``get_attr(attr_name: str)``: Get value of node attribute + 3. ``set_attrs(attrs: dict)``: Set node attribute name(s) and value(s) + 4. ``go_to(node: Self)``: Get a path from own node to another node from same DAG + 5. ``copy()``: Deep copy self + + ---- + + """ + + def __init__( + self, + name: str = "", + parents: Optional[List[T]] = None, + children: Optional[List[T]] = None, + **kwargs: Any, + ): + self.name = name + self.__parents: List[T] = [] + self.__children: List[T] = [] + if parents is None: + parents = [] + if children is None: + children = [] + self.parents = parents + self.children = children + if "parent" in kwargs: + raise AttributeError( + "Attempting to set `parent` attribute, do you mean `parents`?" + ) + self.__dict__.update(**kwargs) + + @property + def parent(self) -> None: + """Do not allow `parent` attribute to be accessed + + Raises: + AttributeError: No such attribute + """ + raise AttributeError( + "Attempting to access `parent` attribute, do you mean `parents`?" + ) + + @parent.setter + def parent(self, new_parent: T) -> None: + """Do not allow `parent` attribute to be set + + Args: + new_parent (Self): parent node + + Raises: + AttributeError + """ + raise AttributeError( + "Attempting to set `parent` attribute, do you mean `parents`?" 
def __check_parent_loop(self: T, new_parents: List[T]) -> None:
    """Check that new parents are valid nodes and do not introduce a loop

    Args:
        new_parents (List[Self]): parent nodes

    Raises:
        TypeError: if a parent is not a DAGNode
        LoopError: if a parent is self, or self is already an ancestor of a parent
        TreeError: if the same parent appears more than once in `new_parents`
    """
    seen_parent_ids = set()
    for new_parent in new_parents:
        # Check type
        if not isinstance(new_parent, DAGNode):
            raise TypeError(
                f"Expect parent to be DAGNode type, received input type {type(new_parent)}"
            )

        # Check for loop and tree structure
        if new_parent is self:
            raise LoopError("Error setting parent: Node cannot be parent of itself")
        # `ancestors` walks the whole upstream graph on every access, so it is
        # read once here; `any()` over an empty result is simply False.
        if any(ancestor is self for ancestor in new_parent.ancestors):
            raise LoopError(
                "Error setting parent: Node cannot be ancestor of itself"
            )

        # Check for duplicate parents (identity based)
        if id(new_parent) in seen_parent_ids:
            raise TreeError(
                "Error setting parent: Node cannot be added multiple times as a parent"
            )
        seen_parent_ids.add(id(new_parent))
def __check_children_loop(self: T, new_children: Iterable[T]) -> None:
    """Check that new children are valid nodes and do not introduce a loop

    Args:
        new_children (Iterable[Self]): child nodes

    Raises:
        TypeError: if a child is not a DAGNode
        LoopError: if a child is self, or is already an ancestor of self
        TreeError: if the same child appears more than once in `new_children`
    """
    # `ancestors` traverses the upstream graph on every access and does not
    # depend on the candidate child, so resolve it once for the whole loop.
    ancestor_ids = {id(ancestor) for ancestor in self.ancestors}
    seen_children_ids = set()
    for new_child in new_children:
        # Check type
        if not isinstance(new_child, DAGNode):
            raise TypeError(
                f"Expect children to be DAGNode type, received input type {type(new_child)}"
            )

        # Check for loop and tree structure
        if new_child is self:
            raise LoopError("Error setting child: Node cannot be child of itself")
        if id(new_child) in ancestor_ids:
            raise LoopError(
                "Error setting child: Node cannot be ancestor of itself"
            )

        # Check for duplicate children (identity based)
        if id(new_child) in seen_children_ids:
            raise TreeError(
                "Error setting child: Node cannot be added multiple times as a child"
            )
        seen_children_ids.add(id(new_child))
@children.setter
def children(self: T, new_children: Iterable[T]) -> None:
    """Set child nodes

    Links every node in `new_children` to self (child side and parent side).
    Children that are already linked are left as they are; this setter only
    adds links, it does not detach children missing from `new_children`.

    Args:
        new_children (Iterable[Self]): child node

    Raises:
        TreeError: if linking or the post-assign hook fails; links created by
            this call are removed again before raising
    """
    # Validation is only performed when the global ASSERTIONS flag is on
    if ASSERTIONS:
        self.__check_children_type(new_children)
        self.__check_children_loop(new_children)

    # Snapshot of the children before this call, used to decide what to undo
    current_children = list(self.children)

    # Assign new children - rollback if error
    # NOTE(review): `new_children` is iterated several times (checks, linking,
    # rollback); a one-shot iterator would be exhausted early - confirm callers
    # always pass a re-iterable collection.
    self.__pre_assign_children(new_children)
    try:
        # Assign new children to self
        for new_child in new_children:
            # Skip children that already list self as parent
            if self not in new_child.__parents:
                new_child.__parents.append(self)
                self.__children.append(new_child)
        self.__post_assign_children(new_children)
    except Exception as exc_info:
        # Roll back: unlink every child that was not a child before this call
        for new_child in new_children:
            if new_child not in current_children:
                new_child.__parents.remove(self)
                self.__children.remove(new_child)
        raise TreeError(exc_info)
@property
def siblings(self: T) -> Iterable[T]:
    """Get siblings of self

    Siblings are the other children of any parent of self. A node that shares
    several parents with self is reported once, in first-seen order (the same
    de-duplication that `ancestors` and `descendants` apply).

    Returns:
        (Iterable[Self])
    """
    if self.is_root:
        return ()

    seen_ids = set()
    unique_siblings = []
    for parent in self.parents:
        for child in parent.children:
            # Identity based: skip self and nodes already collected via
            # another shared parent.
            if child is self or id(child) in seen_ids:
                continue
            seen_ids.add(id(child))
            unique_siblings.append(child)
    return tuple(unique_siblings)
def go_to(self: T, node: T) -> List[List[T]]:
    """Get list of possible paths from current node to specified node from same tree

    Examples:
        >>> from bigtree import DAGNode
        >>> a = DAGNode("a")
        >>> b = DAGNode("b")
        >>> c = DAGNode("c")
        >>> d = DAGNode("d")
        >>> a >> c
        >>> b >> c
        >>> c >> d
        >>> a >> d
        >>> a.go_to(c)
        [[DAGNode(a, ), DAGNode(c, )]]
        >>> a.go_to(d)
        [[DAGNode(a, ), DAGNode(c, ), DAGNode(d, )], [DAGNode(a, ), DAGNode(d, )]]
        >>> a.go_to(b)
        Traceback (most recent call last):
            ...
        bigtree.utils.exceptions.TreeError: It is not possible to go to DAGNode(b, )

    Args:
        node (Self): node to travel to from current node, inclusive of start and end node

    Returns:
        (List[List[Self]])

    Raises:
        TypeError: if `node` is not a DAGNode
        TreeError: if `node` is not a descendant of self
    """
    if not isinstance(node, DAGNode):
        raise TypeError(
            f"Expect node to be DAGNode type, received input type {type(node)}"
        )
    if self == node:
        return [[self]]
    if node not in self.descendants:
        raise TreeError(f"It is not possible to go to {node}")

    # Keep the accumulator local. Storing it on the instance (as before) left
    # a stale `_DAGNode__path` attribute on the node, which then showed up in
    # `describe()` and was carried along by `copy()`.
    paths: List[List[T]] = []

    def _collect_paths(_node: T, _path: List[T]) -> None:
        """Depth-first walk that records every path ending at the target node

        Args:
            _node (DAGNode): current node
            _path (List[DAGNode]): path from start node to current node, excluding current node
        """
        _path = _path + [_node]  # new list per branch, siblings do not share state
        if _node == node:
            paths.append(_path)
            return
        for _child in _node.children:
            _collect_paths(_child, _path)

    _collect_paths(self, [])
    return paths
class Node(BaseNode):
    """
    Node extends BaseNode with a mandatory name and a path separator, and can be
    subclassed by any Python class.
    Attributes can be attached when nodes are built from `Node`, a *dictionary*, or a *pandas DataFrame*.

    Link nodes together through the `parent` and `children` setters.

    Examples:
        >>> from bigtree import Node
        >>> a = Node("a")
        >>> b = Node("b")
        >>> c = Node("c")
        >>> d = Node("d")
        >>> b.parent = a
        >>> b.children = [c, d]

        Passing the `parent` argument at construction.

        >>> from bigtree import Node
        >>> a = Node("a")
        >>> b = Node("b", parent=a)
        >>> c = Node("c", parent=b)
        >>> d = Node("d", parent=b)

        Passing the `children` argument at construction.

        >>> from bigtree import Node
        >>> d = Node("d")
        >>> c = Node("c")
        >>> b = Node("b", children=[c, d])
        >>> a = Node("a", children=[b])

    **Node Creation**

    Instantiate `Node` directly, or build it from a *dictionary*; in the latter case
    every dictionary key becomes an attribute of the node.

        >>> from bigtree import Node
        >>> a = Node.from_dict({"name": "a", "age": 90})

    **Node Attributes**

    Attributes exposed through getter and/or setter methods.

    Get and set `Node` configuration

    1. ``sep``: Get/set separator for path name

    Get `Node` configuration

    1. ``node_name``: Get node name, without accessing `name` directly
    2. ``path_name``: Get path name from root, separated by `sep`

    **Node Methods**

    Methods that can be called on a `Node`.

    `Node` methods

    1. ``show()``: Print tree to console
    2. ``hshow()``: Print tree in horizontal orientation to console

    ----

    """

    def __init__(self, name: str = "", sep: str = "/", **kwargs: Any):
        self.name = name
        self._sep = sep
        super().__init__(**kwargs)
        # The name check runs last, after the base class has processed kwargs
        if not self.node_name:
            raise TreeError("Node must have a `name` attribute")

    @property
    def sep(self) -> str:
        """Separator used in path names, always resolved from the root node

        Returns:
            (str)
        """
        parent = self.parent
        return self._sep if parent is None else parent.sep

    @sep.setter
    def sep(self, value: str) -> None:
        """Change the separator; the value is stored on the root node

        Args:
            value (str): separator to replace default separator
        """
        self.root._sep = value

    @property
    def node_name(self) -> str:
        """Name of the node

        Returns:
            (str)
        """
        return self.name

    @property
    def path_name(self) -> str:
        """Path from the root to this node, prefixed and joined with the separator

        Returns:
            (str)
        """
        lineage = [self, *self.ancestors]
        separator = lineage[-1].sep
        names = [str(member.name) for member in lineage[::-1]]
        return separator + separator.join(names)

    def __pre_assign_children(self: T, new_children: List[T]) -> None:
        """Hook called before children are attached, no-op by default
        Can be overridden with `_Node__pre_assign_children()`

        Args:
            new_children (List[Self]): new children to be added
        """

    def __post_assign_children(self: T, new_children: List[T]) -> None:
        """Hook called after children are attached, no-op by default
        Can be overridden with `_Node__post_assign_children()`

        Args:
            new_children (List[Self]): new children to be added
        """

    def __pre_assign_parent(self: T, new_parent: T) -> None:
        """Hook called before a parent is attached, no-op by default
        Can be overridden with `_Node__pre_assign_parent()`

        Args:
            new_parent (Self): new parent to be added
        """

    def __post_assign_parent(self: T, new_parent: T) -> None:
        """Hook called after a parent is attached, no-op by default
        Can be overridden with `_Node__post_assign_parent()`

        Args:
            new_parent (Self): new parent to be added
        """

    def _BaseNode__pre_assign_parent(self: T, new_parent: T) -> None:
        """Reject a parent that already has another child with the same name

        Args:
            new_parent (Self): new parent to be added
        """
        self.__pre_assign_parent(new_parent)
        if new_parent is None:
            return
        for sibling in new_parent.children:
            if sibling.node_name == self.node_name and sibling is not self:
                raise TreeError(
                    f"Duplicate node with same path\n"
                    f"There exist a node with same path {new_parent.path_name}{new_parent.sep}{self.node_name}"
                )

    def _BaseNode__post_assign_parent(self: T, new_parent: T) -> None:
        """No additional rule, only forwards to the `Node` hook

        Args:
            new_parent (Self): new parent to be added
        """
        self.__post_assign_parent(new_parent)

    def _BaseNode__pre_assign_children(self: T, new_children: List[T]) -> None:
        """Reject a list of children in which a name occurs more than once

        Args:
            new_children (List[Self]): new children to be added
        """
        self.__pre_assign_children(new_children)
        name_counts = Counter(child.node_name for child in new_children)
        repeated_names = [name for name, count in name_counts.items() if count > 1]
        if not repeated_names:
            return
        clashing_paths = " and ".join(
            f"{self.path_name}{self.sep}{name}" for name in repeated_names
        )
        raise TreeError(
            f"Duplicate node with same path\n"
            f"Attempting to add nodes with same path {clashing_paths}"
        )

    def _BaseNode__post_assign_children(self: T, new_children: List[T]) -> None:
        """No additional rule, only forwards to the `Node` hook

        Args:
            new_children (List[Self]): new children to be added
        """
        self.__post_assign_children(new_children)

    def show(self, **kwargs: Any) -> None:
        """Print tree to console, accepts the keyword arguments of the `print_tree` function"""
        from bigtree.tree.export import print_tree

        print_tree(self, **kwargs)

    def hshow(self, **kwargs: Any) -> None:
        """Print tree horizontally to console, accepts the keyword arguments of the `hprint_tree` function"""
        from bigtree.tree.export import hprint_tree

        hprint_tree(self, **kwargs)

    def __getitem__(self, child_name: str) -> T:
        """Look up a direct child by its name

        Args:
            child_name (str): name of child node

        Returns:
            (Self): child node
        """
        from bigtree.tree.search import find_child_by_name

        return find_child_by_name(self, child_name)  # type: ignore

    def __delitem__(self, child_name: str) -> None:
        """Detach a direct child by its name, silently does nothing if there is no such child

        Args:
            child_name (str): name of child node
        """
        from bigtree.tree.search import find_child_by_name

        target = find_child_by_name(self, child_name)
        if target:
            target.parent = None

    def __repr__(self) -> str:
        """Representation made of class name, path name and public attributes

        Returns:
            (str)
        """
        public_attrs = self.describe(exclude_prefix="_", exclude_attributes=["name"])
        details = ", ".join(f"{key}={value}" for key, value in public_attrs)
        return f"{self.__class__.__name__}({self.path_name}, {details})"
def add_path_to_tree(
    tree: Node,
    path: str,
    sep: str = "/",
    duplicate_name_allowed: bool = True,
    node_attrs: Dict[str, Any] = {},
) -> Node:
    """Add nodes and attributes to existing tree *in-place*, return node of path added.
    Adds to existing tree from a single path string.

    Path should contain ``Node`` name, separated by `sep`.

    - For example: Path string "a/b" refers to Node("b") with parent Node("a").
    - Path separator `sep` is for the input `path` and can differ from existing tree.

    Path can start from root node `name`, or start with `sep`.

    - For example: Path string can be "/a/b" or "a/b", if sep is "/".

    All paths should start from the same root node.

    - For example: Path strings should be "a/b", "a/c", "a/b/d" etc., and should not start with another root node.

    Examples:
        >>> from bigtree import add_path_to_tree, Node
        >>> root = Node("a")
        >>> add_path_to_tree(root, "a/b/c")
        Node(/a/b/c, )
        >>> root.show()
        a
        └── b
            └── c

    Args:
        tree (Node): existing tree
        path (str): path to be added to tree
        sep (str): path separator for input `path`
        duplicate_name_allowed (bool): indicator if nodes with duplicate ``Node`` name is allowed, defaults to True
        node_attrs (Dict[str, Any]): attributes to add to node, key: attribute name, value: attribute value, optional;
            the dictionary passed in is not modified

    Returns:
        (Node)

    Raises:
        ValueError: if `path` is empty
        TreeError: if `path` does not start from the root node of `tree`
        DuplicatedNodeError: if `duplicate_name_allowed` is False and the node name exists under another path
    """
    if not path:
        raise ValueError("Path is empty, check `path`")

    # Work on a private copy: creating the leaf pops "name" from the
    # attributes, which previously altered the caller's dictionary (and would
    # have altered the shared default argument object as well).
    node_attrs = dict(node_attrs)

    tree_root = tree.root
    tree_sep = tree_root.sep
    node_type = tree_root.__class__
    branch = path.lstrip(sep).rstrip(sep).split(sep)
    if branch[0] != tree_root.node_name:
        raise TreeError(
            f"Path does not have same root node, expected {tree_root.node_name}, received {branch[0]}\n"
            f"Check your input paths or verify that path separator `sep` is set correctly"
        )

    # Grow tree
    node = tree_root
    parent_node = tree_root
    last_idx = len(branch) - 1
    for idx in range(1, len(branch)):
        node_name = branch[idx]
        # Path of the current node expressed with the separator of the tree
        node_path = tree_sep.join(branch[: idx + 1])
        if not duplicate_name_allowed:
            node = find_name(tree_root, node_name)
            if node and not node.path_name.endswith(node_path):
                raise DuplicatedNodeError(
                    f"Node {node_name} already exists, try setting `duplicate_name_allowed` to True "
                    f"to allow `Node` with same node name"
                )
        else:
            node = find_child_by_name(parent_node, node_name)
        if not node:
            if idx == last_idx:
                # Only the leaf receives the attributes; a "name" attribute
                # overrides the name taken from the path
                node_name = node_attrs.pop("name", branch[idx])
                node = node_type(node_name, **node_attrs)
            else:
                node = node_type(branch[idx])
            node.parent = parent_node
        parent_node = node
    # Also covers the case where the leaf (or the root) already existed
    node.set_attrs(node_attrs)
    return node
root of tree. + Adds to existing tree from nested dictionary, ``key``: path, ``value``: dict of attribute name and attribute value. + + Path should contain ``Node`` name, separated by `sep`. + + - For example: Path string "a/b" refers to Node("b") with parent Node("a"). + - Path separator `sep` is for the input `path` and can differ from existing tree. + + Path can start from root node `name`, or start with `sep`. + + - For example: Path string can be "/a/b" or "a/b", if sep is "/". + + All paths should start from the same root node. + + - For example: Path strings should be "a/b", "a/c", "a/b/d" etc. and should not start with another root node. + + Examples: + >>> from bigtree import Node, add_dict_to_tree_by_path + >>> root = Node("a") + >>> path_dict = { + ... "a": {"age": 90}, + ... "a/b": {"age": 65}, + ... "a/c": {"age": 60}, + ... "a/b/d": {"age": 40}, + ... "a/b/e": {"age": 35}, + ... "a/c/f": {"age": 38}, + ... "a/b/e/g": {"age": 10}, + ... "a/b/e/h": {"age": 6}, + ... } + >>> root = add_dict_to_tree_by_path(root, path_dict) + >>> root.show() + a + ├── b + │ ├── d + │ └── e + │ ├── g + │ └── h + └── c + └── f + + Args: + tree (Node): existing tree + path_attrs (Dict[str, Dict[str, Any]]): dictionary containing node path and attribute information, + key: node path, value: dict of node attribute name and attribute value + sep (str): path separator for input `path_attrs` + duplicate_name_allowed (bool): indicator if nodes with duplicate ``Node`` name is allowed, defaults to True + + Returns: + (Node) + """ + if not len(path_attrs): + raise ValueError("Dictionary does not contain any data, check `path_attrs`") + + tree_root = tree.root + + for k, v in path_attrs.items(): + add_path_to_tree( + tree_root, + k, + sep=sep, + duplicate_name_allowed=duplicate_name_allowed, + node_attrs=v, + ) + return tree_root + + +@optional_dependencies_pandas +def add_dict_to_tree_by_name( + tree: Node, name_attrs: Dict[str, Dict[str, Any]], join_type: str = "left" +) -> Node: + 
def add_dataframe_to_tree_by_path(
    tree: Node,
    data: pd.DataFrame,
    path_col: str = "",
    attribute_cols: List[str] = [],
    sep: str = "/",
    duplicate_name_allowed: bool = True,
) -> Node:
    """Add nodes and attributes to tree *in-place*, return root of tree.

    `path_col` and `attribute_cols` specify columns for node path and attributes to add to existing tree.
    If columns are not specified, `path_col` takes first column and all other columns are `attribute_cols`.
    Columns of `data` that are neither `path_col` nor listed in `attribute_cols` are ignored.

    Path in path column should contain ``Node`` name, separated by `sep`.

    - For example: Path string "a/b" refers to Node("b") with parent Node("a").
    - Path separator `sep` is for the input `path` and can differ from existing tree.

    Path in path column can start from root node `name`, or start with `sep`.

    - For example: Path string can be "/a/b" or "a/b", if sep is "/".

    All paths should start from the same root node.

    - For example: Path strings should be "a/b", "a/c", "a/b/d" etc. and should not start with another root node.

    Examples:
        >>> import pandas as pd
        >>> from bigtree import add_dataframe_to_tree_by_path, Node
        >>> root = Node("a")
        >>> path_data = pd.DataFrame([
        ...     ["a", 90],
        ...     ["a/b", 65],
        ...     ["a/c", 60],
        ...     ["a/b/d", 40],
        ...     ["a/b/e", 35],
        ...     ["a/c/f", 38],
        ...     ["a/b/e/g", 10],
        ...     ["a/b/e/h", 6],
        ... ],
        ...     columns=["PATH", "age"]
        ... )
        >>> root = add_dataframe_to_tree_by_path(root, path_data)
        >>> root.show(attr_list=["age"])
        a [age=90]
        ├── b [age=65]
        │   ├── d [age=40]
        │   └── e [age=35]
        │       ├── g [age=10]
        │       └── h [age=6]
        └── c [age=60]
            └── f [age=38]

    Args:
        tree (Node): existing tree
        data (pd.DataFrame): data containing node path and attribute information
        path_col (str): column of data containing `path_name` information,
            if not set, it will take the first column of data
        attribute_cols (List[str]): columns of data containing node attribute information,
            if not set, it will take all columns of data except `path_col`
        sep (str): path separator for input `path_col`
        duplicate_name_allowed (bool): indicator if nodes with duplicate ``Node`` name is allowed, defaults to True

    Returns:
        (Node)

    Raises:
        ValueError: if `data` has no columns or no rows, or if the same path
            appears with different attribute values
    """
    if not len(data.columns):
        raise ValueError("Data does not contain any columns, check `data`")
    if not len(data):
        raise ValueError("Data does not contain any rows, check `data`")

    if not path_col:
        path_col = data.columns[0]
    if not len(attribute_cols):
        attribute_cols = list(data.columns)
        attribute_cols.remove(path_col)

    # Keep only the requested columns (on a copy, the input frame is never
    # modified). Previously `attribute_cols` was honoured by the duplicate
    # check only, and every other column was still attached as an attribute.
    data = data[[path_col, *attribute_cols]].copy()

    tree_root = tree.root
    data[path_col] = data[path_col].str.lstrip(sep).str.rstrip(sep)

    # A path may appear on several rows only if all its attributes agree;
    # values are compared as strings.
    data2 = data.astype(str).drop_duplicates()
    _duplicate_check = (
        data2[path_col]
        .value_counts()
        .to_frame("counts")
        .rename_axis(path_col)
        .reset_index()
    )
    _duplicate_check = _duplicate_check[_duplicate_check["counts"] > 1]
    if len(_duplicate_check):
        raise ValueError(
            f"There exists duplicate path with different attributes\nCheck {_duplicate_check}"
        )

    for row in data.to_dict(orient="index").values():
        # Attributes are every selected column except the path, skipping None
        node_attrs = {
            k: v for k, v in row.items() if k != path_col and v is not None
        }
        add_path_to_tree(
            tree_root,
            row[path_col],
            sep=sep,
            duplicate_name_allowed=duplicate_name_allowed,
            node_attrs=node_attrs,
        )
    return tree_root
) + >>> root = add_dataframe_to_tree_by_name(root, name_data) + >>> root.show(attr_list=["age"]) + a [age=90] + └── b [age=65] + + Args: + tree (Node): existing tree + data (pd.DataFrame): data containing node name and attribute information + name_col (str): column of data containing `name` information, + if not set, it will take the first column of data + attribute_cols (List[str]): column(s) of data containing node attribute information, + if not set, it will take all columns of data except `path_col` + join_type (str): join type with attribute, default of 'left' takes existing tree nodes, + if join_type is set to 'inner' it will only take tree nodes with attributes and drop the other nodes + + Returns: + (Node) + """ + data = data.copy() + + if join_type not in ["inner", "left"]: + raise ValueError("`join_type` must be one of 'inner' or 'left'") + + if not len(data.columns): + raise ValueError("Data does not contain any columns, check `data`") + if not len(data): + raise ValueError("Data does not contain any rows, check `data`") + + if not name_col: + name_col = data.columns[0] + if not len(attribute_cols): + attribute_cols = list(data.columns) + attribute_cols.remove(name_col) + + # Attribute data + path_col = "PATH" + data2 = data.copy()[[name_col] + attribute_cols].astype(str).drop_duplicates() + _duplicate_check = ( + data2[name_col] + .value_counts() + .to_frame("counts") + .rename_axis(name_col) + .reset_index() + ) + _duplicate_check = _duplicate_check[_duplicate_check["counts"] > 1] + if len(_duplicate_check): + raise ValueError( + f"There exists duplicate name with different attributes\nCheck {_duplicate_check}" + ) + + # Tree data + tree_root = tree.root + sep = tree_root.sep + node_type = tree_root.__class__ + data_tree = tree_to_dataframe( + tree_root, name_col=name_col, path_col=path_col, all_attrs=True + ) + common_cols = list(set(data_tree.columns).intersection(attribute_cols)) + data_tree = data_tree.drop(columns=common_cols) + + # Attribute data 
+ data_tree_attrs = pd.merge(data_tree, data, on=name_col, how=join_type) + data_tree_attrs = data_tree_attrs.drop(columns=name_col) + + return dataframe_to_tree( + data_tree_attrs, path_col=path_col, sep=sep, node_type=node_type + ) + + +def str_to_tree( + tree_string: str, + tree_prefix_list: List[str] = [], + node_type: Type[Node] = Node, +) -> Node: + r"""Construct tree from tree string + + Examples: + >>> from bigtree import str_to_tree + >>> tree_str = 'a\n├── b\n│ ├── d\n│ └── e\n│ ├── g\n│ └── h\n└── c\n └── f' + >>> root = str_to_tree(tree_str, tree_prefix_list=["├──", "└──"]) + >>> root.show() + a + ├── b + │ ├── d + │ └── e + │ ├── g + │ └── h + └── c + └── f + + Args: + tree_string (str): String to construct tree + tree_prefix_list (List[str]): List of prefix to mark the end of tree branch/stem and start of node name, optional. + If not specified, it will infer unicode characters and whitespace as prefix. + node_type (Type[Node]): node type of tree to be created, defaults to ``Node`` + + Returns: + (Node) + """ + tree_string = tree_string.strip("\n") + if not len(tree_string): + raise ValueError("Tree string does not contain any data, check `tree_string`") + tree_list = tree_string.split("\n") + tree_root = node_type(tree_list[0]) + + # Infer prefix length + prefix_length = None + cur_parent = tree_root + for node_str in tree_list[1:]: + if len(tree_prefix_list): + node_name = re.split("|".join(tree_prefix_list), node_str)[-1].lstrip() + else: + node_name = node_str.encode("ascii", "ignore").decode("ascii").lstrip() + + # Find node parent + if not prefix_length: + prefix_length = node_str.index(node_name) + if not prefix_length: + raise ValueError( + f"Invalid prefix, prefix should be unicode character or whitespace, " + f"otherwise specify one or more prefixes in `tree_prefix_list`, check: {node_str}" + ) + node_prefix_length = node_str.index(node_name) + if node_prefix_length % prefix_length: + raise ValueError( + f"Tree string have different prefix 
length, check branch: {node_str}" + ) + while cur_parent.depth > node_prefix_length / prefix_length: + cur_parent = cur_parent.parent + + # Link node + child_node = node_type(node_name) + child_node.parent = cur_parent + cur_parent = child_node + + return tree_root + + +def list_to_tree( + paths: Iterable[str], + sep: str = "/", + duplicate_name_allowed: bool = True, + node_type: Type[Node] = Node, +) -> Node: + """Construct tree from list of path strings. + + Path should contain ``Node`` name, separated by `sep`. + + - For example: Path string "a/b" refers to Node("b") with parent Node("a"). + + Path can start from root node `name`, or start with `sep`. + + - For example: Path string can be "/a/b" or "a/b", if sep is "/". + + All paths should start from the same root node. + + - For example: Path strings should be "a/b", "a/c", "a/b/d" etc. and should not start with another root node. + + Examples: + >>> from bigtree import list_to_tree + >>> path_list = ["a/b", "a/c", "a/b/d", "a/b/e", "a/c/f", "a/b/e/g", "a/b/e/h"] + >>> root = list_to_tree(path_list) + >>> root.show() + a + ├── b + │ ├── d + │ └── e + │ ├── g + │ └── h + └── c + └── f + + Args: + paths (Iterable[str]): list containing path strings + sep (str): path separator for input `paths` and created tree, defaults to `/` + duplicate_name_allowed (bool): indicator if nodes with duplicate ``Node`` name is allowed, defaults to True + node_type (Type[Node]): node type of tree to be created, defaults to ``Node`` + + Returns: + (Node) + """ + if not paths: + raise ValueError("Path list does not contain any data, check `paths`") + + # Remove duplicates + paths = list(OrderedDict.fromkeys(paths)) + + # Construct root node + root_name = paths[0].lstrip(sep).split(sep)[0] + root_node = node_type(root_name) + root_node.sep = sep + + for path in paths: + add_path_to_tree( + root_node, path, sep=sep, duplicate_name_allowed=duplicate_name_allowed + ) + root_node.sep = sep + return root_node + + 
@optional_dependencies_pandas
def list_to_tree_by_relation(
    relations: Iterable[Tuple[str, str]],
    allow_duplicates: bool = False,
    node_type: Type[Node] = Node,
) -> Node:
    """Construct tree from list of tuple containing parent-child names.

    Since tree is created from parent-child names, only names of leaf nodes may be repeated.
    Error will be thrown if names of intermediate nodes are repeated as there will be confusion.
    This error can be ignored by setting `allow_duplicates` to be True.

    Examples:
        >>> from bigtree import list_to_tree_by_relation
        >>> relations_list = [("a", "b"), ("a", "c"), ("b", "d"), ("b", "e"), ("c", "f"), ("e", "g"), ("e", "h")]
        >>> root = list_to_tree_by_relation(relations_list)
        >>> root.show()
        a
        ├── b
        │   ├── d
        │   └── e
        │       ├── g
        │       └── h
        └── c
            └── f

    Args:
        relations (Iterable[Tuple[str, str]]): list containing tuple containing parent-child names
        allow_duplicates (bool): allow duplicate intermediate nodes such that child node will
            be tagged to multiple parent nodes, defaults to False
        node_type (Type[Node]): node type of tree to be created, defaults to ``Node``

    Returns:
        (Node)

    Raises:
        ValueError: if `relations` does not contain any parent-child pair
    """
    # Materialise first so that an empty one-shot iterable (e.g. a generator) is detected here
    relation_list = list(relations or [])
    if not relation_list:
        raise ValueError("Path list does not contain any data, check `relations`")

    relation_data = pd.DataFrame(relation_list, columns=["parent", "child"])
    return dataframe_to_tree_by_relation(
        relation_data,
        child_col="child",
        parent_col="parent",
        allow_duplicates=allow_duplicates,
        node_type=node_type,
    )


@optional_dependencies_pandas
def dict_to_tree(
    path_attrs: Dict[str, Any],
    sep: str = "/",
    duplicate_name_allowed: bool = True,
    node_type: Type[Node] = Node,
) -> Node:
    """Construct tree from nested dictionary using path,
    ``key``: path, ``value``: dict of attribute name and attribute value.

    Path should contain ``Node`` name, separated by `sep`.

    - For example: Path string "a/b" refers to Node("b") with parent Node("a").

    Path can start from root node `name`, or start with `sep`.

    - For example: Path string can be "/a/b" or "a/b", if sep is "/".

    All paths should start from the same root node.

    - For example: Path strings should be "a/b", "a/c", "a/b/d" etc. and should not start with another root node.

    Examples:
        >>> from bigtree import dict_to_tree
        >>> path_dict = {
        ...     "a": {"age": 90},
        ...     "a/b": {"age": 65},
        ...     "a/c": {"age": 60},
        ...     "a/b/d": {"age": 40},
        ...     "a/b/e": {"age": 35},
        ...     "a/c/f": {"age": 38},
        ...     "a/b/e/g": {"age": 10},
        ...     "a/b/e/h": {"age": 6},
        ... }
        >>> root = dict_to_tree(path_dict)
        >>> root.show(attr_list=["age"])
        a [age=90]
        ├── b [age=65]
        │   ├── d [age=40]
        │   └── e [age=35]
        │       ├── g [age=10]
        │       └── h [age=6]
        └── c [age=60]
            └── f [age=38]

    Args:
        path_attrs (Dict[str, Any]): dictionary containing path and node attribute information,
            key: path, value: dict of tree attribute and attribute value
        sep (str): path separator of input `path_attrs` and created tree, defaults to `/`
        duplicate_name_allowed (bool): indicator if nodes with duplicate ``Node`` name is allowed, defaults to True
        node_type (Type[Node]): node type of tree to be created, defaults to ``Node``

    Returns:
        (Node)

    Raises:
        ValueError: if `path_attrs` is empty
    """
    if not len(path_attrs):
        raise ValueError("Dictionary does not contain any data, check `path_attrs`")

    # Convert dictionary to dataframe: one row per path, with the path in a "PATH" column
    data = pd.DataFrame(path_attrs).T.rename_axis("PATH").reset_index()
    return dataframe_to_tree(
        data,
        sep=sep,
        duplicate_name_allowed=duplicate_name_allowed,
        node_type=node_type,
    )


def nested_dict_to_tree(
    node_attrs: Dict[str, Any],
    name_key: str = "name",
    child_key: str = "children",
    node_type: Type[Node] = Node,
) -> Node:
    """Construct tree from nested recursive dictionary.

    - ``key``: `name_key`, `child_key`, or any attributes key.
    - ``value`` of `name_key` (str): node name.
    - ``value`` of `child_key` (List[Dict[str, Any]]): list of dict containing `name_key` and `child_key` (recursive).

    Examples:
        >>> from bigtree import nested_dict_to_tree
        >>> path_dict = {
        ...     "name": "a",
        ...     "age": 90,
        ...     "children": [
        ...         {"name": "b",
        ...          "age": 65,
        ...          "children": [
        ...              {"name": "d", "age": 40},
        ...              {"name": "e", "age": 35, "children": [
        ...                  {"name": "g", "age": 10},
        ...              ]},
        ...          ]},
        ...     ],
        ... }
        >>> root = nested_dict_to_tree(path_dict)
        >>> root.show(attr_list=["age"])
        a [age=90]
        └── b [age=65]
            ├── d [age=40]
            └── e [age=35]
                └── g [age=10]

    Args:
        node_attrs (Dict[str, Any]): dictionary containing node, children, and node attribute information,
            key: `name_key` and `child_key`
            value of `name_key` (str): node name
            value of `child_key` (List[Dict[str, Any]]): list of dict containing `name_key` and `child_key` (recursive)
        name_key (str): key of node name, value is type str
        child_key (str): key of child list, value is type list
        node_type (Type[Node]): node type of tree to be created, defaults to ``Node``

    Returns:
        (Node)

    Raises:
        ValueError: if `node_attrs` is empty
        TypeError: if a value stored under `child_key` is not a list
    """
    if not node_attrs:
        raise ValueError("Dictionary does not contain any data, check `node_attrs`")

    def _recursive_add_child(
        child_dict: Dict[str, Any], parent_node: Optional[Node] = None
    ) -> Node:
        """Recursively add child to tree, given child attributes and parent node.

        Args:
            child_dict (Dict[str, Any]): child to be added to tree, from dictionary
            parent_node (Node): parent node to be assigned to child node, defaults to None

        Returns:
            (Node)
        """
        # Work on a shallow copy so the caller's dictionary is not modified by the pops below
        child_dict = child_dict.copy()
        node_name = child_dict.pop(name_key)
        node_children = child_dict.pop(child_key, [])
        if not isinstance(node_children, list):
            raise TypeError(
                f"child_key {child_key} should be List type, received {node_children}"
            )
        # Remaining keys are passed to the node constructor as attributes
        node = node_type(node_name, parent=parent_node, **child_dict)
        for _child in node_children:
            _recursive_add_child(_child, parent_node=node)
        return node

    root_node = _recursive_add_child(node_attrs)
    return root_node


def dataframe_to_tree(
    data: pd.DataFrame,
    path_col: str = "",
    attribute_cols: List[str] = [],
    sep: str = "/",
    duplicate_name_allowed: bool = True,
    node_type: Type[Node] = Node,
) -> Node:
    """Construct tree from pandas DataFrame using path, return root of tree.

    `path_col` and `attribute_cols` specify columns for node path and attributes to construct tree.
    If columns are not specified, `path_col` takes first column and all other columns are `attribute_cols`.

    Path in path column can start from root node `name`, or start with `sep`.

    - For example: Path string can be "/a/b" or "a/b", if sep is "/".

    Path in path column should contain ``Node`` name, separated by `sep`.

    - For example: Path string "a/b" refers to Node("b") with parent Node("a").

    All paths should start from the same root node.

    - For example: Path strings should be "a/b", "a/c", "a/b/d" etc. and should not start with another root node.

    Examples:
        >>> import pandas as pd
        >>> from bigtree import dataframe_to_tree
        >>> path_data = pd.DataFrame([
        ...     ["a", 90],
        ...     ["a/b", 65],
        ...     ["a/c", 60],
        ...     ["a/b/d", 40],
        ...     ["a/b/e", 35],
        ...     ["a/c/f", 38],
        ...     ["a/b/e/g", 10],
        ...     ["a/b/e/h", 6],
        ... ],
        ...     columns=["PATH", "age"]
        ... )
        >>> root = dataframe_to_tree(path_data)
        >>> root.show(attr_list=["age"])
        a [age=90]
        ├── b [age=65]
        │   ├── d [age=40]
        │   └── e [age=35]
        │       ├── g [age=10]
        │       └── h [age=6]
        └── c [age=60]
            └── f [age=38]

    Args:
        data (pd.DataFrame): data containing path and node attribute information
        path_col (str): column of data containing `path_name` information,
            if not set, it will take the first column of data
        attribute_cols (List[str]): columns of data containing node attribute information,
            if not set, it will take all columns of data except `path_col`
        sep (str): path separator of input `path_col` and created tree, defaults to `/`
        duplicate_name_allowed (bool): indicator if nodes with duplicate ``Node`` name is allowed, defaults to True
        node_type (Type[Node]): node type of tree to be created, defaults to ``Node``

    Returns:
        (Node)

    Raises:
        ValueError: if `data` has no columns or no rows, or if one path is listed with more
            than one distinct set of attribute values
    """
    # Copy because the path column is normalised in place below
    data = data.copy()

    if not len(data.columns):
        raise ValueError("Data does not contain any columns, check `data`")
    if not len(data):
        raise ValueError("Data does not contain any rows, check `data`")

    if not path_col:
        path_col = data.columns[0]
    if not len(attribute_cols):
        # The default list is never mutated; a fresh list is built here instead
        attribute_cols = list(data.columns)
        attribute_cols.remove(path_col)

    # Remove leading and trailing separators so "/a/b", "a/b" and "a/b/" are the same path
    data[path_col] = data[path_col].str.strip(sep)

    # Compare rows as strings so that unhashable attribute values can be de-duplicated
    data2 = data[[path_col] + attribute_cols].astype(str).drop_duplicates()
    _duplicate_check = (
        data2[path_col]
        .value_counts()
        .to_frame("counts")
        .rename_axis(path_col)
        .reset_index()
    )
    _duplicate_check = _duplicate_check[_duplicate_check["counts"] > 1]
    if len(_duplicate_check):
        raise ValueError(
            f"There exists duplicate path with different attributes\nCheck {_duplicate_check}"
        )

    # Root name is the first component of the first path
    root_name = data[path_col].values[0].split(sep)[0]
    root_node_data = data[data[path_col] == root_name]
    if len(root_node_data):
        # A row exists for the root itself, create the root with that row's attributes
        root_node_kwargs = list(
            root_node_data[attribute_cols].to_dict(orient="index").values()
        )[0]
        # A "name" attribute, if present, takes precedence over the name from the path
        root_name = root_node_kwargs.pop("name", root_name)
        root_node = node_type(root_name, **root_node_kwargs)
    else:
        root_node = node_type(root_name)
    add_dataframe_to_tree_by_path(
        root_node,
        data,
        path_col=path_col,
        attribute_cols=attribute_cols,
        sep=sep,
        duplicate_name_allowed=duplicate_name_allowed,
    )
    root_node.sep = sep
    return root_node


def dataframe_to_tree_by_relation(
    data: pd.DataFrame,
    child_col: str = "",
    parent_col: str = "",
    attribute_cols: List[str] = [],
    allow_duplicates: bool = False,
    node_type: Type[Node] = Node,
) -> Node:
    """Construct tree from pandas DataFrame using parent and child names, return root of tree.

    Since tree is created from parent-child names, only names of leaf nodes may be repeated.
    Error will be thrown if names of intermediate nodes are repeated as there will be confusion.
    This error can be ignored by setting `allow_duplicates` to be True.

    `child_col` and `parent_col` specify columns for child name and parent name to construct tree.
    `attribute_cols` specify columns for node attribute for child name.
    If columns are not specified, `child_col` takes first column, `parent_col` takes second column, and all other
    columns are `attribute_cols`.

    Examples:
        >>> import pandas as pd
        >>> from bigtree import dataframe_to_tree_by_relation
        >>> relation_data = pd.DataFrame([
        ...     ["a", None, 90],
        ...     ["b", "a", 65],
        ...     ["c", "a", 60],
        ...     ["d", "b", 40],
        ...     ["e", "b", 35],
        ...     ["f", "c", 38],
        ...     ["g", "e", 10],
        ...     ["h", "e", 6],
        ... ],
        ...     columns=["child", "parent", "age"]
        ... )
        >>> root = dataframe_to_tree_by_relation(relation_data)
        >>> root.show(attr_list=["age"])
        a [age=90]
        ├── b [age=65]
        │   ├── d [age=40]
        │   └── e [age=35]
        │       ├── g [age=10]
        │       └── h [age=6]
        └── c [age=60]
            └── f [age=38]

    Args:
        data (pd.DataFrame): data containing path and node attribute information
        child_col (str): column of data containing child name information,
            if not set, it will take the first column of data
        parent_col (str): column of data containing parent name information,
            if not set, it will take the second column of data
        attribute_cols (List[str]): columns of data containing node attribute information,
            if not set, it will take all columns of data except `child_col` and `parent_col`
        allow_duplicates (bool): allow duplicate intermediate nodes such that child node will
            be tagged to multiple parent nodes, defaults to False
        node_type (Type[Node]): node type of tree to be created, defaults to ``Node``

    Returns:
        (Node)

    Raises:
        ValueError: if `data` has no columns or no rows, if a non-leaf child is listed under more than
            one parent while `allow_duplicates` is False, or if there is not exactly one root candidate
    """
    if not len(data.columns):
        raise ValueError("Data does not contain any columns, check `data`")
    if not len(data):
        raise ValueError("Data does not contain any rows, check `data`")

    if not child_col:
        child_col = data.columns[0]
    if not parent_col:
        parent_col = data.columns[1]
    if not len(attribute_cols):
        # The default list is never mutated; a fresh list is built here instead
        attribute_cols = list(data.columns)
        attribute_cols.remove(child_col)
        attribute_cols.remove(parent_col)

    # Keep only the relation columns and the selected attribute columns, so that child
    # nodes receive the same attribute set that is used for the root node
    data = data[[child_col, parent_col] + attribute_cols].copy()

    data_check = data[[child_col, parent_col]].drop_duplicates()
    # Filter for child nodes that are parent of other nodes
    if not allow_duplicates:
        data_check = data_check[data_check[child_col].isin(data_check[parent_col])]
        _duplicate_check = (
            data_check[child_col]
            .value_counts()
            .to_frame("counts")
            .rename_axis(child_col)
            .reset_index()
        )
        _duplicate_check = _duplicate_check[_duplicate_check["counts"] > 1]
        if len(_duplicate_check):
            raise ValueError(
                f"There exists duplicate child with different parent where the child is also a parent node.\n"
                f"Duplicated node names should not happen, but can only exist in leaf nodes to avoid confusion.\n"
                f"Check {_duplicate_check}"
            )

    # If parent-child contains None -> root
    root_row = data[data[parent_col].isnull()]
    root_names = list(root_row[child_col])
    if not len(root_names):
        # Otherwise the root is the parent name that never appears as a child
        root_names = list(set(data[parent_col]) - set(data[child_col]))
    if len(root_names) != 1:
        raise ValueError(
            f"Unable to determine root node\nPossible root nodes: {root_names}"
        )
    root_name = root_names[0]
    root_node_data = data[data[child_col] == root_name]
    if len(root_node_data):
        root_node_kwargs = list(
            root_node_data[attribute_cols].to_dict(orient="index").values()
        )[0]
        # A "name" attribute, if present, takes precedence over the name from the child column
        root_name = root_node_kwargs.pop("name", root_name)
        root_node = node_type(root_name, **root_node_kwargs)
    else:
        root_node = node_type(root_name)

    def _retrieve_attr(_row: Dict[str, Any]) -> Dict[str, Any]:
        """Retrieve node attributes from dictionary, remove parent and child column from dictionary.

        Args:
            _row (Dict[str, Any]): node attributes

        Returns:
            (Dict[str, Any])
        """
        node_attrs = _row.copy()
        node_attrs["name"] = node_attrs[child_col]
        del node_attrs[child_col]
        del node_attrs[parent_col]
        _node_attrs = {k: v for k, v in node_attrs.items() if v is not None}
        return _node_attrs

    def _recursive_add_child(parent_node: Node) -> None:
        """Recursive add child to tree, given current node.

        Args:
            parent_node (Node): parent node
        """
        child_rows = data[data[parent_col] == parent_node.node_name]

        for row in child_rows.to_dict(orient="index").values():
            child_node = node_type(**_retrieve_attr(row))
            child_node.parent = parent_node
            _recursive_add_child(child_node)

    # Create root node attributes
    if len(root_row):
        row = list(root_row.to_dict(orient="index").values())[0]
        root_node.set_attrs(_retrieve_attr(row))
    _recursive_add_child(root_node)
    return root_node


def newick_to_tree(
    tree_string: str,
    length_attr: str = "length",
    attr_prefix: str = "&&NHX:",
    node_type: Type[Node] = Node,
) -> Node:
    """Construct tree from Newick notation, return root of tree.

    In the Newick Notation (or New Hampshire Notation)

    - Tree is represented in round brackets i.e., `(child1,child2,child3)parent`.
    - If there are nested tree, they will be in nested round brackets i.e., `((grandchild1)child1,(grandchild2,grandchild3)child2)parent`.
    - If there is length attribute, they will be beside the name i.e., `(child1:0.5,child2:0.1)parent`.
    - If there are other attributes, attributes are represented in square brackets i.e., `(child1:0.5[S:human],child2:0.1[S:human])parent[S:parent]`.

    Variations supported

    - Support special characters (`[`, `]`, `(`, `)`, `:`, `,`) in node name, attribute name, and attribute values if
        they are enclosed in single quotes i.e., '(name:!)'.
    - If there are no node names, it will be auto-filled with convention `nodeN` with N representing a number.

    Examples:
        >>> from bigtree import newick_to_tree
        >>> root = newick_to_tree("((d,e)b,c)a")
        >>> root.show()
        a
        ├── b
        │   ├── d
        │   └── e
        └── c

        >>> root = newick_to_tree("((d:40,e:35)b:65,c:60)a", length_attr="age")
        >>> root.show(attr_list=["age"])
        a
        ├── b [age=65]
        │   ├── d [age=40]
        │   └── e [age=35]
        └── c [age=60]

        >>> root = newick_to_tree(
        ...     "((d:40[&&NHX:species=human],e:35[&&NHX:species=human])b:65[&&NHX:species=human],c:60[&&NHX:species=human])a[&&NHX:species=human]",
        ...     length_attr="age",
        ... )
        >>> root.show(all_attrs=True)
        a [species=human]
        ├── b [age=65, species=human]
        │   ├── d [age=40, species=human]
        │   └── e [age=35, species=human]
        └── c [age=60, species=human]

    Args:
        tree_string (str): Newick notation to construct tree
        length_attr (str): attribute name to store node length, optional, defaults to 'length'
        attr_prefix (str): prefix before all attributes, within square bracket, used to detect attributes, defaults to "&&NHX:"
        node_type (Type[Node]): node type of tree to be created, defaults to ``Node``

    Returns:
        (Node)

    Raises:
        ValueError: if `tree_string` is empty, or if a character appears in a parser state that does
            not allow it (for example unbalanced brackets or an unclosed quote)
    """
    if not len(tree_string):
        raise ValueError("Tree string does not contain any data, check `tree_string`")

    # Store results (for tracking)
    depth_nodes: Dict[int, List[Node]] = defaultdict(list)
    unlabelled_node_counter: int = 0
    current_depth: int = 1
    tree_string_idx: int = 0

    # Store states (for assertions and checks)
    current_state: NewickState = NewickState.PARSE_STRING
    current_node: Optional[Node] = None
    cumulative_string: str = ""
    cumulative_string_value: str = ""

    def _create_node(
        _new_node: Optional[Node],
        _cumulative_string: str,
        _unlabelled_node_counter: int,
        _depth_nodes: Dict[int, List[Node]],
        _current_depth: int,
    ) -> Tuple[Node, int]:
        """Create node at checkpoint.

        Args:
            _new_node (Optional[Node]): existing node (to add length attribute), or nothing (to create a node)
            _cumulative_string (str): cumulative string, contains either node name or length attribute
            _unlabelled_node_counter (int): number of unlabelled nodes, updates and returns counter
            _depth_nodes (Dict[int, List[Node]]): list of nodes at each depth
            _current_depth (int): depth of current node or node to be created

        Returns:
            (Tuple[Node, int])
        """
        if not _new_node:
            if not _cumulative_string:
                # Unnamed node, auto-fill the name with the `nodeN` convention
                _cumulative_string = f"node{_unlabelled_node_counter}"
                _unlabelled_node_counter += 1
            _new_node = node_type(_cumulative_string)
            _depth_nodes[_current_depth].append(_new_node)
        elif _cumulative_string:
            # Node already exists, so the accumulated text is its length value
            _new_node.set_attrs(
                {
                    length_attr: (
                        int(_cumulative_string)
                        if _cumulative_string.isdigit()
                        else float(_cumulative_string)
                    )
                }
            )

        # Nodes collected one level deeper belong to this node; attach them and clear that level
        if len(_depth_nodes[_current_depth + 1]):
            _new_node.children = _depth_nodes[_current_depth + 1]  # type: ignore
            del _depth_nodes[_current_depth + 1]
        return _new_node, _unlabelled_node_counter

    def _raise_value_error(tree_idx: int) -> None:
        """Raise value error.

        Raises:
            ValueError
        """
        raise ValueError(
            f"String not properly closed, check `tree_string` at index {tree_idx}"
        )

    while tree_string_idx < len(tree_string):
        character = tree_string[tree_string_idx]
        if character == NewickCharacter.OPEN_BRACKET:
            # Check and/or change state
            state_title = "Node creation start"
            if current_state not in [NewickState.PARSE_STRING]:
                _raise_value_error(tree_string_idx)
            # Logic
            current_depth += 1
            if current_node:
                _raise_value_error(tree_string_idx)
            if cumulative_string:
                _raise_value_error(tree_string_idx)
            assert (
                not cumulative_string_value
            ), f"{state_title}, should not have cumulative_string_value"
            tree_string_idx += 1
            continue

        if character in [
            NewickCharacter.CLOSE_BRACKET,
            NewickCharacter.ATTR_START,
            NewickCharacter.NODE_SEP,
        ]:
            # Check and/or change state
            state_title = "Node creation end / Node attribute start"
            if current_state not in [
                NewickState.PARSE_STRING,
                NewickState.PARSE_ATTRIBUTE_NAME,
            ]:
                _raise_value_error(tree_string_idx)
            # Logic
            if character == NewickCharacter.ATTR_START:
                current_state = NewickState.PARSE_ATTRIBUTE_NAME
                # Skip over the attribute prefix when it directly follows the attribute start
                if tree_string.startswith(attr_prefix, tree_string_idx + 1):
                    tree_string_idx += len(attr_prefix)
            current_node, unlabelled_node_counter = _create_node(
                current_node,
                cumulative_string,
                unlabelled_node_counter,
                depth_nodes,
                current_depth,
            )
            if character == NewickCharacter.CLOSE_BRACKET:
                current_depth -= 1
                current_node = None
            if character == NewickCharacter.NODE_SEP:
                current_node = None
            cumulative_string = ""
            assert (
                not cumulative_string_value
            ), f"{state_title}, should not have cumulative_string_value"
            tree_string_idx += 1
            continue

        if character == NewickCharacter.ATTR_END:
            # Check and/or change state
            state_title = "Node attribute end"
            if current_state not in [NewickState.PARSE_ATTRIBUTE_VALUE]:
                _raise_value_error(tree_string_idx)
            current_state = NewickState.PARSE_STRING
            # Logic
            assert current_node, f"{state_title}, should have current_node"
            current_node.set_attrs({cumulative_string: cumulative_string_value})
            cumulative_string = ""
            cumulative_string_value = ""
            tree_string_idx += 1
            continue

        if character == NewickCharacter.ATTR_KEY_VALUE:
            # Check and/or change state
            state_title = "Node attribute creation"
            if current_state not in [NewickState.PARSE_ATTRIBUTE_NAME]:
                _raise_value_error(tree_string_idx)
            current_state = NewickState.PARSE_ATTRIBUTE_VALUE
            # Logic
            assert current_node, f"{state_title}, should have current_node"
            if not cumulative_string:
                _raise_value_error(tree_string_idx)
            assert (
                not cumulative_string_value
            ), f"{state_title}, should not have cumulative_string_value"
            tree_string_idx += 1
            continue

        if character == NewickCharacter.ATTR_QUOTE:
            # Logic
            # Take everything up to the closing quote verbatim, special characters included
            quote_end_idx = tree_string.find(
                NewickCharacter.ATTR_QUOTE, tree_string_idx + 1
            )
            if quote_end_idx == -1:
                _raise_value_error(tree_string_idx)
            if current_state in [
                NewickState.PARSE_STRING,
                NewickState.PARSE_ATTRIBUTE_NAME,
            ]:
                if cumulative_string:
                    _raise_value_error(tree_string_idx)
                cumulative_string = tree_string[
                    tree_string_idx + 1 : quote_end_idx  # noqa: E203
                ]
            else:
                if cumulative_string_value:
                    _raise_value_error(tree_string_idx)
                cumulative_string_value = tree_string[
                    tree_string_idx + 1 : quote_end_idx  # noqa: E203
                ]
            tree_string_idx = quote_end_idx + 1
            continue

        if character == NewickCharacter.SEP:
            # Check and/or change state
            state_title = "Node length creation / Node attribute creation"
            if current_state not in [
                NewickState.PARSE_STRING,
                NewickState.PARSE_ATTRIBUTE_VALUE,
            ]:
                _raise_value_error(tree_string_idx)
            # Logic
            if current_state == NewickState.PARSE_STRING:
                # Separator after a node name: create the node, the length value follows
                if current_node:
                    _raise_value_error(tree_string_idx)
                current_node, unlabelled_node_counter = _create_node(
                    current_node,
                    cumulative_string,
                    unlabelled_node_counter,
                    depth_nodes,
                    current_depth,
                )
                cumulative_string = ""
                assert (
                    not cumulative_string_value
                ), f"{state_title}, should not have cumulative_string_value"
                tree_string_idx += 1
                continue
            else:
                # Separator after an attribute value: store the pair, another attribute name follows
                current_state = NewickState.PARSE_ATTRIBUTE_NAME
                assert current_node, f"{state_title}, should have current_node"
                current_node.set_attrs({cumulative_string: cumulative_string_value})
                cumulative_string = ""
                cumulative_string_value = ""
                tree_string_idx += 1
                continue

        # Any other character extends the name/length text or the attribute value being read
        if current_state == NewickState.PARSE_ATTRIBUTE_VALUE:
            cumulative_string_value += character
        else:
            cumulative_string += character
        tree_string_idx += 1

    # Every opening bracket must have been closed by the end of the string
    if current_depth != 1:
        _raise_value_error(tree_string_idx)

    # Final root node
    if len(depth_nodes[current_depth]):
        current_node = depth_nodes[current_depth][0]
    current_node, unlabelled_node_counter = _create_node(
        current_node,
        cumulative_string,
        unlabelled_node_counter,
        depth_nodes,
        current_depth,
    )
    return current_node
pydot = None + +try: + from PIL import Image, ImageDraw, ImageFont +except ImportError: # pragma: no cover + Image = ImageDraw = ImageFont = None + + +__all__ = [ + "print_tree", + "yield_tree", + "hprint_tree", + "hyield_tree", + "tree_to_newick", + "tree_to_dict", + "tree_to_nested_dict", + "tree_to_dataframe", + "tree_to_dot", + "tree_to_pillow", + "tree_to_mermaid", +] + +T = TypeVar("T", bound=Node) + + +def print_tree( + tree: T, + node_name_or_path: str = "", + max_depth: int = 0, + all_attrs: bool = False, + attr_list: Iterable[str] = [], + attr_omit_null: bool = False, + attr_bracket: List[str] = ["[", "]"], + style: str = "const", + custom_style: Iterable[str] = [], +) -> None: + """Print tree to console, starting from `tree`. + + - Able to select which node to print from, resulting in a subtree, using `node_name_or_path` + - Able to customize for maximum depth to print, using `max_depth` + - Able to choose which attributes to show or show all attributes, using `attr_name_filter` and `all_attrs` + - Able to omit showing of attributes if it is null, using `attr_omit_null` + - Able to customize open and close brackets if attributes are shown, using `attr_bracket` + - Able to customize style, to choose from `ansi`, `ascii`, `const`, `const_bold`, `rounded`, `double`, and `custom` style + - Default style is `const` style + - If style is set to custom, user can choose their own style for stem, branch and final stem icons + - Stem, branch, and final stem symbol should have the same number of characters + + Examples: + **Printing tree** + + >>> from bigtree import Node, print_tree + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=60, parent=root) + >>> d = Node("d", age=40, parent=b) + >>> e = Node("e", age=35, parent=b) + >>> print_tree(root) + a + ├── b + │ ├── d + │ └── e + └── c + + **Printing Sub-tree** + + >>> print_tree(root, node_name_or_path="b") + b + ├── d + └── e + + >>> print_tree(root, max_depth=2) + a 
+ ├── b + └── c + + **Printing Attributes** + + >>> print_tree(root, attr_list=["age"]) + a [age=90] + ├── b [age=65] + │ ├── d [age=40] + │ └── e [age=35] + └── c [age=60] + + >>> print_tree(root, attr_list=["age"], attr_bracket=["*(", ")"]) + a *(age=90) + ├── b *(age=65) + │ ├── d *(age=40) + │ └── e *(age=35) + └── c *(age=60) + + **Available Styles** + + >>> print_tree(root, style="ansi") + a + |-- b + | |-- d + | `-- e + `-- c + + >>> print_tree(root, style="ascii") + a + |-- b + | |-- d + | +-- e + +-- c + + >>> print_tree(root, style="const") + a + ├── b + │ ├── d + │ └── e + └── c + + >>> print_tree(root, style="const_bold") + a + ┣━━ b + ┃ ┣━━ d + ┃ ┗━━ e + ┗━━ c + + >>> print_tree(root, style="rounded") + a + ├── b + │ ├── d + │ ╰── e + ╰── c + + >>> print_tree(root, style="double") + a + ╠══ b + ║ ╠══ d + ║ ╚══ e + ╚══ c + + Args: + tree (Node): tree to print + node_name_or_path (str): node to print from, becomes the root node of printing + max_depth (int): maximum depth of tree to print, based on `depth` attribute, optional + all_attrs (bool): indicator to show all attributes, defaults to False, overrides `attr_list` and `attr_omit_null` + attr_list (Iterable[str]): list of node attributes to print, optional + attr_omit_null (bool): indicator whether to omit showing of null attributes, defaults to False + attr_bracket (List[str]): open and close bracket for `all_attrs` or `attr_list` + style (str): style of print, defaults to const style + custom_style (Iterable[str]): style of stem, branch and final stem, used when `style` is set to 'custom' + """ + for pre_str, fill_str, _node in yield_tree( + tree=tree, + node_name_or_path=node_name_or_path, + max_depth=max_depth, + style=style, + custom_style=custom_style, + ): + # Get node_str (node name and attributes) + attr_str = "" + if all_attrs or attr_list: + if len(attr_bracket) != 2: + raise ValueError( + f"Expect open and close brackets in `attr_bracket`, received {attr_bracket}" + ) + attr_bracket_open, 
attr_bracket_close = attr_bracket + if all_attrs: + attrs = _node.describe(exclude_attributes=["name"], exclude_prefix="_") + attr_str_list = [f"{k}={v}" for k, v in attrs] + else: + if attr_omit_null: + attr_str_list = [ + f"{attr_name}={_node.get_attr(attr_name)}" + for attr_name in attr_list + if _node.get_attr(attr_name) + ] + else: + attr_str_list = [ + f"{attr_name}={_node.get_attr(attr_name)}" + for attr_name in attr_list + if hasattr(_node, attr_name) + ] + attr_str = ", ".join(attr_str_list) + if attr_str: + attr_str = f" {attr_bracket_open}{attr_str}{attr_bracket_close}" + node_str = f"{_node.node_name}{attr_str}" + print(f"{pre_str}{fill_str}{node_str}") + + +def yield_tree( + tree: T, + node_name_or_path: str = "", + max_depth: int = 0, + style: str = "const", + custom_style: Iterable[str] = [], +) -> Iterable[Tuple[str, str, T]]: + """Generator method for customizing printing of tree, starting from `tree`. + + - Able to select which node to print from, resulting in a subtree, using `node_name_or_path` + - Able to customize for maximum depth to print, using `max_depth` + - Able to customize style, to choose from `ansi`, `ascii`, `const`, `const_bold`, `rounded`, `double`, and `custom` style + - Default style is `const` style + - If style is set to custom, user can choose their own style for stem, branch and final stem icons + - Stem, branch, and final stem symbol should have the same number of characters + + Examples: + **Yield tree** + + >>> from bigtree import Node, yield_tree + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=60, parent=root) + >>> d = Node("d", age=40, parent=b) + >>> e = Node("e", age=35, parent=b) + >>> for branch, stem, node in yield_tree(root): + ... print(f"{branch}{stem}{node.node_name}") + a + ├── b + │ ├── d + │ └── e + └── c + + **Yield Sub-tree** + + >>> for branch, stem, node in yield_tree(root, node_name_or_path="b"): + ... 
print(f"{branch}{stem}{node.node_name}") + b + ├── d + └── e + + >>> for branch, stem, node in yield_tree(root, max_depth=2): + ... print(f"{branch}{stem}{node.node_name}") + a + ├── b + └── c + + **Available Styles** + + >>> for branch, stem, node in yield_tree(root, style="ansi"): + ... print(f"{branch}{stem}{node.node_name}") + a + |-- b + | |-- d + | `-- e + `-- c + + >>> for branch, stem, node in yield_tree(root, style="ascii"): + ... print(f"{branch}{stem}{node.node_name}") + a + |-- b + | |-- d + | +-- e + +-- c + + >>> for branch, stem, node in yield_tree(root, style="const"): + ... print(f"{branch}{stem}{node.node_name}") + a + ├── b + │ ├── d + │ └── e + └── c + + >>> for branch, stem, node in yield_tree(root, style="const_bold"): + ... print(f"{branch}{stem}{node.node_name}") + a + ┣━━ b + ┃ ┣━━ d + ┃ ┗━━ e + ┗━━ c + + >>> for branch, stem, node in yield_tree(root, style="rounded"): + ... print(f"{branch}{stem}{node.node_name}") + a + ├── b + │ ├── d + │ ╰── e + ╰── c + + >>> for branch, stem, node in yield_tree(root, style="double"): + ... print(f"{branch}{stem}{node.node_name}") + a + ╠══ b + ║ ╠══ d + ║ ╚══ e + ╚══ c + + **Printing Attributes** + + >>> for branch, stem, node in yield_tree(root, style="const"): + ... 
print(f"{branch}{stem}{node.node_name} [age={node.age}]") + a [age=90] + ├── b [age=65] + │ ├── d [age=40] + │ └── e [age=35] + └── c [age=60] + + Args: + tree (Node): tree to print + node_name_or_path (str): node to print from, becomes the root node of printing, optional + max_depth (int): maximum depth of tree to print, based on `depth` attribute, optional + style (str): style of print, defaults to const + custom_style (Iterable[str]): style of stem, branch and final stem, used when `style` is set to 'custom' + """ + from bigtree.tree.helper import get_subtree + + available_styles = ExportConstants.PRINT_STYLES + assert_style_in_dict(style, available_styles) + + tree = get_subtree(tree, node_name_or_path, max_depth) + + # Set style + if style == "custom": + if len(list(custom_style)) != 3: + raise ValueError( + "Custom style selected, please specify the style of stem, branch, and final stem in `custom_style`" + ) + style_stem, style_branch, style_stem_final = custom_style + else: + style_stem, style_branch, style_stem_final = available_styles[style] + + if not len(style_stem) == len(style_branch) == len(style_stem_final): + raise ValueError( + "`style_stem`, `style_branch`, and `style_stem_final` are of different length" + ) + + gap_str = " " * len(style_stem) + unclosed_depth = set() + initial_depth = tree.depth + for _node in preorder_iter(tree, max_depth=max_depth): + pre_str = "" + fill_str = "" + if not _node.is_root: + node_depth = _node.depth - initial_depth + + # Get fill_str (style_branch or style_stem_final) + if _node.right_sibling: + unclosed_depth.add(node_depth) + fill_str = style_branch + else: + if node_depth in unclosed_depth: + unclosed_depth.remove(node_depth) + fill_str = style_stem_final + + # Get pre_str (style_stem, style_branch, style_stem_final, or gap) + pre_str = "" + for _depth in range(1, node_depth): + if _depth in unclosed_depth: + pre_str += style_stem + else: + pre_str += gap_str + + yield pre_str, fill_str, _node + + +def 
hprint_tree( + tree: T, + node_name_or_path: str = "", + max_depth: int = 0, + intermediate_node_name: bool = True, + style: str = "const", + custom_style: Iterable[str] = [], +) -> None: + """Print tree in horizontal orientation to console, starting from `tree`. + + - Able to select which node to print from, resulting in a subtree, using `node_name_or_path` + - Able to customize for maximum depth to print, using `max_depth` + - Able to customize style, to choose from `ansi`, `ascii`, `const`, `const_bold`, `rounded`, `double`, and `custom` style + - Default style is `const` style + - If style is set to custom, user can choose their own style icons + - Style icons should have the same number of characters + + Examples: + **Printing tree** + + >>> from bigtree import Node, hprint_tree + >>> root = Node("a") + >>> b = Node("b", parent=root) + >>> c = Node("c", parent=root) + >>> d = Node("d", parent=b) + >>> e = Node("e", parent=b) + >>> hprint_tree(root) + ┌─ d + ┌─ b ─┤ + ─ a ─┤ └─ e + └─ c + + **Printing Sub-tree** + + >>> hprint_tree(root, node_name_or_path="b") + ┌─ d + ─ b ─┤ + └─ e + + >>> hprint_tree(root, max_depth=2) + ┌─ b + ─ a ─┤ + └─ c + + **Available Styles** + + >>> hprint_tree(root, style="ansi") + /- d + /- b -+ + - a -+ \\- e + \\- c + + >>> hprint_tree(root, style="ascii") + +- d + +- b -+ + - a -+ +- e + +- c + + >>> hprint_tree(root, style="const") + ┌─ d + ┌─ b ─┤ + ─ a ─┤ └─ e + └─ c + + >>> hprint_tree(root, style="const_bold") + ┏━ d + ┏━ b ━┫ + ━ a ━┫ ┗━ e + ┗━ c + + >>> hprint_tree(root, style="rounded") + ╭─ d + ╭─ b ─┤ + ─ a ─┤ ╰─ e + ╰─ c + + >>> hprint_tree(root, style="double") + ╔═ d + ╔═ b ═╣ + ═ a ═╣ ╚═ e + ╚═ c + + Args: + tree (Node): tree to print + node_name_or_path (str): node to print from, becomes the root node of printing + max_depth (int): maximum depth of tree to print, based on `depth` attribute, optional + intermediate_node_name (bool): indicator if intermediate nodes have node names, defaults to True + style (str): 
style of print, defaults to const style + custom_style (Iterable[str]): style of icons, used when `style` is set to 'custom' + """ + result = hyield_tree( + tree, + node_name_or_path=node_name_or_path, + intermediate_node_name=intermediate_node_name, + max_depth=max_depth, + style=style, + custom_style=custom_style, + ) + print("\n".join(result)) + + +def hyield_tree( + tree: T, + node_name_or_path: str = "", + max_depth: int = 0, + intermediate_node_name: bool = True, + style: str = "const", + custom_style: Iterable[str] = [], +) -> List[str]: + """Yield tree in horizontal orientation to console, starting from `tree`. + + - Able to select which node to print from, resulting in a subtree, using `node_name_or_path` + - Able to customize for maximum depth to print, using `max_depth` + - Able to customize style, to choose from `ansi`, `ascii`, `const`, `const_bold`, `rounded`, `double`, and `custom` style + - Default style is `const` style + - If style is set to custom, user can choose their own style icons + - Style icons should have the same number of characters + + Examples: + **Printing tree** + + >>> from bigtree import Node, hyield_tree + >>> root = Node("a") + >>> b = Node("b", parent=root) + >>> c = Node("c", parent=root) + >>> d = Node("d", parent=b) + >>> e = Node("e", parent=b) + >>> result = hyield_tree(root) + >>> print("\\n".join(result)) + ┌─ d + ┌─ b ─┤ + ─ a ─┤ └─ e + └─ c + + **Printing Sub-tree** + + >>> hprint_tree(root, node_name_or_path="b") + ┌─ d + ─ b ─┤ + └─ e + + >>> hprint_tree(root, max_depth=2) + ┌─ b + ─ a ─┤ + └─ c + + **Available Styles** + + >>> hprint_tree(root, style="ansi") + /- d + /- b -+ + - a -+ \\- e + \\- c + + >>> hprint_tree(root, style="ascii") + +- d + +- b -+ + - a -+ +- e + +- c + + >>> hprint_tree(root, style="const") + ┌─ d + ┌─ b ─┤ + ─ a ─┤ └─ e + └─ c + + >>> hprint_tree(root, style="const_bold") + ┏━ d + ┏━ b ━┫ + ━ a ━┫ ┗━ e + ┗━ c + + >>> hprint_tree(root, style="rounded") + ╭─ d + ╭─ b ─┤ + ─ a ─┤ ╰─ e + ╰─ c + 
+ >>> hprint_tree(root, style="double") + ╔═ d + ╔═ b ═╣ + ═ a ═╣ ╚═ e + ╚═ c + + Args: + tree (Node): tree to print + node_name_or_path (str): node to print from, becomes the root node of printing + max_depth (int): maximum depth of tree to print, based on `depth` attribute, optional + intermediate_node_name (bool): indicator if intermediate nodes have node names, defaults to True + style (str): style of print, defaults to const style + custom_style (Iterable[str]): style of icons, used when `style` is set to 'custom' + + Returns: + (List[str]) + """ + from itertools import accumulate + + from bigtree.tree.helper import get_subtree + + available_styles = ExportConstants.HPRINT_STYLES + assert_style_in_dict(style, available_styles) + + tree = get_subtree(tree, node_name_or_path, max_depth) + + # Set style + if style == "custom": + if len(list(custom_style)) != 7: + raise ValueError( + "Custom style selected, please specify the style of 7 icons in `custom_style`" + ) + ( + style_first_child, + style_subsequent_child, + style_split_branch, + style_middle_child, + style_last_child, + style_stem, + style_branch, + ) = custom_style + else: + ( + style_first_child, + style_subsequent_child, + style_split_branch, + style_middle_child, + style_last_child, + style_stem, + style_branch, + ) = available_styles[style] + if ( + not len(style_first_child) + == len(style_subsequent_child) + == len(style_split_branch) + == len(style_middle_child) + == len(style_last_child) + == len(style_stem) + == len(style_branch) + == 1 + ): + raise ValueError("For custom style, all style icons must have length 1") + + # Calculate padding + space = " " + padding_depths = collections.defaultdict(int) + if intermediate_node_name: + for _idx, _children in enumerate(levelordergroup_iter(tree)): + padding_depths[_idx + 1] = max([len(node.node_name) for node in _children]) + + def _hprint_branch(_node: Union[T, Node], _cur_depth: int) -> Tuple[List[str], int]: + """Get string for tree horizontally. 
+ Recursively iterate the nodes in post-order traversal manner. + + Args: + _node (Node): node to get string + _cur_depth (int): current depth of node + + Returns: + (Tuple[List[str], int]): Intermediate/final result for node, index of branch + """ + if not _node: + _node = Node(" ") + node_name_centered = _node.node_name.center(padding_depths[_cur_depth]) + + children = list(_node.children) if any(list(_node.children)) else [] + if not len(children): + node_str = f"{style_branch} {node_name_centered.rstrip()}" + return [node_str], 0 + + result, result_nrow, result_idx = [], [], [] + if intermediate_node_name: + node_str = f"""{style_branch} {node_name_centered} {style_branch}""" + else: + node_str = f"""{style_branch}{style_branch}{style_branch}""" + padding = space * len(node_str) + for idx, child in enumerate(children): + result_child, result_branch_idx = _hprint_branch(child, _cur_depth + 1) + result.extend(result_child) + result_nrow.append(len(result_child)) + result_idx.append(result_branch_idx) + + # Calculate index of first branch, last branch, total length, and midpoint + first, last, end = ( + result_idx[0], + sum(result_nrow) + result_idx[-1] - result_nrow[-1], + sum(result_nrow) - 1, + ) + mid = (first + last) // 2 + + if len(children) == 1: + # Special case for one child (need only branch) + result_prefix = ( + [padding + space] * first + + [node_str + style_branch] + + [padding + space] * (end - last) + ) + elif len(children) == 2: + # Special case for two children (need split_branch) + if last - first == 1: + # Create gap if two children occupy two rows + assert len(result) == 2 + result = [result[0], "", result[1]] + last = end = first + 2 + mid = (last - first) // 2 + result_prefix = ( + [padding + space] * first + + [padding + style_first_child] + + [padding + style_stem] * (mid - first - 1) + + [node_str + style_split_branch] + + [padding + style_stem] * (last - mid - 1) + + [padding + style_last_child] + + [padding + space] * (end - last) + ) + 
else: + branch_idxs = list( + ( + offset + blanks + for offset, blanks in zip( + result_idx, [0] + list(accumulate(result_nrow)) + ) + ) + ) + n_stems = [(b - a - 1) for a, b in zip(branch_idxs, branch_idxs[1:])] + result_prefix = ( + [padding + space] * first + + [padding + style_first_child] + + [ + _line + for line in [ + [padding + style_stem] * n_stem + + [padding + style_subsequent_child] + for n_stem in n_stems[:-1] + ] + for _line in line + ] + + [padding + style_stem] * n_stems[-1] + + [padding + style_last_child] + + [padding + space] * (end - last) + ) + result_prefix[mid] = node_str + style_split_branch + if mid in branch_idxs: + result_prefix[mid] = node_str + style_middle_child + result = [prefix + stem for prefix, stem in zip(result_prefix, result)] + return result, mid + + result, _ = _hprint_branch(tree, 1) + return result + + +def tree_to_newick( + tree: T, + intermediate_node_name: bool = True, + length_attr: str = "", + length_sep: Union[str, NewickCharacter] = NewickCharacter.SEP, + attr_list: Iterable[str] = [], + attr_prefix: str = "&&NHX:", + attr_sep: Union[str, NewickCharacter] = NewickCharacter.SEP, +) -> str: + """Export tree to Newick notation. Useful for describing phylogenetic tree. + + In the Newick Notation (or New Hampshire Notation), + - Tree is represented in round brackets i.e., `(child1,child2,child3)parent`. + - If there are nested tree, they will be in nested round brackets i.e., `((grandchild1)child1,(grandchild2,grandchild3)child2)parent`. + - If there is length attribute, they will be beside the name i.e., `(child1:0.5,child2:0.1)parent`. + - If there are other attributes, attributes are represented in square brackets i.e., `(child1:0.5[S:human],child2:0.1[S:human])parent[S:parent]`. + + Customizations include: + - Omitting names of root and intermediate nodes, default all node names are shown. + - Changing length separator to other symbol, default is `:`. + - Adding an attribute prefix, default is `&&NHX:`. 
+ - Changing the attribute separator to other symbol, default is `:`. + + Examples: + >>> from bigtree import Node, tree_to_newick + >>> root = Node("a", species="human") + >>> b = Node("b", age=65, species="human", parent=root) + >>> c = Node("c", age=60, species="human", parent=root) + >>> d = Node("d", age=40, species="human", parent=b) + >>> e = Node("e", age=35, species="human", parent=b) + >>> root.show() + a + ├── b + │ ├── d + │ └── e + └── c + + >>> tree_to_newick(root) + '((d,e)b,c)a' + + >>> tree_to_newick(root, length_attr="age") + '((d:40,e:35)b:65,c:60)a' + + >>> tree_to_newick(root, length_attr="age", attr_list=["species"]) + '((d:40[&&NHX:species=human],e:35[&&NHX:species=human])b:65[&&NHX:species=human],c:60[&&NHX:species=human])a[&&NHX:species=human]' + + Args: + tree (Node): tree to be exported + intermediate_node_name (bool): indicator if intermediate nodes have node names, defaults to True + length_attr (str): node length attribute to extract to beside name, optional + length_sep (str): separator between node name and length, used if length_attr is non-empty, defaults to ":" + attr_list (Iterable[str]): list of node attributes to extract into square bracket, optional + attr_prefix (str): prefix before all attributes, within square bracket, used if attr_list is non-empty, defaults to "&&NHX:" + attr_sep (str): separator between attributes, within square brackets, used if attr_list is non-empty, defaults to ":" + + Returns: + (str) + """ + if not tree: + return "" + if isinstance(length_sep, NewickCharacter): + length_sep = length_sep.value + if isinstance(attr_sep, NewickCharacter): + attr_sep = attr_sep.value + + def _serialize(item: Any) -> Any: + """Serialize item if it contains special Newick characters. 
+ + Args: + item (Any): item to serialize + + Returns: + (Any) + """ + if isinstance(item, str) and set(item).intersection(NewickCharacter.values()): + item = f"""'{item.replace(NewickCharacter.ATTR_QUOTE, '"')}'""" + return item + + node_name_str = "" + if (intermediate_node_name) or (not intermediate_node_name and tree.is_leaf): + node_name_str = _serialize(tree.node_name) + if length_attr and not tree.is_root: + if not tree.get_attr(length_attr): + raise ValueError(f"Length attribute does not exist for node {tree}") + node_name_str += f"{length_sep}{tree.get_attr(length_attr)}" + + attr_str = "" + if attr_list: + attr_str = attr_sep.join( + [ + f"{_serialize(k)}={_serialize(tree.get_attr(k))}" + for k in attr_list + if tree.get_attr(k) + ] + ) + if attr_str: + attr_str = f"[{attr_prefix}{attr_str}]" + + if tree.is_leaf: + return f"{node_name_str}{attr_str}" + + children_newick = ",".join( + tree_to_newick( + child, + intermediate_node_name=intermediate_node_name, + length_attr=length_attr, + length_sep=length_sep, + attr_list=attr_list, + attr_prefix=attr_prefix, + attr_sep=attr_sep, + ) + for child in tree.children + ) + return f"({children_newick}){node_name_str}{attr_str}" + + +def tree_to_dict( + tree: T, + name_key: str = "name", + parent_key: str = "", + attr_dict: Dict[str, str] = {}, + all_attrs: bool = False, + max_depth: int = 0, + skip_depth: int = 0, + leaf_only: bool = False, +) -> Dict[str, Any]: + """Export tree to dictionary. + + All descendants from `tree` will be exported, `tree` can be the root node or child node of tree. + + Exported dictionary will have key as node path, and node attributes as a nested dictionary. 
+ + Examples: + >>> from bigtree import Node, tree_to_dict + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=60, parent=root) + >>> d = Node("d", age=40, parent=b) + >>> e = Node("e", age=35, parent=b) + >>> tree_to_dict(root, name_key="name", parent_key="parent", attr_dict={"age": "person age"}) + {'/a': {'name': 'a', 'parent': None, 'person age': 90}, '/a/b': {'name': 'b', 'parent': 'a', 'person age': 65}, '/a/b/d': {'name': 'd', 'parent': 'b', 'person age': 40}, '/a/b/e': {'name': 'e', 'parent': 'b', 'person age': 35}, '/a/c': {'name': 'c', 'parent': 'a', 'person age': 60}} + + For a subset of a tree + + >>> tree_to_dict(c, name_key="name", parent_key="parent", attr_dict={"age": "person age"}) + {'/a/c': {'name': 'c', 'parent': 'a', 'person age': 60}} + + Args: + tree (Node): tree to be exported + name_key (str): dictionary key for `node.node_name`, defaults to 'name' + parent_key (str): dictionary key for `node.parent.node_name`, optional + attr_dict (Dict[str, str]): dictionary mapping node attributes to dictionary key, + key: node attributes, value: corresponding dictionary key, optional + all_attrs (bool): indicator whether to retrieve all ``Node`` attributes, overrides `attr_dict`, defaults to False + max_depth (int): maximum depth to export tree, optional + skip_depth (int): number of initial depths to skip, optional + leaf_only (bool): indicator to retrieve only information from leaf nodes + + Returns: + (Dict[str, Any]) + """ + tree = tree.copy() + data_dict = {} + + def _recursive_append(node: T) -> None: + """Recursively iterate through node and its children to export to dictionary. 
+ + Args: + node (Node): current node + """ + if node: + if ( + (not max_depth or node.depth <= max_depth) + and (not skip_depth or node.depth > skip_depth) + and (not leaf_only or node.is_leaf) + ): + data_child: Dict[str, Any] = {} + if name_key: + data_child[name_key] = node.node_name + if parent_key: + parent_name = None + if node.parent: + parent_name = node.parent.node_name + data_child[parent_key] = parent_name + if all_attrs: + data_child.update( + dict( + node.describe( + exclude_attributes=["name"], exclude_prefix="_" + ) + ) + ) + else: + for k, v in attr_dict.items(): + data_child[v] = node.get_attr(k) + data_dict[node.path_name] = data_child + for _node in node.children: + _recursive_append(_node) + + _recursive_append(tree) + return data_dict + + +def tree_to_nested_dict( + tree: T, + name_key: str = "name", + child_key: str = "children", + attr_dict: Dict[str, str] = {}, + all_attrs: bool = False, + max_depth: int = 0, +) -> Dict[str, Any]: + """Export tree to nested dictionary. + + All descendants from `tree` will be exported, `tree` can be the root node or child node of tree. + + Exported dictionary will have key as node attribute names, and children as a nested recursive dictionary. 
+ + Examples: + >>> from bigtree import Node, tree_to_nested_dict + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=60, parent=root) + >>> d = Node("d", age=40, parent=b) + >>> e = Node("e", age=35, parent=b) + >>> tree_to_nested_dict(root, all_attrs=True) + {'name': 'a', 'age': 90, 'children': [{'name': 'b', 'age': 65, 'children': [{'name': 'd', 'age': 40}, {'name': 'e', 'age': 35}]}, {'name': 'c', 'age': 60}]} + + Args: + tree (Node): tree to be exported + name_key (str): dictionary key for `node.node_name`, defaults to 'name' + child_key (str): dictionary key for list of children, optional + attr_dict (Dict[str, str]): dictionary mapping node attributes to dictionary key, + key: node attributes, value: corresponding dictionary key, optional + all_attrs (bool): indicator whether to retrieve all ``Node`` attributes, overrides `attr_dict`, defaults to False + max_depth (int): maximum depth to export tree, optional + + Returns: + (Dict[str, Any]) + """ + tree = tree.copy() + data_dict: Dict[str, List[Dict[str, Any]]] = {} + + def _recursive_append(node: T, parent_dict: Dict[str, Any]) -> None: + """Recursively iterate through node and its children to export to nested dictionary. 
+ + Args: + node (Node): current node + parent_dict (Dict[str, Any]): parent dictionary + """ + if node: + if not max_depth or node.depth <= max_depth: + data_child = {name_key: node.node_name} + if all_attrs: + data_child.update( + dict( + node.describe( + exclude_attributes=["name"], exclude_prefix="_" + ) + ) + ) + else: + for k, v in attr_dict.items(): + data_child[v] = node.get_attr(k) + if child_key in parent_dict: + parent_dict[child_key].append(data_child) + else: + parent_dict[child_key] = [data_child] + + for _node in node.children: + _recursive_append(_node, data_child) + + _recursive_append(tree, data_dict) + return data_dict[child_key][0] + + +@optional_dependencies_pandas +def tree_to_dataframe( + tree: T, + path_col: str = "path", + name_col: str = "name", + parent_col: str = "", + attr_dict: Dict[str, str] = {}, + all_attrs: bool = False, + max_depth: int = 0, + skip_depth: int = 0, + leaf_only: bool = False, +) -> pd.DataFrame: + """Export tree to pandas DataFrame. + + All descendants from `tree` will be exported, `tree` can be the root node or child node of tree. + + Examples: + >>> from bigtree import Node, tree_to_dataframe + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=60, parent=root) + >>> d = Node("d", age=40, parent=b) + >>> e = Node("e", age=35, parent=b) + >>> tree_to_dataframe(root, name_col="name", parent_col="parent", path_col="path", attr_dict={"age": "person age"}) + path name parent person age + 0 /a a None 90 + 1 /a/b b a 65 + 2 /a/b/d d b 40 + 3 /a/b/e e b 35 + 4 /a/c c a 60 + + For a subset of a tree. 
+ + >>> tree_to_dataframe(b, name_col="name", parent_col="parent", path_col="path", attr_dict={"age": "person age"}) + path name parent person age + 0 /a/b b a 65 + 1 /a/b/d d b 40 + 2 /a/b/e e b 35 + + Args: + tree (Node): tree to be exported + path_col (str): column name for `node.path_name`, defaults to 'path' + name_col (str): column name for `node.node_name`, defaults to 'name' + parent_col (str): column name for `node.parent.node_name`, optional + attr_dict (Dict[str, str]): dictionary mapping node attributes to column name, + key: node attributes, value: corresponding column in dataframe, optional + all_attrs (bool): indicator whether to retrieve all ``Node`` attributes, overrides `attr_dict`, defaults to False + max_depth (int): maximum depth to export tree, optional + skip_depth (int): number of initial depths to skip, optional + leaf_only (bool): indicator to retrieve only information from leaf nodes + + Returns: + (pd.DataFrame) + """ + tree = tree.copy() + data_list = [] + + def _recursive_append(node: T) -> None: + """Recursively iterate through node and its children to export to dataframe. 
+ + Args: + node (Node): current node + """ + if node: + if ( + (not max_depth or node.depth <= max_depth) + and (not skip_depth or node.depth > skip_depth) + and (not leaf_only or node.is_leaf) + ): + data_child: Dict[str, Any] = {} + if path_col: + data_child[path_col] = node.path_name + if name_col: + data_child[name_col] = node.node_name + if parent_col: + parent_name = None + if node.parent: + parent_name = node.parent.node_name + data_child[parent_col] = parent_name + + if all_attrs: + data_child.update( + node.describe(exclude_attributes=["name"], exclude_prefix="_") + ) + else: + for k, v in attr_dict.items(): + data_child[v] = node.get_attr(k) + data_list.append(data_child) + for _node in node.children: + _recursive_append(_node) + + _recursive_append(tree) + return pd.DataFrame(data_list) + + +@optional_dependencies_image("pydot") +def tree_to_dot( + tree: Union[T, List[T]], + directed: bool = True, + rankdir: str = "TB", + bg_colour: str = "", + node_colour: str = "", + node_shape: str = "", + edge_colour: str = "", + node_attr: Callable[[T], Dict[str, Any]] | str = "", + edge_attr: Callable[[T], Dict[str, Any]] | str = "", +) -> pydot.Dot: + r"""Export tree or list of trees to image. + Possible node attributes include style, fillcolor, shape. + + Examples: + >>> from bigtree import Node, tree_to_dot + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=60, parent=root) + >>> d = Node("d", age=40, parent=b) + >>> e = Node("e", age=35, parent=b) + >>> graph = tree_to_dot(root) + + Display image directly without saving (requires IPython) + + >>> from IPython.display import Image, display + >>> plt = Image(graph.create_png()) + >>> display(plt) + + + Export to image, dot file, etc. 
+ + >>> graph.write_png("assets/docstr/tree.png") + >>> graph.write_dot("assets/docstr/tree.dot") + + Export to string + + >>> graph.to_string() + 'strict digraph G {\nrankdir=TB;\na0 [label=a];\nb0 [label=b];\na0 -> b0;\nd0 [label=d];\nb0 -> d0;\ne0 [label=e];\nb0 -> e0;\nc0 [label=c];\na0 -> c0;\n}\n' + + Defining node and edge attributes (using node attribute) + + >>> class CustomNode(Node): + ... def __init__(self, name, node_shape="", edge_label="", **kwargs): + ... super().__init__(name, **kwargs) + ... self.node_shape = node_shape + ... self.edge_label = edge_label + ... + ... @property + ... def edge_attr(self): + ... if self.edge_label: + ... return {"label": self.edge_label} + ... return {} + ... + ... @property + ... def node_attr(self): + ... if self.node_shape: + ... return {"shape": self.node_shape} + ... return {} + >>> + >>> + >>> root = CustomNode("a", node_shape="circle") + >>> b = CustomNode("b", edge_label="child", parent=root) + >>> c = CustomNode("c", edge_label="child", parent=root) + >>> d = CustomNode("d", node_shape="square", edge_label="child", parent=b) + >>> e = CustomNode("e", node_shape="square", edge_label="child", parent=b) + >>> graph = tree_to_dot(root, node_colour="gold", node_shape="diamond", node_attr="node_attr", edge_attr="edge_attr") + >>> graph.write_png("assets/export_tree_dot.png") + + ![Export to dot](https://github.com/kayjan/bigtree/raw/master/assets/export_tree_dot.png) + + Alternative way to define node and edge attributes (using callable function) + + >>> def get_node_attribute(node: Node): + ... if node.is_leaf: + ... return {"shape": "square"} + ... 
return {"shape": "circle"} + >>> + >>> + >>> root = CustomNode("a") + >>> b = CustomNode("b", parent=root) + >>> c = CustomNode("c", parent=root) + >>> d = CustomNode("d", parent=b) + >>> e = CustomNode("e", parent=b) + >>> graph = tree_to_dot(root, node_colour="gold", node_attr=get_node_attribute) + >>> graph.write_png("assets/export_tree_dot_callable.png") + + ![Export to dot (callable)](https://github.com/kayjan/bigtree/raw/master/assets/export_tree_dot_callable.png) + + Args: + tree (Node/List[Node]): tree or list of trees to be exported + directed (bool): indicator whether graph should be directed or undirected, defaults to True + rankdir (str): layout direction, defaults to 'TB' (top to bottom), can be 'BT' (bottom to top), + 'LR' (left to right), 'RL' (right to left) + bg_colour (str): background color of image, defaults to None + node_colour (str): fill colour of nodes, defaults to None + node_shape (str): shape of nodes, defaults to None + Possible node_shape include "circle", "square", "diamond", "triangle" + edge_colour (str): colour of edges, defaults to None + node_attr (str | Callable): If string type, it refers to ``Node`` attribute for node style. + If callable type, it takes in the node itself and returns the node style. + This overrides `node_colour` and `node_shape` and defaults to None. + Possible node styles include {"style": "filled", "fillcolor": "gold", "shape": "diamond"} + edge_attr (str | Callable): If string type, it refers to ``Node`` attribute for edge style. + If callable type, it takes in the node itself and returns the edge style. + This overrides `edge_colour`, and defaults to None. 
+ Possible edge styles include {"style": "bold", "label": "edge label", "color": "black"} + + Returns: + (pydot.Dot) + """ + # Get style + graph_style = dict(bgcolor=bg_colour) if bg_colour else dict() + node_style = dict(style="filled", fillcolor=node_colour) if node_colour else dict() + if node_shape: + node_style["shape"] = node_shape + edge_style = dict(color=edge_colour) if edge_colour else dict() + + tree = tree.copy() + _graph = ( + pydot.Dot(graph_type="digraph", strict=True, rankdir=rankdir, **graph_style) + if directed + else pydot.Dot(graph_type="graph", strict=True, rankdir=rankdir, **graph_style) + ) + + if not isinstance(tree, list): + tree = [tree] + + for _tree in tree: + if not isinstance(_tree, Node): + raise TypeError("Tree should be of type `Node`, or inherit from `Node`") + + name_dict: Dict[str, List[str]] = collections.defaultdict(list) + + def _recursive_append(parent_name: Optional[str], child_node: T) -> None: + """Recursively iterate through node and its children to export to dot by creating node and edges. 
@optional_dependencies_image("Pillow")
def tree_to_pillow(
    tree: T,
    width: int = 0,
    height: int = 0,
    start_pos: Tuple[int, int] = (10, 10),
    font_family: str = "",
    font_size: int = 12,
    font_colour: Union[Tuple[int, int, int], str] = "black",
    bg_colour: Union[Tuple[int, int, int], str] = "white",
    **kwargs: Any,
) -> Image.Image:
    """Export tree to image (JPG, PNG).

    The image uses a similar layout to `print_tree`. Additional keyword arguments are
    passed on to `yield_tree`.

    Examples:
        >>> from bigtree import Node, tree_to_pillow
        >>> root = Node("a", age=90)
        >>> b = Node("b", age=65, parent=root)
        >>> c = Node("c", age=60, parent=root)
        >>> d = Node("d", age=40, parent=b)
        >>> e = Node("e", age=35, parent=b)
        >>> pillow_image = tree_to_pillow(root)

        Export to image (PNG, JPG) file, etc.

        >>> pillow_image.save("assets/docstr/tree_pillow.png")
        >>> pillow_image.save("assets/docstr/tree_pillow.jpg")

    Args:
        tree (Node): tree to be exported
        width (int): width of image, optional as width of image is calculated automatically
        height (int): height of image, optional as height of image is calculated automatically
        start_pos (Tuple[int, int]): start position of text, (x-offset, y-offset), defaults to (10, 10)
        font_family (str): file path of font family, requires .ttf file. If empty, DejaVuSans is
            downloaded from the bigtree GitHub repository (requires network access)
        font_size (int): font size, defaults to 12
        font_colour (Union[Tuple[int, int, int], str]): font colour, accepts tuple of RGB values or string, defaults to black
        bg_colour (Union[Tuple[int, int, int], str]): background of image, accepts tuple of RGB values or string, defaults to white

    Returns:
        (PIL.Image.Image)

    Raises:
        ValueError: if the font cannot be loaded by Pillow
    """
    from io import BytesIO

    # Initialize font
    font_source: Any = font_family
    font_label = font_family
    if not font_family:
        dejavusans_url = "https://github.com/kayjan/bigtree/raw/master/assets/DejaVuSans.ttf?raw=true"
        font_label = dejavusans_url
        # Read the whole payload inside a context manager so the HTTP response
        # (and its socket) is closed before the font is parsed.
        with urlopen(dejavusans_url) as response:
            font_source = BytesIO(response.read())
    try:
        font = ImageFont.truetype(font_source, font_size)
    except OSError as err:
        # Chain the original error so the underlying cause stays visible.
        raise ValueError(
            f"Font file {font_label} is not found, set `font_family` parameter to point to a valid .ttf file."
        ) from err

    # Initialize text, one line per node
    image_text = [
        f"{branch}{stem}{node.node_name}\n"
        for branch, stem, node in yield_tree(tree, **kwargs)
    ]

    # Measure every line on a throwaway canvas; each bounding box is (left, top, right, bottom)
    scratch_draw = ImageDraw.Draw(Image.new("RGB", (0, 0)))
    text_dimensions = [
        scratch_draw.textbbox((0, 0), text_line, font=font) for text_line in image_text
    ]
    text_height = sum(bbox[3] + bbox[1] for bbox in text_dimensions)
    text_width = max(bbox[2] + bbox[0] for bbox in text_dimensions)

    # Explicit width/height only take effect when larger than the computed size
    width = max(width, text_width + 2 * start_pos[0])
    height = max(height, text_height + 2 * start_pos[1])

    # Initialize and draw image
    image = Image.new("RGB", (width, height), bg_colour)
    image_draw = ImageDraw.Draw(image)
    image_draw.text(start_pos, "".join(image_text), font=font, fill=font_colour)
    return image
+ + Parameters for customizations that applies to entire flowchart include: + - Title, `title` + - Layout direction, `rankdir` + - Line shape or curvature, `line_shape` + - Fill colour of nodes, `node_colour` + - Border colour of nodes, `node_border_colour` + - Border width of nodes, `node_border_width` + - Node shape, `node_shape` + - Edge arrow style, `edge_arrow` + + Parameters for customizations that apply to customized nodes: + - Fill colour of nodes, fill under `node_attr` + - Border colour of nodes, stroke under `node_attr` + - Border width of nodes, stroke-width under `node_attr` + - Node shape, `node_shape_attr` + - Edge arrow style, `edge_arrow_attr` + - Edge label, `edge_label` + + **Accepted Parameter Values** + + Possible rankdir: + - `TB`: top-to-bottom + - `BT`: bottom-to-top + - `LR`: left-to-right + - `RL`: right-to-left + + Possible line_shape: + - `basis` + - `bumpX`: used in LR or RL direction + - `bumpY` + - `cardinal`: undirected + - `catmullRom`: undirected + - `linear`: + - `monotoneX`: used in LR or RL direction + - `monotoneY` + - `natural` + - `step`: used in LR or RL direction + - `stepAfter` + - `stepBefore`: used in LR or RL direction + + Possible node_shape: + - `rounded_edge`: rectangular with rounded edges + - `stadium`: (_) shape, rectangular with rounded ends + - `subroutine`: ||_|| shape, rectangular with additional line at the ends + - `cylindrical`: database node + - `circle`: circular + - `asymmetric`: >_| shape + - `rhombus`: decision node + - `hexagon`: <_> shape + - `parallelogram`: /_/ shape + - `parallelogram_alt`: \\_\\ shape, inverted parallelogram + - `trapezoid`: /_\\ shape + - `trapezoid_alt`: \\_/ shape, inverted trapezoid + - `double_circle` + + Possible edge_arrow: + - `normal`: directed arrow, shaded arrowhead + - `bold`: bold directed arrow + - `dotted`: dotted directed arrow + - `open`: line, undirected arrow + - `bold_open`: bold line + - `dotted_open`: dotted line + - `invisible`: no line + - `circle`: 
directed arrow with filled circle arrowhead + - `cross`: directed arrow with cross arrowhead + - `double_normal`: bidirectional directed arrow + - `double_circle`: bidirectional directed arrow with filled circle arrowhead + - `double_cross`: bidirectional directed arrow with cross arrowhead + + Refer to mermaid [documentation](http://mermaid.js.org/syntax/flowchart.html) for more information. + Paste the output into any markdown file renderer to view the flowchart, alternatively visit the + mermaid playground [here](https://mermaid.live/). + + !!! note + + Advanced mermaid flowchart functionalities such as subgraphs and interactions (script, click) are not supported. + + Examples: + >>> from bigtree import tree_to_mermaid + >>> root = Node("a", node_shape="rhombus") + >>> b = Node("b", edge_arrow="bold", edge_label="Child 1", parent=root) + >>> c = Node("c", edge_arrow="dotted", edge_label="Child 2", parent=root) + >>> d = Node("d", node_style="fill:yellow, stroke:black", parent=b) + >>> e = Node("e", parent=b) + >>> graph = tree_to_mermaid(root) + >>> print(graph) + ```mermaid + %%{ init: { 'flowchart': { 'curve': 'basis' } } }%% + flowchart TB + 0("a") --> 0-0("b") + 0-0 --> 0-0-0("d") + 0-0 --> 0-0-1("e") + 0("a") --> 0-1("c") + classDef default stroke-width:1 + ``` + + **Customize node shape, edge label, edge arrow, and custom node attributes** + + >>> graph = tree_to_mermaid(root, node_shape_attr="node_shape", edge_label="edge_label", edge_arrow_attr="edge_arrow", node_attr="node_style") + >>> print(graph) + ```mermaid + %%{ init: { 'flowchart': { 'curve': 'basis' } } }%% + flowchart TB + 0{"a"} ==>|Child 1| 0-0("b") + 0-0:::class0-0-0 --> 0-0-0("d") + 0-0 --> 0-0-1("e") + 0{"a"} -.->|Child 2| 0-1("c") + classDef default stroke-width:1 + classDef class0-0-0 fill:yellow, stroke:black + ``` + + Args: + tree (Node): tree to be exported + title (str): title, defaults to None + rankdir (str): layout direction, defaults to 'TB' (top to bottom), can be 'BT' (bottom 
to top), + 'LR' (left to right), 'RL' (right to left) + line_shape (str): line shape or curvature, defaults to 'basis' + node_colour (str): fill colour of nodes, can be colour name or hexcode, defaults to None + node_border_colour (str): border colour of nodes, can be colour name or hexcode, defaults to None + node_border_width (float): width of node border, defaults to 1 + node_shape (str): node shape, sets the shape of every node, defaults to 'rounded_edge' + node_shape_attr (str | Callable): If string type, it refers to ``Node`` attribute for node shape. + If callable type, it takes in the node itself and returns the node shape. + This sets the shape of custom nodes, and overrides default `node_shape`, defaults to None + edge_arrow (str): edge arrow style from parent to itself, sets the arrow style of every edge, defaults to 'normal' + edge_arrow_attr (str | Callable): If string type, it refers to ``Node`` attribute for edge arrow style. + If callable type, it takes in the node itself and returns the edge arrow style. + This sets the edge arrow style of custom nodes from parent to itself, and overrides default `edge_arrow`, defaults to None + edge_label (str): ``Node`` attribute for edge label from parent to itself, defaults to None + node_attr (str | Callable): If string type, it refers to ``Node`` attribute for node style. + If callable type, it takes in the node itself and returns the node style. 
+ This overrides `node_colour`, `node_border_colour`, and `node_border_width`, defaults to None + + Returns: + (str) + """ + from bigtree.tree.helper import clone_tree + + rankdirs = MermaidConstants.RANK_DIR + line_shapes = MermaidConstants.LINE_SHAPES + node_shapes = MermaidConstants.NODE_SHAPES + edge_arrows = MermaidConstants.EDGE_ARROWS + + # Assertions + assert_str_in_list("rankdir", rankdir, rankdirs) + assert_key_in_dict("node_shape", node_shape, node_shapes) + assert_str_in_list("line_shape", line_shape, line_shapes) + assert_key_in_dict("edge_arrow", edge_arrow, edge_arrows) + + mermaid_template = """```mermaid\n{title}{line_style}\nflowchart {rankdir}\n{flows}\n{styles}\n```""" + flowchart_template = "{from_node_ref}{from_node_name}{flow_style} {arrow}{arrow_label} {to_node_ref}{to_node_name}" + style_template = "classDef {style_name} {style}" + + # Content + title = f"---\ntitle: {title}\n---" if title else "" + line_style = f"%%{{ init: {{ 'flowchart': {{ 'curve': '{line_shape}' }} }} }}%%" + styles = [] + flows = [] + + def _construct_style( + _style_name: str, + _node_colour: str, + _node_border_colour: str, + _node_border_width: float, + ) -> str: + """Construct style for Mermaid. 
+ + Args: + _style_name (str): style name + _node_colour (str): node colour + _node_border_colour (str): node border colour + _node_border_width (float): node border width + + Returns: + (str) + """ + style = [] + if _node_colour: + style.append(f"fill:{_node_colour}") + if _node_border_colour: + style.append(f"stroke:{_node_border_colour}") + if _node_border_width: + style.append(f"stroke-width:{_node_border_width}") + if not style: + raise ValueError("Unable to construct style!") + return style_template.format(style_name=_style_name, style=",".join(style)) + + default_style = _construct_style( + "default", node_colour, node_border_colour, node_border_width + ) + styles.append(default_style) + + class MermaidNode(Node): + """Mermaid Node, adds property `mermaid_name`""" + + @property + def mermaid_name(self) -> str: + """Reference name for MermaidNode, must be unique for each node. + + Returns: + (str) + """ + if self.is_root: + return "0" + return f"{self.parent.mermaid_name}-{self.parent.children.index(self)}" + + def _get_attr( + _node: MermaidNode, + attr_parameter: str | Callable[[MermaidNode], str], + default_parameter: str, + ) -> str: + """Get custom attribute if available, otherwise return default parameter. 
+ + Args: + _node (MermaidNode): node to get custom attribute, can be accessed as node attribute or a callable that takes in the node + attr_parameter (str | Callable): custom attribute parameter + default_parameter (str): default parameter if there is no attr_parameter + + Returns: + (str) + """ + _choice = default_parameter + if attr_parameter: + if isinstance(attr_parameter, str): + _choice = _node.get_attr(attr_parameter, default_parameter) + else: + _choice = attr_parameter(_node) + return _choice + + tree_mermaid = clone_tree(tree, MermaidNode) + for _, _, node in yield_tree(tree_mermaid, **kwargs): + if not node.is_root: + # Get custom style (node_shape_attr) + _parent_node_name = "" + if node.parent.is_root: + _parent_node_shape_choice = _get_attr( + node.parent, node_shape_attr, node_shape # type: ignore + ) + _parent_node_shape = node_shapes[_parent_node_shape_choice] + _parent_node_name = _parent_node_shape.format(label=node.parent.name) + _node_shape_choice = _get_attr(node, node_shape_attr, node_shape) # type: ignore + _node_shape = node_shapes[_node_shape_choice] + _node_name = _node_shape.format(label=node.name) + + # Get custom style (edge_arrow_attr, edge_label) + _arrow_choice = _get_attr(node, edge_arrow_attr, edge_arrow) # type: ignore + _arrow = edge_arrows[_arrow_choice] + _arrow_label = ( + f"|{node.get_attr(edge_label)}|" if node.get_attr(edge_label) else "" + ) + + # Get custom style (node_attr) + _flow_style = _get_attr(node, node_attr, "") # type: ignore + if _flow_style: + _flow_style_class = f"""class{node.get_attr("mermaid_name")}""" + styles.append( + style_template.format( + style_name=_flow_style_class, style=_flow_style + ) + ) + _flow_style = f":::{_flow_style_class}" + + flows.append( + flowchart_template.format( + from_node_ref=node.parent.get_attr("mermaid_name"), + from_node_name=_parent_node_name, + flow_style=_flow_style, + arrow=_arrow, + arrow_label=_arrow_label, + to_node_ref=node.get_attr("mermaid_name"), + 
def clone_tree(tree: BaseNode, node_type: Type[BaseNodeT]) -> BaseNodeT:
    """Rebuild a tree using a different ``Node`` class.

    The clone always starts from the root of `tree`, whichever node is passed in.
    Use ``tree.copy()`` instead when the node type does not need to change.

    Examples:
        >>> from bigtree import BaseNode, Node, clone_tree
        >>> root = BaseNode(name="a")
        >>> b = BaseNode(name="b", parent=root)
        >>> clone_tree(root, Node)
        Node(/a, )

    Args:
        tree (BaseNode): tree to be cloned, must inherit from BaseNode
        node_type (Type[BaseNode]): type of cloned tree

    Returns:
        (BaseNode)
    """
    if not isinstance(tree, BaseNode):
        raise TypeError("Tree should be of type `BaseNode`, or inherit from `BaseNode`")

    source_root = tree.root
    cloned_root = node_type(**dict(source_root.describe(exclude_prefix="_")))

    # Explicit pre-order walk: children are pushed in reverse so that they are
    # popped, created and attached in their original left-to-right order.
    pending = [(cloned_root, source_child) for source_child in reversed(source_root.children)]
    while pending:
        cloned_parent, source_node = pending.pop()
        if not source_node:
            # Falsy child entries are skipped, nothing to clone
            continue
        cloned_node = node_type(**dict(source_node.describe(exclude_prefix="_")))
        cloned_node.parent = cloned_parent
        pending.extend(
            (cloned_node, source_child)
            for source_child in reversed(source_node.children)
        )
    return cloned_root
+ + Examples: + >>> from bigtree import Node, get_subtree + >>> root = Node("a") + >>> b = Node("b", parent=root) + >>> c = Node("c", parent=b) + >>> d = Node("d", parent=b) + >>> e = Node("e", parent=root) + >>> root.show() + a + ├── b + │ ├── c + │ └── d + └── e + + Get subtree + + >>> root_subtree = get_subtree(root, "b") + >>> root_subtree.show() + b + ├── c + └── d + + Args: + tree (Node): existing tree + node_name_or_path (str): node name or path to get subtree, defaults to None + max_depth (int): maximum depth of subtree, based on `depth` attribute, defaults to None + + Returns: + (Node) + """ + tree = tree.copy() + + if node_name_or_path: + tree = find_path(tree, node_name_or_path) + if not tree: + raise ValueError(f"Node name or path {node_name_or_path} not found") + + if not tree.is_root: + tree.parent = None + + if max_depth: + tree = prune_tree(tree, max_depth=max_depth) + return tree + + +def prune_tree( + tree: Union[BinaryNodeT, NodeT], + prune_path: Union[List[str], str] = "", + exact: bool = False, + sep: str = "/", + max_depth: int = 0, +) -> Union[BinaryNodeT, NodeT]: + """Prune tree by path or depth, returns the root of a *copy* of the original tree. + + For pruning by `prune_path`, + + - All siblings along the prune path will be removed. + - If ``exact=True``, all descendants of prune path will be removed. + - Prune path can be string (only one path) or a list of strings (multiple paths). + - Prune path name should be unique, can be full path, partial path (trailing part of path), or node name. + + For pruning by `max_depth`, + + - All nodes that are beyond `max_depth` will be removed. + + Path should contain ``Node`` name, separated by `sep`. + + - For example: Path string "a/b" refers to Node("b") with parent Node("a"). 
+ + Examples: + >>> from bigtree import Node, prune_tree + >>> root = Node("a") + >>> b = Node("b", parent=root) + >>> c = Node("c", parent=b) + >>> d = Node("d", parent=b) + >>> e = Node("e", parent=root) + >>> root.show() + a + ├── b + │ ├── c + │ └── d + └── e + + Prune (default is keep descendants) + + >>> root_pruned = prune_tree(root, "a/b") + >>> root_pruned.show() + a + └── b + ├── c + └── d + + Prune exact path + + >>> root_pruned = prune_tree(root, "a/b", exact=True) + >>> root_pruned.show() + a + └── b + + Prune multiple paths + + >>> root_pruned = prune_tree(root, ["a/b/d", "a/e"]) + >>> root_pruned.show() + a + ├── b + │ └── d + └── e + + Prune by depth + + >>> root_pruned = prune_tree(root, max_depth=2) + >>> root_pruned.show() + a + ├── b + └── e + + Args: + tree (Union[BinaryNode, Node]): existing tree + prune_path (List[str] | str): prune path(s), all siblings along the prune path(s) will be removed + exact (bool): prune path(s) to be exactly the path, defaults to False (descendants of the path are retained) + sep (str): path separator of `prune_path` + max_depth (int): maximum depth of pruned tree, based on `depth` attribute, defaults to None + + Returns: + (Union[BinaryNode, Node]) + """ + if isinstance(prune_path, str): + prune_path = [prune_path] if prune_path else [] + + if not len(prune_path) and not max_depth: + raise ValueError("Please specify either `prune_path` or `max_depth` or both.") + + tree_copy = tree.copy() + + # Prune by path (prune bottom-up) + if len(prune_path): + ancestors_to_prune: Set[Union[BinaryNodeT, NodeT]] = set() + nodes_to_prune: Set[Union[BinaryNodeT, NodeT]] = set() + for path in prune_path: + path = path.replace(sep, tree.sep) + child = find_path(tree_copy, path) + if not child: + raise NotFoundError( + f"Cannot find any node matching path_name ending with {path}" + ) + nodes_to_prune.add(child) + ancestors_to_prune.update(list(child.ancestors)) + + if exact: + ancestors_to_prune.update(nodes_to_prune) + + for node 
def _mark_path_prefix(path: str, node_path: str, sep: str, marker: str) -> str:
    """Append `marker` to the `node_path` part of `path` if `path` is that node or lies beneath it."""
    if path == node_path or path.startswith(f"{node_path}{sep}"):
        return f"{node_path}{marker}{path[len(node_path):]}"
    return path


def get_tree_diff(
    tree: Node, other_tree: Node, only_diff: bool = True, attr_list: List[str] = []
) -> Node:
    """Get difference of `tree` to `other_tree`, changes are relative to `tree`.

    Compares the difference in tree structure (default), but can also compare tree attributes using `attr_list`.
    Function can return only the differences (default), or all original tree nodes and differences.

    Comparing tree structure:

    - (+) and (-) will be added to node name relative to `tree`.
    - For example: (+) refers to nodes that are in `other_tree` but not `tree`.
    - For example: (-) refers to nodes that are in `tree` but not `other_tree`.

    Examples:
        >>> # Create original tree
        >>> from bigtree import Node, get_tree_diff, list_to_tree
        >>> root = list_to_tree(["Downloads/Pictures/photo1.jpg", "Downloads/file1.doc", "Downloads/photo2.jpg"])
        >>> root.show()
        Downloads
        ├── Pictures
        │   └── photo1.jpg
        ├── file1.doc
        └── photo2.jpg

        >>> # Create other tree
        >>> root_other = list_to_tree(["Downloads/Pictures/photo1.jpg", "Downloads/Pictures/photo2.jpg", "Downloads/file1.doc"])
        >>> root_other.show()
        Downloads
        ├── Pictures
        │   ├── photo1.jpg
        │   └── photo2.jpg
        └── file1.doc

        >>> # Get tree differences
        >>> tree_diff = get_tree_diff(root, root_other)
        >>> tree_diff.show()
        Downloads
        ├── photo2.jpg (-)
        └── Pictures
            └── photo2.jpg (+)

        >>> tree_diff = get_tree_diff(root, root_other, only_diff=False)
        >>> tree_diff.show()
        Downloads
        ├── Pictures
        │   ├── photo1.jpg
        │   └── photo2.jpg (+)
        ├── file1.doc
        └── photo2.jpg (-)

        Comparing tree attributes

        - (~) will be added to node name if there are differences in tree attributes defined in `attr_list`.
        - The node's attributes will be a list of [value in `tree`, value in `other_tree`]

        >>> # Create original tree
        >>> root = Node("Downloads")
        >>> picture_folder = Node("Pictures", parent=root)
        >>> photo1 = Node("photo1.jpg", tags="photo1", parent=picture_folder)
        >>> file1 = Node("file1.doc", tags="file1", parent=root)
        >>> root.show(attr_list=["tags"])
        Downloads
        ├── Pictures
        │   └── photo1.jpg [tags=photo1]
        └── file1.doc [tags=file1]

        >>> # Create other tree
        >>> root_other = Node("Downloads")
        >>> picture_folder = Node("Pictures", parent=root_other)
        >>> photo1 = Node("photo1.jpg", tags="photo1-edited", parent=picture_folder)
        >>> photo2 = Node("photo2.jpg", tags="photo2-new", parent=picture_folder)
        >>> file1 = Node("file1.doc", tags="file1", parent=root_other)
        >>> root_other.show(attr_list=["tags"])
        Downloads
        ├── Pictures
        │   ├── photo1.jpg [tags=photo1-edited]
        │   └── photo2.jpg [tags=photo2-new]
        └── file1.doc [tags=file1]

        >>> # Get tree differences
        >>> tree_diff = get_tree_diff(root, root_other, attr_list=["tags"])
        >>> tree_diff.show(attr_list=["tags"])
        Downloads
        └── Pictures
            ├── photo1.jpg (~) [tags=('photo1', 'photo1-edited')]
            └── photo2.jpg (+)

    Args:
        tree (Node): tree to be compared against
        other_tree (Node): tree to be compared with, it is not modified
        only_diff (bool): indicator to show all nodes or only nodes that are different (+/-), defaults to True
        attr_list (List[str]): tree attributes to check for difference, defaults to empty list (never mutated here)

    Returns:
        (Node): tree of differences, or None when there is nothing to report
    """
    # Paths are compared as strings, so both trees must use the same separator.
    # Work on a copy so that the caller's `other_tree` keeps its own separator.
    if other_tree.sep != tree.sep:
        other_tree = other_tree.copy()
        other_tree.sep = tree.sep
    sep = tree.sep

    name_col = "name"
    path_col = "PATH"
    indicator_col = "Exists"

    data, data_other = (
        tree_to_dataframe(
            _tree,
            name_col=name_col,
            path_col=path_col,
            attr_dict={k: k for k in attr_list},
        )
        for _tree in (tree, other_tree)
    )

    # Check tree structure difference
    data_both = data[[path_col, name_col] + attr_list].merge(
        data_other[[path_col, name_col] + attr_list],
        how="outer",
        on=[path_col, name_col],
        indicator=indicator_col,
    )

    # Handle tree structure difference
    # Collect both lists before any path is rewritten.
    nodes_removed = list(data_both[data_both[indicator_col] == "left_only"][path_col])
    nodes_added = list(data_both[data_both[indicator_col] == "right_only"][path_col])
    for marker, node_paths in ((" (-)", nodes_removed), (" (+)", nodes_added)):
        # Longest paths first: a descendant must be marked before its ancestor,
        # because marking the ancestor rewrites the descendant's path prefix.
        for node_path in sorted(node_paths, key=len, reverse=True):
            # Literal, separator-anchored matching: node names containing regex
            # metacharacters, or paths that merely contain `node_path` as a
            # substring, are left untouched.
            data_both[path_col] = data_both[path_col].map(
                lambda path, _node=node_path, _marker=marker: _mark_path_prefix(
                    path, _node, sep, _marker
                )
            )

    # Check tree attribute difference
    path_changes_list_of_dict: List[Dict[str, Dict[str, Any]]] = []
    path_changes_deque: Deque[str] = deque([])
    for attr_change in attr_list:
        attr_x = f"{attr_change}_x"
        attr_y = f"{attr_change}_y"
        # Only nodes present in both trees can have a changed attribute
        condition_diff = (
            (~data_both[attr_x].isnull() | ~data_both[attr_y].isnull())
            & (data_both[attr_x] != data_both[attr_y])
            & (data_both[indicator_col] == "both")
        )
        data_diff = data_both[condition_diff]
        if len(data_diff):
            tuple_diff = zip(data_diff[attr_x], data_diff[attr_y])
            dict_attr_diff = [{attr_change: v} for v in tuple_diff]
            dict_path_diff = dict(zip(data_diff[path_col], dict_attr_diff))
            path_changes_list_of_dict.append(dict_path_diff)
            path_changes_deque.extend(list(data_diff[path_col]))

    if only_diff:
        data_both = data_both[
            (data_both[indicator_col] != "both")
            | (data_both[path_col].isin(path_changes_deque))
        ]
    data_both = data_both[[path_col]]
    if not len(data_both):
        # Nothing differs, so there is no tree to build
        return None  # type: ignore

    tree_diff = dataframe_to_tree(data_both, node_type=tree.__class__)
    # Handle tree attribute difference
    if len(path_changes_deque):
        path_changes_list = sorted(path_changes_deque, reverse=True)
        name_changes_list = [
            {k: {"name": f"{k.split(tree.sep)[-1]} (~)"} for k in path_changes_list}
        ]
        path_changes_list_of_dict.extend(name_changes_list)
        for attr_change_dict in path_changes_list_of_dict:
            tree_diff = add_dict_to_tree_by_path(tree_diff, attr_change_dict)
    return tree_diff
+ + For paths in `from_paths`, + + - Path name can be partial path (trailing part of path) or node name. + - If ``with_full_path=True``, path name must be full path. + - Path name must be unique to one node. + + For paths in `to_paths`, + + - Path name must be full path. + - Can set to empty string or None to delete the path in `from_paths`, note that ``copy`` must be set to False. + + If ``merge_children=True``, + + - If `to_path` is not present, it shifts children of `from_path`. + - If `to_path` is present, and ``overriding=False``, original and new children are merged. + - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new children are retained. + + If ``merge_leaves=True``, + + - If `to_path` is not present, it shifts leaves of `from_path`. + - If `to_path` is present, and ``overriding=False``, original children and leaves are merged. + - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new leaves are retained, + original non-leaf nodes in `from_path` are retained. + + Examples: + >>> from bigtree import list_to_tree, str_to_tree, shift_nodes + >>> root = list_to_tree(["Downloads/photo1.jpg", "Downloads/file1.doc"]) + >>> root.show() + Downloads + ├── photo1.jpg + └── file1.doc + + >>> shift_nodes( + ... tree=root, + ... from_paths=["Downloads/photo1.jpg", "Downloads/file1.doc"], + ... to_paths=["Downloads/Pictures/photo1.jpg", "Downloads/Files/file1.doc"], + ... ) + >>> root.show() + Downloads + ├── Pictures + │ └── photo1.jpg + └── Files + └── file1.doc + + To delete node, + + >>> root = list_to_tree(["Downloads/photo1.jpg", "Downloads/file1.doc"]) + >>> root.show() + Downloads + ├── photo1.jpg + └── file1.doc + + >>> shift_nodes(root, ["Downloads/photo1.jpg"], [None]) + >>> root.show() + Downloads + └── file1.doc + + In overriding case, + + >>> root = str_to_tree( + ... "Downloads\\n" + ... "├── Misc\\n" + ... "│ └── Pictures\\n" + ... "│ └── photo1.jpg\\n" + ... 
"└── Pictures\\n" + ... " └── photo2.jpg" + ... ) + >>> root.show() + Downloads + ├── Misc + │ └── Pictures + │ └── photo1.jpg + └── Pictures + └── photo2.jpg + + >>> shift_nodes(root, ["Downloads/Misc/Pictures"], ["Downloads/Pictures"], overriding=True) + >>> root.show() + Downloads + ├── Misc + └── Pictures + └── photo1.jpg + + In ``merge_children=True`` case, child nodes are shifted instead of the parent node. + + - If the path already exists, child nodes are merged with existing children. + - If same node is shifted, the child nodes of the node are merged with the node's parent. + + >>> root = str_to_tree( + ... "Downloads\\n" + ... "├── Misc\\n" + ... "│ ├── Pictures\\n" + ... "│ │ └── photo2.jpg\\n" + ... "│ └── Applications\\n" + ... "│ └── Chrome.exe\\n" + ... "├── Pictures\\n" + ... "│ └── photo1.jpg\\n" + ... "└── dummy\\n" + ... " └── Files\\n" + ... " └── file1.doc" + ... ) + >>> root.show() + Downloads + ├── Misc + │ ├── Pictures + │ │ └── photo2.jpg + │ └── Applications + │ └── Chrome.exe + ├── Pictures + │ └── photo1.jpg + └── dummy + └── Files + └── file1.doc + + >>> shift_nodes( + ... root, + ... ["Downloads/Misc/Pictures", "Applications", "Downloads/dummy"], + ... ["Downloads/Pictures", "Downloads/Applications", "Downloads/dummy"], + ... merge_children=True, + ... ) + >>> root.show() + Downloads + ├── Misc + ├── Pictures + │ ├── photo1.jpg + │ └── photo2.jpg + ├── Chrome.exe + └── Files + └── file1.doc + + In ``merge_leaves=True`` case, leaf nodes are copied instead of the parent node. + + - If the path already exists, leaf nodes are merged with existing children. + - If same node is copied, the leaf nodes of the node are merged with the node's parent. + + >>> root = str_to_tree( + ... "Downloads\\n" + ... "├── Misc\\n" + ... "│ ├── Pictures\\n" + ... "│ │ └── photo2.jpg\\n" + ... "│ └── Applications\\n" + ... "│ └── Chrome.exe\\n" + ... "├── Pictures\\n" + ... "│ └── photo1.jpg\\n" + ... "└── dummy\\n" + ... " └── Files\\n" + ... 
" └── file1.doc" + ... ) + >>> root.show() + Downloads + ├── Misc + │ ├── Pictures + │ │ └── photo2.jpg + │ └── Applications + │ └── Chrome.exe + ├── Pictures + │ └── photo1.jpg + └── dummy + └── Files + └── file1.doc + + >>> shift_nodes( + ... root, + ... ["Downloads/Misc/Pictures", "Applications", "Downloads/dummy"], + ... ["Downloads/Pictures", "Downloads/Applications", "Downloads/dummy"], + ... merge_leaves=True, + ... ) + >>> root.show() + Downloads + ├── Misc + │ ├── Pictures + │ └── Applications + ├── Pictures + │ ├── photo1.jpg + │ └── photo2.jpg + ├── dummy + │ └── Files + ├── Chrome.exe + └── file1.doc + + In ``delete_children=True`` case, only the node is shifted without its accompanying children/descendants. + + >>> root = str_to_tree( + ... "Downloads\\n" + ... "├── Misc\\n" + ... "│ └── Applications\\n" + ... "│ └── Chrome.exe\\n" + ... "└── Pictures\\n" + ... " └── photo1.jpg" + ... ) + >>> root.show() + Downloads + ├── Misc + │ └── Applications + │ └── Chrome.exe + └── Pictures + └── photo1.jpg + + >>> shift_nodes(root, ["Applications"], ["Downloads/Applications"], delete_children=True) + >>> root.show() + Downloads + ├── Misc + ├── Pictures + │ └── photo1.jpg + └── Applications + + Args: + tree (Node): tree to modify + from_paths (List[str]): original paths to shift nodes from + to_paths (List[str]): new paths to shift nodes to + sep (str): path separator for input paths, applies to `from_path` and `to_path` + skippable (bool): indicator to skip if from path is not found, defaults to False + overriding (bool): indicator to override existing to path if there is clashes, defaults to False + merge_children (bool): indicator to merge children and remove intermediate parent node, defaults to False + merge_leaves (bool): indicator to merge leaf nodes and remove intermediate parent node(s), defaults to False + delete_children (bool): indicator to shift node only without children, defaults to False + with_full_path (bool): indicator to shift/copy node with 
full path in `from_paths`, results in faster search, + defaults to False + """ + return copy_or_shift_logic( + tree=tree, + from_paths=from_paths, + to_paths=to_paths, + sep=sep, + copy=False, + skippable=skippable, + overriding=overriding, + merge_children=merge_children, + merge_leaves=merge_leaves, + delete_children=delete_children, + to_tree=None, + with_full_path=with_full_path, + ) # pragma: no cover + + +def copy_nodes( + tree: Node, + from_paths: List[str], + to_paths: List[str], + sep: str = "/", + skippable: bool = False, + overriding: bool = False, + merge_children: bool = False, + merge_leaves: bool = False, + delete_children: bool = False, + with_full_path: bool = False, +) -> None: + """Copy nodes from `from_paths` to `to_paths` *in-place*. + + - Creates intermediate nodes if to path is not present + - Able to skip nodes if from path is not found, defaults to False (from-nodes must be found; not skippable). + - Able to override existing node if it exists, defaults to False (to-nodes must not exist; not overridden). + - Able to merge children and remove intermediate parent node, defaults to False (nodes are shifted; not merged). + - Able to merge only leaf nodes and remove all intermediate nodes, defaults to False (nodes are shifted; not merged) + - Able to copy node only and delete children, defaults to False (nodes are copied together with children). + + For paths in `from_paths` and `to_paths`, + + - Path name can be with or without leading tree path separator symbol. + + For paths in `from_paths`, + + - Path name can be partial path (trailing part of path) or node name. + - If ``with_full_path=True``, path name must be full path. + - Path name must be unique to one node. + + For paths in `to_paths`, + + - Path name must be full path. + + If ``merge_children=True``, + + - If `to_path` is not present, it copies children of `from_path`. + - If `to_path` is present, and ``overriding=False``, original and new children are merged. 
+ - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new children are retained. + + If ``merge_leaves=True``, + + - If `to_path` is not present, it copies leaves of `from_path`. + - If `to_path` is present, and ``overriding=False``, original children and leaves are merged. + - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new leaves are retained. + + Examples: + >>> from bigtree import list_to_tree, str_to_tree, copy_nodes + >>> root = list_to_tree(["Downloads/Pictures", "Downloads/photo1.jpg", "Downloads/file1.doc"]) + >>> root.show() + Downloads + ├── Pictures + ├── photo1.jpg + └── file1.doc + + >>> copy_nodes( + ... tree=root, + ... from_paths=["Downloads/photo1.jpg", "Downloads/file1.doc"], + ... to_paths=["Downloads/Pictures/photo1.jpg", "Downloads/Files/file1.doc"], + ... ) + >>> root.show() + Downloads + ├── Pictures + │ └── photo1.jpg + ├── photo1.jpg + ├── file1.doc + └── Files + └── file1.doc + + In overriding case, + + >>> root = str_to_tree( + ... "Downloads\\n" + ... "├── Misc\\n" + ... "│ └── Pictures\\n" + ... "│ └── photo1.jpg\\n" + ... "└── Pictures\\n" + ... " └── photo2.jpg" + ... ) + >>> root.show() + Downloads + ├── Misc + │ └── Pictures + │ └── photo1.jpg + └── Pictures + └── photo2.jpg + + >>> copy_nodes(root, ["Downloads/Misc/Pictures"], ["Downloads/Pictures"], overriding=True) + >>> root.show() + Downloads + ├── Misc + │ └── Pictures + │ └── photo1.jpg + └── Pictures + └── photo1.jpg + + In ``merge_children=True`` case, child nodes are copied instead of the parent node. + + - If the path already exists, child nodes are merged with existing children. + - If same node is copied, the child nodes of the node are merged with the node's parent. + + >>> root = str_to_tree( + ... "Downloads\\n" + ... "├── Misc\\n" + ... "│ ├── Pictures\\n" + ... "│ │ └── photo2.jpg\\n" + ... "│ └── Applications\\n" + ... "│ └── Chrome.exe\\n" + ... "├── Pictures\\n" + ... 
"│ └── photo1.jpg\\n" + ... "└── dummy\\n" + ... " └── Files\\n" + ... " └── file1.doc" + ... ) + >>> root.show() + Downloads + ├── Misc + │ ├── Pictures + │ │ └── photo2.jpg + │ └── Applications + │ └── Chrome.exe + ├── Pictures + │ └── photo1.jpg + └── dummy + └── Files + └── file1.doc + + >>> copy_nodes( + ... root, + ... ["Downloads/Misc/Pictures", "Applications", "Downloads/dummy"], + ... ["Downloads/Pictures", "Downloads/Applications", "Downloads/dummy"], + ... merge_children=True, + ... ) + >>> root.show() + Downloads + ├── Misc + │ ├── Pictures + │ │ └── photo2.jpg + │ └── Applications + │ └── Chrome.exe + ├── Pictures + │ ├── photo1.jpg + │ └── photo2.jpg + ├── Chrome.exe + └── Files + └── file1.doc + + In ``merge_leaves=True`` case, leaf nodes are copied instead of the parent node. + + - If the path already exists, leaf nodes are merged with existing children. + - If same node is copied, the leaf nodes of the node are merged with the node's parent. + + >>> root = str_to_tree( + ... "Downloads\\n" + ... "├── Misc\\n" + ... "│ ├── Pictures\\n" + ... "│ │ └── photo2.jpg\\n" + ... "│ └── Applications\\n" + ... "│ └── Chrome.exe\\n" + ... "├── Pictures\\n" + ... "│ └── photo1.jpg\\n" + ... "└── dummy\\n" + ... " └── Files\\n" + ... " └── file1.doc" + ... ) + >>> root.show() + Downloads + ├── Misc + │ ├── Pictures + │ │ └── photo2.jpg + │ └── Applications + │ └── Chrome.exe + ├── Pictures + │ └── photo1.jpg + └── dummy + └── Files + └── file1.doc + + >>> copy_nodes( + ... root, + ... ["Downloads/Misc/Pictures", "Applications", "Downloads/dummy"], + ... ["Downloads/Pictures", "Downloads/Applications", "Downloads/dummy"], + ... merge_leaves=True, + ... 
) + >>> root.show() + Downloads + ├── Misc + │ ├── Pictures + │ │ └── photo2.jpg + │ └── Applications + │ └── Chrome.exe + ├── Pictures + │ ├── photo1.jpg + │ └── photo2.jpg + ├── dummy + │ └── Files + │ └── file1.doc + ├── Chrome.exe + └── file1.doc + + In ``delete_children=True`` case, only the node is copied without its accompanying children/descendants. + + >>> root = str_to_tree( + ... "Downloads\\n" + ... "├── Misc\\n" + ... "│ └── Applications\\n" + ... "│ └── Chrome.exe\\n" + ... "└── Pictures\\n" + ... " └── photo1.jpg" + ... ) + >>> root.show() + Downloads + ├── Misc + │ └── Applications + │ └── Chrome.exe + └── Pictures + └── photo1.jpg + + >>> copy_nodes(root, ["Applications"], ["Downloads/Applications"], delete_children=True) + >>> root.show() + Downloads + ├── Misc + │ └── Applications + │ └── Chrome.exe + ├── Pictures + │ └── photo1.jpg + └── Applications + + Args: + tree (Node): tree to modify + from_paths (List[str]): original paths to shift nodes from + to_paths (List[str]): new paths to shift nodes to + sep (str): path separator for input paths, applies to `from_path` and `to_path` + skippable (bool): indicator to skip if from path is not found, defaults to False + overriding (bool): indicator to override existing to path if there is clashes, defaults to False + merge_children (bool): indicator to merge children and remove intermediate parent node, defaults to False + merge_leaves (bool): indicator to merge leaf nodes and remove intermediate parent node(s), defaults to False + delete_children (bool): indicator to copy node only without children, defaults to False + with_full_path (bool): indicator to shift/copy node with full path in `from_paths`, results in faster search, + defaults to False + """ + return copy_or_shift_logic( + tree=tree, + from_paths=from_paths, + to_paths=to_paths, + sep=sep, + copy=True, + skippable=skippable, + overriding=overriding, + merge_children=merge_children, + merge_leaves=merge_leaves, + 
delete_children=delete_children, + to_tree=None, + with_full_path=with_full_path, + ) # pragma: no cover + + +def shift_and_replace_nodes( + tree: Node, + from_paths: List[str], + to_paths: List[str], + sep: str = "/", + skippable: bool = False, + delete_children: bool = False, + with_full_path: bool = False, +) -> None: + """Shift nodes from `from_paths` to *replace* `to_paths` *in-place*. + + - Creates intermediate nodes if to path is not present + - Able to skip nodes if from path is not found, defaults to False (from-nodes must be found; not skippable). + - Able to shift node only and delete children, defaults to False (nodes are shifted together with children). + + For paths in `from_paths` and `to_paths`, + + - Path name can be with or without leading tree path separator symbol. + + For paths in `from_paths`, + + - Path name can be partial path (trailing part of path) or node name. + - If ``with_full_path=True``, path name must be full path. + - Path name must be unique to one node. + + For paths in `to_paths`, + + - Path name must be full path. + - Path must exist, node-to-be-replaced must be present. + + Examples: + >>> from bigtree import str_to_tree, shift_and_replace_nodes + >>> root = str_to_tree( + ... "Downloads\\n" + ... "├── Pictures\\n" + ... "│ └── photo1.jpg\\n" + ... "└── Misc\\n" + ... " └── dummy" + ... ) + >>> root.show() + Downloads + ├── Pictures + │ └── photo1.jpg + └── Misc + └── dummy + + >>> shift_and_replace_nodes(root, ["Downloads/Pictures"], ["Downloads/Misc/dummy"]) + >>> root.show() + Downloads + └── Misc + └── Pictures + └── photo1.jpg + + In ``delete_children=True`` case, only the node is shifted without its accompanying children/descendants. + + >>> root = str_to_tree( + ... "Downloads\\n" + ... "├── Pictures\\n" + ... "│ └── photo1.jpg\\n" + ... "└── Misc\\n" + ... " └── dummy" + ... 
) + >>> root.show() + Downloads + ├── Pictures + │ └── photo1.jpg + └── Misc + └── dummy + + >>> shift_and_replace_nodes(root, ["Downloads/Pictures"], ["Downloads/Misc/dummy"], delete_children=True) + >>> root.show() + Downloads + └── Misc + └── Pictures + + Args: + tree (Node): tree to modify + from_paths (List[str]): original paths to shift nodes from + to_paths (List[str]): new paths to shift nodes to + sep (str): path separator for input paths, applies to `from_path` and `to_path` + skippable (bool): indicator to skip if from path is not found, defaults to False + delete_children (bool): indicator to shift node only without children, defaults to False + with_full_path (bool): indicator to shift/copy node with full path in `from_paths`, results in faster search, + defaults to False + """ + return replace_logic( + tree=tree, + from_paths=from_paths, + to_paths=to_paths, + sep=sep, + copy=False, + skippable=skippable, + delete_children=delete_children, + to_tree=None, + with_full_path=with_full_path, + ) # pragma: no cover + + +def copy_nodes_from_tree_to_tree( + from_tree: Node, + to_tree: Node, + from_paths: List[str], + to_paths: List[str], + sep: str = "/", + skippable: bool = False, + overriding: bool = False, + merge_children: bool = False, + merge_leaves: bool = False, + delete_children: bool = False, + with_full_path: bool = False, +) -> None: + """Copy nodes from `from_paths` to `to_paths` *in-place*. + + - Creates intermediate nodes if to path is not present + - Able to skip nodes if from path is not found, defaults to False (from-nodes must be found; not skippable). + - Able to override existing node if it exists, defaults to False (to-nodes must not exist; not overridden). + - Able to merge children and remove intermediate parent node, defaults to False (nodes are shifted; not merged). 
+ - Able to merge only leaf nodes and remove all intermediate nodes, defaults to False (nodes are shifted; not merged) + - Able to copy node only and delete children, defaults to False (nodes are copied together with children). + + For paths in `from_paths` and `to_paths`, + + - Path name can be with or without leading tree path separator symbol. + + For paths in `from_paths`, + + - Path name can be partial path (trailing part of path) or node name. + - If ``with_full_path=True``, path name must be full path. + - Path name must be unique to one node. + + For paths in `to_paths`, + + - Path name must be full path. + + If ``merge_children=True``, + + - If `to_path` is not present, it copies children of `from_path` + - If `to_path` is present, and ``overriding=False``, original and new children are merged + - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new leaves are retained. + + If ``merge_leaves=True``, + + - If `to_path` is not present, it copies leaves of `from_path`. + - If `to_path` is present, and ``overriding=False``, original children and leaves are merged. + - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new leaves are retained. + + Examples: + >>> from bigtree import Node, str_to_tree, copy_nodes_from_tree_to_tree + >>> root = str_to_tree( + ... "Downloads\\n" + ... "├── file1.doc\\n" + ... "├── Pictures\\n" + ... "│ └── photo1.jpg\\n" + ... "└── Misc\\n" + ... " └── dummy\\n" + ... " └── photo2.jpg" + ... ) + >>> root.show() + Downloads + ├── file1.doc + ├── Pictures + │ └── photo1.jpg + └── Misc + └── dummy + └── photo2.jpg + + >>> root_other = Node("Documents") + >>> copy_nodes_from_tree_to_tree( + ... from_tree=root, + ... to_tree=root_other, + ... from_paths=["Downloads/Pictures", "Downloads/Misc"], + ... to_paths=["Documents/Pictures", "Documents/New Misc/Misc"], + ... 
) + >>> root_other.show() + Documents + ├── Pictures + │ └── photo1.jpg + └── New Misc + └── Misc + └── dummy + └── photo2.jpg + + In overriding case, + + >>> root_other = str_to_tree( + ... "Documents\\n" + ... "└── Pictures\\n" + ... " └── photo3.jpg" + ... ) + >>> root_other.show() + Documents + └── Pictures + └── photo3.jpg + + >>> copy_nodes_from_tree_to_tree( + ... root, + ... root_other, + ... ["Downloads/Pictures", "Downloads/Misc"], + ... ["Documents/Pictures", "Documents/Misc"], + ... overriding=True, + ... ) + >>> root_other.show() + Documents + ├── Pictures + │ └── photo1.jpg + └── Misc + └── dummy + └── photo2.jpg + + In ``merge_children=True`` case, child nodes are copied instead of the parent node. + + - If the path already exists, child nodes are merged with existing children. + + >>> root_other = str_to_tree( + ... "Documents\\n" + ... "└── Pictures\\n" + ... " └── photo3.jpg" + ... ) + >>> root_other.show() + Documents + └── Pictures + └── photo3.jpg + + >>> copy_nodes_from_tree_to_tree( + ... root, + ... root_other, + ... ["Downloads/Pictures", "Downloads/Misc"], + ... ["Documents/Pictures", "Documents/Misc"], + ... merge_children=True, + ... ) + >>> root_other.show() + Documents + ├── Pictures + │ ├── photo3.jpg + │ └── photo1.jpg + └── dummy + └── photo2.jpg + + In ``merge_leaves=True`` case, leaf nodes are copied instead of the parent node. + + - If the path already exists, leaf nodes are merged with existing children. + + >>> root_other = str_to_tree( + ... "Documents\\n" + ... "└── Pictures\\n" + ... " └── photo3.jpg" + ... ) + >>> root_other.show() + Documents + └── Pictures + └── photo3.jpg + + >>> copy_nodes_from_tree_to_tree( + ... root, + ... root_other, + ... ["Downloads/Pictures", "Downloads/Misc"], + ... ["Documents/Pictures", "Documents/Misc"], + ... merge_leaves=True, + ... 
) + >>> root_other.show() + Documents + ├── Pictures + │ ├── photo3.jpg + │ └── photo1.jpg + └── photo2.jpg + + In ``delete_children=True`` case, only the node is copied without its accompanying children/descendants. + + >>> root_other = Node("Documents") + >>> root_other.show() + Documents + + >>> copy_nodes_from_tree_to_tree( + ... root, + ... root_other, + ... ["Downloads/Pictures", "Downloads/Misc"], + ... ["Documents/Pictures", "Documents/Misc"], + ... delete_children=True, + ... ) + >>> root_other.show() + Documents + ├── Pictures + └── Misc + + Args: + from_tree (Node): tree to copy nodes from + to_tree (Node): tree to copy nodes to + from_paths (List[str]): original paths to shift nodes from + to_paths (List[str]): new paths to shift nodes to + sep (str): path separator for input paths, applies to `from_path` and `to_path` + skippable (bool): indicator to skip if from path is not found, defaults to False + overriding (bool): indicator to override existing to path if there is clashes, defaults to False + merge_children (bool): indicator to merge children and remove intermediate parent node, defaults to False + merge_leaves (bool): indicator to merge leaf nodes and remove intermediate parent node(s), defaults to False + delete_children (bool): indicator to copy node only without children, defaults to False + with_full_path (bool): indicator to shift/copy node with full path in `from_paths`, results in faster search, + defaults to False + """ + return copy_or_shift_logic( + tree=from_tree, + from_paths=from_paths, + to_paths=to_paths, + sep=sep, + copy=True, + skippable=skippable, + overriding=overriding, + merge_children=merge_children, + merge_leaves=merge_leaves, + delete_children=delete_children, + to_tree=to_tree, + with_full_path=with_full_path, + ) # pragma: no cover + + +def copy_and_replace_nodes_from_tree_to_tree( + from_tree: Node, + to_tree: Node, + from_paths: List[str], + to_paths: List[str], + sep: str = "/", + skippable: bool = False, + 
delete_children: bool = False, + with_full_path: bool = False, +) -> None: + """Copy nodes from `from_paths` to *replace* `to_paths` *in-place*. + + - Creates intermediate nodes if to path is not present + - Able to skip nodes if from path is not found, defaults to False (from-nodes must be found; not skippable). + - Able to copy node only and delete children, defaults to False (nodes are copied together with children). + + For paths in `from_paths` and `to_paths`, + + - Path name can be with or without leading tree path separator symbol. + + For paths in `from_paths`, + + - Path name can be partial path (trailing part of path) or node name. + - If ``with_full_path=True``, path name must be full path. + - Path name must be unique to one node. + + For paths in `to_paths`, + + - Path name must be full path. + - Path must exist, node-to-be-replaced must be present. + + Examples: + >>> from bigtree import str_to_tree, copy_and_replace_nodes_from_tree_to_tree + >>> root = str_to_tree( + ... "Downloads\\n" + ... "├── file1.doc\\n" + ... "├── Pictures\\n" + ... "│ └── photo1.jpg\\n" + ... "└── Misc\\n" + ... " └── dummy\\n" + ... " └── photo2.jpg" + ... ) + >>> root.show() + Downloads + ├── file1.doc + ├── Pictures + │ └── photo1.jpg + └── Misc + └── dummy + └── photo2.jpg + + >>> root_other = str_to_tree( + ... "Documents\\n" + ... "├── Pictures2\\n" + ... "│ └── photo2.jpg\\n" + ... "└── Misc2" + ... ) + >>> root_other.show() + Documents + ├── Pictures2 + │ └── photo2.jpg + └── Misc2 + + >>> copy_and_replace_nodes_from_tree_to_tree( + ... from_tree=root, + ... to_tree=root_other, + ... from_paths=["Downloads/Pictures", "Downloads/Misc"], + ... to_paths=["Documents/Pictures2/photo2.jpg", "Documents/Misc2"], + ... ) + >>> root_other.show() + Documents + ├── Pictures2 + │ └── Pictures + │ └── photo1.jpg + └── Misc + └── dummy + └── photo2.jpg + + In ``delete_children=True`` case, only the node is copied without its accompanying children/descendants. 
+ + >>> root_other = str_to_tree( + ... "Documents\\n" + ... "├── Pictures2\\n" + ... "│ └── photo2.jpg\\n" + ... "└── Misc2" + ... ) + >>> root_other.show() + Documents + ├── Pictures2 + │ └── photo2.jpg + └── Misc2 + + >>> copy_and_replace_nodes_from_tree_to_tree( + ... from_tree=root, + ... to_tree=root_other, + ... from_paths=["Downloads/Pictures", "Downloads/Misc"], + ... to_paths=["Documents/Pictures2/photo2.jpg", "Documents/Misc2"], + ... delete_children=True, + ... ) + >>> root_other.show() + Documents + ├── Pictures2 + │ └── Pictures + └── Misc + + Args: + from_tree (Node): tree to copy nodes from + to_tree (Node): tree to copy nodes to + from_paths (List[str]): original paths to shift nodes from + to_paths (List[str]): new paths to shift nodes to + sep (str): path separator for input paths, applies to `from_path` and `to_path` + skippable (bool): indicator to skip if from path is not found, defaults to False + delete_children (bool): indicator to copy node only without children, defaults to False + with_full_path (bool): indicator to shift/copy node with full path in `from_paths`, results in faster search, + defaults to False + """ + return replace_logic( + tree=from_tree, + from_paths=from_paths, + to_paths=to_paths, + sep=sep, + copy=True, + skippable=skippable, + delete_children=delete_children, + to_tree=to_tree, + with_full_path=with_full_path, + ) # pragma: no cover + + +def copy_or_shift_logic( + tree: Node, + from_paths: List[str], + to_paths: List[str], + sep: str = "/", + copy: bool = False, + skippable: bool = False, + overriding: bool = False, + merge_children: bool = False, + merge_leaves: bool = False, + delete_children: bool = False, + to_tree: Optional[Node] = None, + with_full_path: bool = False, +) -> None: + """Shift or copy nodes from `from_paths` to `to_paths` *in-place*. + + - Creates intermediate nodes if to path is not present + - Able to copy node, defaults to False (nodes are shifted; not copied). 
+ - Able to skip nodes if from path is not found, defaults to False (from-nodes must be found; not skippable) + - Able to override existing node if it exists, defaults to False (to-nodes must not exist; not overridden) + - Able to merge children and remove intermediate parent node, defaults to False (nodes are shifted; not merged) + - Able to merge only leaf nodes and remove all intermediate nodes, defaults to False (nodes are shifted; not merged) + - Able to shift/copy node only and delete children, defaults to False (nodes are shifted/copied together with children). + - Able to shift/copy nodes from one tree to another tree, defaults to None (shifting/copying happens within same tree) + + For paths in `from_paths` and `to_paths`, + + - Path name can be with or without leading tree path separator symbol. + + For paths in `from_paths`, + + - Path name can be partial path (trailing part of path) or node name. + - If ``with_full_path=True``, path name must be full path. + - Path name must be unique to one node. + + For paths in `to_paths`, + + - Path name must be full path. + - Can set to empty string or None to delete the path in `from_paths`, note that ``copy`` must be set to False. + + If ``merge_children=True``, + + - If `to_path` is not present, it shifts/copies children of `from_path`. + - If `to_path` is present, and ``overriding=False``, original and new children are merged. + - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new children are retained. + + If ``merge_leaves=True``, + + - If `to_path` is not present, it shifts/copies leaves of `from_path`. + - If `to_path` is present, and ``overriding=False``, original children and leaves are merged. + - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new leaves are retained, + original non-leaf nodes in `from_path` are retained. 
+ + Args: + tree (Node): tree to modify + from_paths (List[str]): original paths to shift nodes from + to_paths (List[str]): new paths to shift nodes to + sep (str): path separator for input paths, applies to `from_path` and `to_path` + copy (bool): indicator to copy node, defaults to False + skippable (bool): indicator to skip if from path is not found, defaults to False + overriding (bool): indicator to override existing to path if there is clashes, defaults to False + merge_children (bool): indicator to merge children and remove intermediate parent node, defaults to False + merge_leaves (bool): indicator to merge leaf nodes and remove intermediate parent node(s), defaults to False + delete_children (bool): indicator to shift/copy node only without children, defaults to False + to_tree (Node): tree to copy to, defaults to None + with_full_path (bool): indicator to shift/copy node with full path in `from_paths`, results in faster search, + defaults to False + """ + if merge_children and merge_leaves: + raise ValueError( + "Invalid shifting, can only specify one type of merging, check `merge_children` and `merge_leaves`" + ) + if not (isinstance(from_paths, list) and isinstance(to_paths, list)): + raise ValueError( + "Invalid type, `from_paths` and `to_paths` should be list type" + ) + if len(from_paths) != len(to_paths): + raise ValueError( + f"Paths are different length, input `from_paths` have {len(from_paths)} entries, " + f"while output `to_paths` have {len(to_paths)} entries" + ) + if copy and (None in to_paths or "" in to_paths): + raise ValueError( + "Deletion of node will not happen if `copy=True`, check your `copy` parameter." 
+ ) + + # Modify `sep` of from_paths and to_paths + if not to_tree: + to_tree = tree + tree_sep = to_tree.sep + from_paths = [path.rstrip(sep).replace(sep, tree.sep) for path in from_paths] + to_paths = [ + path.rstrip(sep).replace(sep, tree_sep) if path else None for path in to_paths + ] + + for from_path, to_path in zip(from_paths, to_paths): + if to_path: + if from_path.split(tree.sep)[-1] != to_path.split(tree_sep)[-1]: + raise ValueError( + f"Unable to assign from_path {from_path} to to_path {to_path}\n" + f"Verify that `sep` is defined correctly for path\n" + f"Alternatively, check that `from_path` and `to_path` is reassigning the same node." + ) + + if with_full_path: + if not all( + [ + path.lstrip(tree.sep).split(tree.sep)[0] == tree.root.node_name + for path in from_paths + ] + ): + raise ValueError( + "Invalid path in `from_paths` not starting with the root node. " + "Check your `from_paths` parameter, alternatively set `with_full_path=False` to shift " + "partial path instead of full path." + ) + if not all( + [ + path.lstrip(tree_sep).split(tree_sep)[0] == to_tree.root.node_name + for path in to_paths + if path + ] + ): + raise ValueError( + "Invalid path in `to_paths` not starting with the root node. Check your `to_paths` parameter." 
+ ) + + # Perform shifting/copying + for from_path, to_path in zip(from_paths, to_paths): + if with_full_path: + from_node = find_full_path(tree, from_path) + else: + from_node = find_path(tree, from_path) + + # From node not found + if not from_node: + if not skippable: + raise NotFoundError( + f"Unable to find from_path {from_path}\n" + f"Set `skippable` to True to skip shifting for nodes not found" + ) + else: + logging.info(f"Unable to find from_path {from_path}") + + # From node found + else: + # Node to be deleted + if not to_path: + to_node = None + # Node to be copied/shifted + else: + to_node = find_full_path(to_tree, to_path) + + # To node found + if to_node: + if from_node == to_node: + if merge_children: + parent = to_node.parent + to_node.parent = None + to_node = parent + elif merge_leaves: + to_node = to_node.parent + else: + raise TreeError( + f"Attempting to shift the same node {from_node.node_name} back to the same position\n" + f"Check from path {from_path} and to path {to_path}\n" + f"Alternatively, set `merge_children` or `merge_leaves` to True if intermediate node is to be removed" + ) + elif merge_children: + # Specify override to remove existing node, else children are merged + if not overriding: + logging.info( + f"Path {to_path} already exists and children are merged" + ) + else: + logging.info( + f"Path {to_path} already exists and its children be overridden by the merge" + ) + parent = to_node.parent + to_node.parent = None + to_node = parent + merge_children = False + elif merge_leaves: + # Specify override to remove existing node, else leaves are merged + if not overriding: + logging.info( + f"Path {to_path} already exists and leaves are merged" + ) + else: + logging.info( + f"Path {to_path} already exists and its leaves be overridden by the merge" + ) + del to_node.children + else: + if not overriding: + raise TreeError( + f"Path {to_path} already exists and unable to override\n" + f"Set `overriding` to True to perform overrides\n" + 
f"Alternatively, set `merge_children` to True if nodes are to be merged" + ) + logging.info( + f"Path {to_path} already exists and will be overridden" + ) + parent = to_node.parent + to_node.parent = None + to_node = parent + + # To node not found + else: + # Find parent node, create intermediate parent node if applicable + to_path_parent = tree_sep.join(to_path.split(tree_sep)[:-1]) + to_node = add_path_to_tree(to_tree, to_path_parent, sep=tree_sep) + + # Reassign from_node to new parent + if copy: + logging.debug(f"Copying {from_node.node_name}") + from_node = from_node.copy() + if merge_children: + logging.debug( + f"Reassigning children from {from_node.node_name} to {to_node.node_name}" + ) + for children in from_node.children: + if delete_children: + del children.children + children.parent = to_node + from_node.parent = None + elif merge_leaves: + logging.debug( + f"Reassigning leaf nodes from {from_node.node_name} to {to_node.node_name}" + ) + for children in from_node.leaves: + children.parent = to_node + else: + if delete_children: + del from_node.children + from_node.parent = to_node + + +def replace_logic( + tree: Node, + from_paths: List[str], + to_paths: List[str], + sep: str = "/", + copy: bool = False, + skippable: bool = False, + delete_children: bool = False, + to_tree: Optional[Node] = None, + with_full_path: bool = False, +) -> None: + """Shift or copy nodes from `from_paths` to *replace* `to_paths` *in-place*. + + - Creates intermediate nodes if to path is not present + - Able to copy node, defaults to False (nodes are shifted; not copied). + - Able to skip nodes if from path is not found, defaults to False (from-nodes must be found; not skippable) + - Able to replace node only and delete children, defaults to False (nodes are shifted/copied together with children). 
+ - Able to shift/copy nodes from one tree to another tree, defaults to None (shifting/copying happens within same tree) + + For paths in `from_paths` and `to_paths`, + + - Path name can be with or without leading tree path separator symbol. + + For paths in `from_paths`, + + - Path name can be partial path (trailing part of path) or node name. + - If ``with_full_path=True``, path name must be full path. + - Path name must be unique to one node. + + For paths in `to_paths`, + + - Path name must be full path. + - Path must exist, node-to-be-replaced must be present. + + Args: + tree (Node): tree to modify + from_paths (List[str]): original paths to shift nodes from + to_paths (List[str]): new paths to shift nodes to + sep (str): path separator for input paths, applies to `from_path` and `to_path` + copy (bool): indicator to copy node, defaults to False + skippable (bool): indicator to skip if from path is not found, defaults to False + delete_children (bool): indicator to shift/copy node only without children, defaults to False + to_tree (Node): tree to copy to, defaults to None + with_full_path (bool): indicator to shift/copy node with full path in `from_paths`, results in faster search, + defaults to False + """ + if not (isinstance(from_paths, list) and isinstance(to_paths, list)): + raise ValueError( + "Invalid type, `from_paths` and `to_paths` should be list type" + ) + if len(from_paths) != len(to_paths): + raise ValueError( + f"Paths are different length, input `from_paths` have {len(from_paths)} entries, " + f"while output `to_paths` have {len(to_paths)} entries" + ) + + # Modify `sep` of from_paths and to_paths + if not to_tree: + to_tree = tree + tree_sep = to_tree.sep + from_paths = [path.rstrip(sep).replace(sep, tree.sep) for path in from_paths] + to_paths = [ + path.rstrip(sep).replace(sep, tree_sep) if path else None for path in to_paths + ] + + if with_full_path: + if not all( + [ + path.lstrip(tree.sep).split(tree.sep)[0] == tree.root.node_name + for 
path in from_paths + ] + ): + raise ValueError( + "Invalid path in `from_paths` not starting with the root node. " + "Check your `from_paths` parameter, alternatively set `with_full_path=False` to shift " + "partial path instead of full path." + ) + if not all( + [ + path.lstrip(tree_sep).split(tree_sep)[0] == to_tree.root.node_name + for path in to_paths + if path + ] + ): + raise ValueError( + "Invalid path in `to_paths` not starting with the root node. Check your `to_paths` parameter." + ) + + # Perform shifting/copying to replace destination node + for from_path, to_path in zip(from_paths, to_paths): + if with_full_path: + from_node = find_full_path(tree, from_path) + else: + from_node = find_path(tree, from_path) + + # From node not found + if not from_node: + if not skippable: + raise NotFoundError( + f"Unable to find from_path {from_path}\n" + f"Set `skippable` to True to skip shifting for nodes not found" + ) + else: + logging.info(f"Unable to find from_path {from_path}") + + # From node found + else: + to_node = find_full_path(to_tree, to_path) + + # To node found + if to_node: + if from_node == to_node: + raise TreeError( + f"Attempting to replace the same node {from_node.node_name}\n" + f"Check from path {from_path} and to path {to_path}" + ) + + # To node not found + else: + raise NotFoundError(f"Unable to find to_path {to_path}") + + # Replace to_node with from_node + if copy: + logging.debug(f"Copying {from_node.node_name}") + from_node = from_node.copy() + if delete_children: + del from_node.children + parent = to_node.parent + to_node_siblings = parent.children + to_node_idx = to_node_siblings.index(to_node) + for node in to_node_siblings[to_node_idx:]: + if node == to_node: + to_node.parent = None + from_node.parent = parent + else: + node.parent = None + node.parent = parent diff --git a/python310/packages/bigtree/tree/search.py b/python310/packages/bigtree/tree/search.py new file mode 100644 index 0000000..2c84001 --- /dev/null +++ 
b/python310/packages/bigtree/tree/search.py @@ -0,0 +1,479 @@ +from typing import Any, Callable, Iterable, List, Tuple, TypeVar, Union + +from bigtree.node.basenode import BaseNode +from bigtree.node.dagnode import DAGNode +from bigtree.node.node import Node +from bigtree.utils.exceptions import SearchError +from bigtree.utils.iterators import preorder_iter + +__all__ = [ + "findall", + "find", + "find_name", + "find_names", + "find_relative_path", + "find_full_path", + "find_path", + "find_paths", + "find_attr", + "find_attrs", + "find_children", + "find_child", + "find_child_by_name", +] + + +T = TypeVar("T", bound=BaseNode) +NodeT = TypeVar("NodeT", bound=Node) +DAGNodeT = TypeVar("DAGNodeT", bound=DAGNode) + + +def findall( + tree: T, + condition: Callable[[T], bool], + max_depth: int = 0, + min_count: int = 0, + max_count: int = 0, +) -> Tuple[T, ...]: + """ + Search tree for nodes matching condition (callable function). + + Examples: + >>> from bigtree import Node, findall + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=60, parent=root) + >>> d = Node("d", age=40, parent=c) + >>> findall(root, lambda node: node.age > 62) + (Node(/a, age=90), Node(/a/b, age=65)) + + Args: + tree (BaseNode): tree to search + condition (Callable): function that takes in node as argument, returns node if condition evaluates to `True` + max_depth (int): maximum depth to search for, based on the `depth` attribute, defaults to None + min_count (int): checks for minimum number of occurrences, + raise SearchError if the number of results do not meet min_count, defaults to None + max_count (int): checks for maximum number of occurrences, + raise SearchError if the number of results do not meet min_count, defaults to None + + Returns: + (Tuple[BaseNode, ...]) + """ + result = tuple(preorder_iter(tree, filter_condition=condition, max_depth=max_depth)) + if min_count and len(result) < min_count: + raise SearchError( + f"Expected more than 
{min_count} element(s), found {len(result)} elements\n{result}" + ) + if max_count and len(result) > max_count: + raise SearchError( + f"Expected less than {max_count} element(s), found {len(result)} elements\n{result}" + ) + return result + + +def find(tree: T, condition: Callable[[T], bool], max_depth: int = 0) -> T: + """ + Search tree for *single node* matching condition (callable function). + + Examples: + >>> from bigtree import Node, find + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=60, parent=root) + >>> d = Node("d", age=40, parent=c) + >>> find(root, lambda node: node.age == 65) + Node(/a/b, age=65) + >>> find(root, lambda node: node.age > 5) + Traceback (most recent call last): + ... + bigtree.utils.exceptions.SearchError: Expected less than 1 element(s), found 4 elements + (Node(/a, age=90), Node(/a/b, age=65), Node(/a/c, age=60), Node(/a/c/d, age=40)) + + Args: + tree (BaseNode): tree to search + condition (Callable): function that takes in node as argument, returns node if condition evaluates to `True` + max_depth (int): maximum depth to search for, based on the `depth` attribute, defaults to None + + Returns: + (BaseNode) + """ + result = findall(tree, condition, max_depth, max_count=1) + if result: + return result[0] + + +def find_name(tree: NodeT, name: str, max_depth: int = 0) -> NodeT: + """ + Search tree for single node matching name attribute. 
+ + Examples: + >>> from bigtree import Node, find_name + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=60, parent=root) + >>> d = Node("d", age=40, parent=c) + >>> find_name(root, "c") + Node(/a/c, age=60) + + Args: + tree (Node): tree to search + name (str): value to match for name attribute + max_depth (int): maximum depth to search for, based on the `depth` attribute, defaults to None + + Returns: + (Node) + """ + return find(tree, lambda node: node.node_name == name, max_depth) + + +def find_names(tree: NodeT, name: str, max_depth: int = 0) -> Iterable[NodeT]: + """ + Search tree for multiple node(s) matching name attribute. + + Examples: + >>> from bigtree import Node, find_names + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=60, parent=root) + >>> d = Node("b", age=40, parent=c) + >>> find_names(root, "c") + (Node(/a/c, age=60),) + >>> find_names(root, "b") + (Node(/a/b, age=65), Node(/a/c/b, age=40)) + + Args: + tree (Node): tree to search + name (str): value to match for name attribute + max_depth (int): maximum depth to search for, based on the `depth` attribute, defaults to None + + Returns: + (Iterable[Node]) + """ + return findall(tree, lambda node: node.node_name == name, max_depth) + + +def find_relative_path(tree: NodeT, path_name: str) -> Iterable[NodeT]: + r""" + Search tree for single node matching relative path attribute. + + - Supports unix folder expression for relative path, i.e., '../../node_name' + - Supports wildcards, i.e., '\*/node_name' + - If path name starts with leading separator symbol, it will start at root node. 
+ + Examples: + >>> from bigtree import Node, find_relative_path + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=60, parent=root) + >>> d = Node("d", age=40, parent=c) + >>> find_relative_path(d, "..") + (Node(/a/c, age=60),) + >>> find_relative_path(d, "../../b") + (Node(/a/b, age=65),) + >>> find_relative_path(d, "../../*") + (Node(/a/b, age=65), Node(/a/c, age=60)) + + Args: + tree (Node): tree to search + path_name (str): value to match (relative path) of path_name attribute + + Returns: + (Iterable[Node]) + """ + sep = tree.sep + if path_name.startswith(sep): + resolved_node = find_full_path(tree, path_name) + return (resolved_node,) + path_name = path_name.rstrip(sep).lstrip(sep) + path_list = path_name.split(sep) + wildcard_indicator = "*" in path_name + resolved_nodes: List[NodeT] = [] + + def resolve(node: NodeT, path_idx: int) -> None: + """Resolve node based on path name + + Args: + node (Node): current node + path_idx (int): current index in path_list + """ + if path_idx == len(path_list): + resolved_nodes.append(node) + else: + path_component = path_list[path_idx] + if path_component == ".": + resolve(node, path_idx + 1) + elif path_component == "..": + if node.is_root: + raise SearchError("Invalid path name. Path goes beyond root node.") + resolve(node.parent, path_idx + 1) + elif path_component == "*": + for child in node.children: + resolve(child, path_idx + 1) + else: + node = find_child_by_name(node, path_component) + if not node: + if not wildcard_indicator: + raise SearchError( + f"Invalid path name. Node {path_component} cannot be found." + ) + else: + resolve(node, path_idx + 1) + + resolve(tree, 0) + + return tuple(resolved_nodes) + + +def find_full_path(tree: NodeT, path_name: str) -> NodeT: + """ + Search tree for single node matching path attribute. + + - Path name can be with or without leading tree path separator symbol. 
+ - Path name must be full path, works similar to `find_path` but faster. + + Examples: + >>> from bigtree import Node, find_full_path + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=60, parent=root) + >>> d = Node("d", age=40, parent=c) + >>> find_full_path(root, "/a/c/d") + Node(/a/c/d, age=40) + + Args: + tree (Node): tree to search + path_name (str): value to match (full path) of path_name attribute + + Returns: + (Node) + """ + sep = tree.sep + path_list = path_name.rstrip(sep).lstrip(sep).split(sep) + if path_list[0] != tree.root.node_name: + raise ValueError( + f"Path {path_name} does not match the root node name {tree.root.node_name}" + ) + parent_node = tree.root + child_node = parent_node + for child_name in path_list[1:]: + child_node = find_child_by_name(parent_node, child_name) + if not child_node: + break + parent_node = child_node + return child_node + + +def find_path(tree: NodeT, path_name: str) -> NodeT: + """ + Search tree for single node matching path attribute. + + - Path name can be with or without leading tree path separator symbol. + - Path name can be full path or partial path (trailing part of path) or node name. + + Examples: + >>> from bigtree import Node, find_path + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=60, parent=root) + >>> d = Node("d", age=40, parent=c) + >>> find_path(root, "c") + Node(/a/c, age=60) + >>> find_path(root, "/c") + Node(/a/c, age=60) + + Args: + tree (Node): tree to search + path_name (str): value to match (full path) or trailing part (partial path) of path_name attribute + + Returns: + (Node) + """ + path_name = path_name.rstrip(tree.sep) + return find(tree, lambda node: node.path_name.endswith(path_name)) + + +def find_paths(tree: NodeT, path_name: str) -> Tuple[NodeT, ...]: + """ + Search tree for multiple nodes matching path attribute. + + - Path name can be with or without leading tree path separator symbol. 
+ - Path name can be partial path (trailing part of path) or node name. + + Examples: + >>> from bigtree import Node, find_paths + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=60, parent=root) + >>> d = Node("c", age=40, parent=c) + >>> find_paths(root, "/a/c") + (Node(/a/c, age=60),) + >>> find_paths(root, "/c") + (Node(/a/c, age=60), Node(/a/c/c, age=40)) + + Args: + tree (Node): tree to search + path_name (str): value to match (full path) or trailing part (partial path) of path_name attribute + + Returns: + (Tuple[Node, ...]) + """ + path_name = path_name.rstrip(tree.sep) + return findall(tree, lambda node: node.path_name.endswith(path_name)) + + +def find_attr( + tree: BaseNode, attr_name: str, attr_value: Any, max_depth: int = 0 +) -> BaseNode: + """ + Search tree for single node matching custom attribute. + + Examples: + >>> from bigtree import Node, find_attr + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=60, parent=root) + >>> d = Node("d", age=40, parent=c) + >>> find_attr(root, "age", 65) + Node(/a/b, age=65) + + Args: + tree (BaseNode): tree to search + attr_name (str): attribute name to perform matching + attr_value (Any): value to match for attr_name attribute + max_depth (int): maximum depth to search for, based on the `depth` attribute, defaults to None + + Returns: + (BaseNode) + """ + return find( + tree, + lambda node: bool(node.get_attr(attr_name) == attr_value), + max_depth, + ) + + +def find_attrs( + tree: BaseNode, attr_name: str, attr_value: Any, max_depth: int = 0 +) -> Tuple[BaseNode, ...]: + """ + Search tree for node(s) matching custom attribute. 
+ + Examples: + >>> from bigtree import Node, find_attrs + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=65, parent=root) + >>> d = Node("d", age=40, parent=c) + >>> find_attrs(root, "age", 65) + (Node(/a/b, age=65), Node(/a/c, age=65)) + + Args: + tree (BaseNode): tree to search + attr_name (str): attribute name to perform matching + attr_value (Any): value to match for attr_name attribute + max_depth (int): maximum depth to search for, based on the `depth` attribute, defaults to None + + Returns: + (Tuple[BaseNode, ...]) + """ + return findall( + tree, + lambda node: bool(node.get_attr(attr_name) == attr_value), + max_depth, + ) + + +def find_children( + tree: Union[T, DAGNodeT], + condition: Callable[[Union[T, DAGNodeT]], bool], + min_count: int = 0, + max_count: int = 0, +) -> Tuple[Union[T, DAGNodeT], ...]: + """ + Search children for nodes matching condition (callable function). + + Examples: + >>> from bigtree import Node, find_children + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=60, parent=root) + >>> d = Node("d", age=40, parent=c) + >>> find_children(root, lambda node: node.age > 30) + (Node(/a/b, age=65), Node(/a/c, age=60)) + + Args: + tree (BaseNode/DAGNode): tree to search for its children + condition (Callable): function that takes in node as argument, returns node if condition evaluates to `True` + min_count (int): checks for minimum number of occurrences, + raise SearchError if the number of results do not meet min_count, defaults to None + max_count (int): checks for maximum number of occurrences, + raise SearchError if the number of results do not meet min_count, defaults to None + + Returns: + (BaseNode/DAGNode) + """ + result = tuple([node for node in tree.children if node and condition(node)]) + if min_count and len(result) < min_count: + raise SearchError( + f"Expected more than {min_count} element(s), found {len(result)} elements\n{result}" + 
) + if max_count and len(result) > max_count: + raise SearchError( + f"Expected less than {max_count} element(s), found {len(result)} elements\n{result}" + ) + return result + + +def find_child( + tree: Union[T, DAGNodeT], + condition: Callable[[Union[T, DAGNodeT]], bool], +) -> Union[T, DAGNodeT]: + """ + Search children for *single node* matching condition (callable function). + + Examples: + >>> from bigtree import Node, find_child + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=60, parent=root) + >>> d = Node("d", age=40, parent=c) + >>> find_child(root, lambda node: node.age > 62) + Node(/a/b, age=65) + + Args: + tree (BaseNode/DAGNode): tree to search for its child + condition (Callable): function that takes in node as argument, returns node if condition evaluates to `True` + + Returns: + (BaseNode/DAGNode) + """ + result = find_children(tree, condition, max_count=1) + if result: + return result[0] + + +def find_child_by_name( + tree: Union[NodeT, DAGNodeT], name: str +) -> Union[NodeT, DAGNodeT]: + """ + Search tree for single node matching name attribute. 
+ + Examples: + >>> from bigtree import Node, find_child_by_name + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=60, parent=root) + >>> d = Node("d", age=40, parent=c) + >>> find_child_by_name(root, "c") + Node(/a/c, age=60) + >>> find_child_by_name(c, "d") + Node(/a/c/d, age=40) + + Args: + tree (Node/DAGNode): tree to search, parent node + name (str): value to match for name attribute, child node + + Returns: + (Node/DAGNode) + """ + return find_child(tree, lambda node: node.node_name == name) diff --git a/python310/packages/bigtree/utils/__init__.py b/python310/packages/bigtree/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python310/packages/bigtree/utils/assertions.py b/python310/packages/bigtree/utils/assertions.py new file mode 100644 index 0000000..8b0a651 --- /dev/null +++ b/python310/packages/bigtree/utils/assertions.py @@ -0,0 +1,53 @@ +from typing import Any, Dict, List + + +def assert_style_in_dict( + parameter: Any, + accepted_parameters: Dict[str, Any], +) -> None: + """Raise ValueError is parameter is not in list of accepted parameters + + Args: + parameter (Any): argument input for parameter + accepted_parameters (List[Any]): list of accepted parameters + """ + if parameter not in accepted_parameters and parameter != "custom": + raise ValueError( + f"Choose one of {accepted_parameters.keys()} style, use `custom` to define own style" + ) + + +def assert_str_in_list( + parameter_name: str, + parameter: Any, + accepted_parameters: List[Any], +) -> None: + """Raise ValueError is parameter is not in list of accepted parameters + + Args: + parameter_name (str): parameter name for error message + parameter (Any): argument input for parameter + accepted_parameters (List[Any]): list of accepted parameters + """ + if parameter not in accepted_parameters: + raise ValueError( + f"Invalid input, check `{parameter_name}` should be one of {accepted_parameters}" + ) + + +def 
assert_key_in_dict( + parameter_name: str, + parameter: Any, + accepted_parameters: Dict[Any, Any], +) -> None: + """Raise ValueError is parameter is not in key of dictionary + + Args: + parameter_name (str): parameter name for error message + parameter (Any): argument input for parameter + accepted_parameters (Dict[Any]): dictionary of accepted parameters + """ + if parameter not in accepted_parameters: + raise ValueError( + f"Invalid input, check `{parameter_name}` should be one of {accepted_parameters.keys()}" + ) diff --git a/python310/packages/bigtree/utils/constants.py b/python310/packages/bigtree/utils/constants.py new file mode 100644 index 0000000..3437b36 --- /dev/null +++ b/python310/packages/bigtree/utils/constants.py @@ -0,0 +1,165 @@ +from enum import Enum, auto +from typing import Dict, List, Tuple + + +class ExportConstants: + DOWN_RIGHT = "\u250c" + VERTICAL_RIGHT = "\u251c" + VERTICAL_LEFT = "\u2524" + VERTICAL_HORIZONTAL = "\u253c" + UP_RIGHT = "\u2514" + VERTICAL = "\u2502" + HORIZONTAL = "\u2500" + + DOWN_RIGHT_ROUNDED = "\u256D" + UP_RIGHT_ROUNDED = "\u2570" + + DOWN_RIGHT_BOLD = "\u250F" + VERTICAL_RIGHT_BOLD = "\u2523" + VERTICAL_LEFT_BOLD = "\u252B" + VERTICAL_HORIZONTAL_BOLD = "\u254B" + UP_RIGHT_BOLD = "\u2517" + VERTICAL_BOLD = "\u2503" + HORIZONTAL_BOLD = "\u2501" + + DOWN_RIGHT_DOUBLE = "\u2554" + VERTICAL_RIGHT_DOUBLE = "\u2560" + VERTICAL_LEFT_DOUBLE = "\u2563" + VERTICAL_HORIZONTAL_DOUBLE = "\u256C" + UP_RIGHT_DOUBLE = "\u255a" + VERTICAL_DOUBLE = "\u2551" + HORIZONTAL_DOUBLE = "\u2550" + + PRINT_STYLES: Dict[str, Tuple[str, str, str]] = { + "ansi": ("| ", "|-- ", "`-- "), + "ascii": ("| ", "|-- ", "+-- "), + "const": ( + f"{VERTICAL} ", + f"{VERTICAL_RIGHT}{HORIZONTAL}{HORIZONTAL} ", + f"{UP_RIGHT}{HORIZONTAL}{HORIZONTAL} ", + ), + "const_bold": ( + f"{VERTICAL_BOLD} ", + f"{VERTICAL_RIGHT_BOLD}{HORIZONTAL_BOLD}{HORIZONTAL_BOLD} ", + f"{UP_RIGHT_BOLD}{HORIZONTAL_BOLD}{HORIZONTAL_BOLD} ", + ), + "rounded": ( + f"{VERTICAL} ", + 
f"{VERTICAL_RIGHT}{HORIZONTAL}{HORIZONTAL} ", + f"{UP_RIGHT_ROUNDED}{HORIZONTAL}{HORIZONTAL} ", + ), + "double": ( + f"{VERTICAL_DOUBLE} ", + f"{VERTICAL_RIGHT_DOUBLE}{HORIZONTAL_DOUBLE}{HORIZONTAL_DOUBLE} ", + f"{UP_RIGHT_DOUBLE}{HORIZONTAL_DOUBLE}{HORIZONTAL_DOUBLE} ", + ), + } + + HPRINT_STYLES: Dict[str, Tuple[str, str, str, str, str, str, str]] = { + "ansi": ("/", "+", "+", "+", "\\", "|", "-"), + "ascii": ("+", "+", "+", "+", "+", "|", "-"), + "const": ( + DOWN_RIGHT, + VERTICAL_RIGHT, + VERTICAL_LEFT, + VERTICAL_HORIZONTAL, + UP_RIGHT, + VERTICAL, + HORIZONTAL, + ), + "const_bold": ( + DOWN_RIGHT_BOLD, + VERTICAL_RIGHT_BOLD, + VERTICAL_LEFT_BOLD, + VERTICAL_HORIZONTAL_BOLD, + UP_RIGHT_BOLD, + VERTICAL_BOLD, + HORIZONTAL_BOLD, + ), + "rounded": ( + DOWN_RIGHT_ROUNDED, + VERTICAL_RIGHT, + VERTICAL_LEFT, + VERTICAL_HORIZONTAL, + UP_RIGHT_ROUNDED, + VERTICAL, + HORIZONTAL, + ), + "double": ( + DOWN_RIGHT_DOUBLE, + VERTICAL_RIGHT_DOUBLE, + VERTICAL_LEFT_DOUBLE, + VERTICAL_HORIZONTAL_DOUBLE, + UP_RIGHT_DOUBLE, + VERTICAL_DOUBLE, + HORIZONTAL_DOUBLE, + ), + } + + +class MermaidConstants: + RANK_DIR: List[str] = ["TB", "BT", "LR", "RL"] + LINE_SHAPES: List[str] = [ + "basis", + "bumpX", + "bumpY", + "cardinal", + "catmullRom", + "linear", + "monotoneX", + "monotoneY", + "natural", + "step", + "stepAfter", + "stepBefore", + ] + NODE_SHAPES: Dict[str, str] = { + "rounded_edge": """("{label}")""", + "stadium": """(["{label}"])""", + "subroutine": """[["{label}"]]""", + "cylindrical": """[("{label}")]""", + "circle": """(("{label}"))""", + "asymmetric": """>"{label}"]""", + "rhombus": """{{"{label}"}}""", + "hexagon": """{{{{"{label}"}}}}""", + "parallelogram": """[/"{label}"/]""", + "parallelogram_alt": """[\\"{label}"\\]""", + "trapezoid": """[/"{label}"\\]""", + "trapezoid_alt": """[\\"{label}"/]""", + "double_circle": """((("{label}")))""", + } + EDGE_ARROWS: Dict[str, str] = { + "normal": "-->", + "bold": "==>", + "dotted": "-.->", + "open": "---", + "bold_open": 
"===", + "dotted_open": "-.-", + "invisible": "~~~", + "circle": "--o", + "cross": "--x", + "double_normal": "<-->", + "double_circle": "o--o", + "double_cross": "x--x", + } + + +class NewickState(Enum): + PARSE_STRING = auto() + PARSE_ATTRIBUTE_NAME = auto() + PARSE_ATTRIBUTE_VALUE = auto() + + +class NewickCharacter(str, Enum): + OPEN_BRACKET = "(" + CLOSE_BRACKET = ")" + ATTR_START = "[" + ATTR_END = "]" + ATTR_KEY_VALUE = "=" + ATTR_QUOTE = "'" + SEP = ":" + NODE_SEP = "," + + @classmethod + def values(cls) -> List[str]: + return [c.value for c in cls] diff --git a/python310/packages/bigtree/utils/exceptions.py b/python310/packages/bigtree/utils/exceptions.py new file mode 100644 index 0000000..0cd902c --- /dev/null +++ b/python310/packages/bigtree/utils/exceptions.py @@ -0,0 +1,126 @@ +from functools import wraps +from typing import Any, Callable, TypeVar +from warnings import simplefilter, warn + +T = TypeVar("T") + + +class TreeError(Exception): + """Generic tree exception""" + + pass + + +class LoopError(TreeError): + """Error during node creation""" + + pass + + +class CorruptedTreeError(TreeError): + """Error during node creation""" + + pass + + +class DuplicatedNodeError(TreeError): + """Error during tree creation""" + + pass + + +class NotFoundError(TreeError): + """Error during tree pruning or modification""" + + pass + + +class SearchError(TreeError): + """Error during tree search""" + + pass + + +def deprecated( + alias: str, +) -> Callable[[Callable[..., T]], Callable[..., T]]: # pragma: no cover + def decorator(func: Callable[..., T]) -> Callable[..., T]: + """ + This is a decorator which can be used to mark functions as deprecated. + It will raise a DeprecationWarning when the function is used. 
+ Source: https://stackoverflow.com/a/30253848 + """ + + @wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> T: + simplefilter("always", DeprecationWarning) + warn( + "{old_func} is going to be deprecated, use {new_func} instead".format( + old_func=func.__name__, + new_func=alias, + ), + category=DeprecationWarning, + stacklevel=2, + ) + simplefilter("default", DeprecationWarning) # reset filter + return func(*args, **kwargs) + + return wrapper + + return decorator + + +def optional_dependencies_pandas( + func: Callable[..., T] +) -> Callable[..., T]: # pragma: no cover + """ + This is a decorator which can be used to import optional pandas dependency. + It will raise a ImportError if the module is not found. + """ + + @wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> T: + try: + import pandas as pd # noqa: F401 + except ImportError: + raise ImportError( + "pandas not available. Please perform a\n\n" + "pip install 'bigtree[pandas]'\n\nto install required dependencies" + ) from None + return func(*args, **kwargs) + + return wrapper + + +def optional_dependencies_image( + package_name: str = "", +) -> Callable[[Callable[..., T]], Callable[..., T]]: + def decorator(func: Callable[..., T]) -> Callable[..., T]: + """ + This is a decorator which can be used to import optional image dependency. + It will raise a ImportError if the module is not found. + """ + + @wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> T: + if not package_name or package_name == "pydot": + try: + import pydot # noqa: F401 + except ImportError: # pragma: no cover + raise ImportError( + "pydot not available. Please perform a\n\n" + "pip install 'bigtree[image]'\n\nto install required dependencies" + ) from None + if not package_name or package_name == "Pillow": + try: + from PIL import Image, ImageDraw, ImageFont # noqa: F401 + except ImportError: # pragma: no cover + raise ImportError( + "Pillow not available. 
Please perform a\n\n" + "pip install 'bigtree[image]'\n\nto install required dependencies" + ) from None + return func(*args, **kwargs) + + return wrapper + + return decorator diff --git a/python310/packages/bigtree/utils/groot.py b/python310/packages/bigtree/utils/groot.py new file mode 100644 index 0000000..22fc06b --- /dev/null +++ b/python310/packages/bigtree/utils/groot.py @@ -0,0 +1,19 @@ +def whoami() -> str: + """Groot utils + + Returns: + (str) + """ + return "I am Groot!" + + +def speak_like_groot(sentence: str) -> str: + """Convert sentence into Groot langauge + + Args: + sentence (str): Sentence to convert to groot language + + Returns: + (str) + """ + return " ".join([whoami() for _ in range(len(sentence.split()))]) diff --git a/python310/packages/bigtree/utils/iterators.py b/python310/packages/bigtree/utils/iterators.py new file mode 100644 index 0000000..e3eb896 --- /dev/null +++ b/python310/packages/bigtree/utils/iterators.py @@ -0,0 +1,587 @@ +from __future__ import annotations + +from typing import ( + TYPE_CHECKING, + Callable, + Iterable, + List, + Optional, + Tuple, + TypeVar, + Union, +) + +if TYPE_CHECKING: + from bigtree.node.basenode import BaseNode + from bigtree.node.binarynode import BinaryNode + from bigtree.node.dagnode import DAGNode + + BaseNodeT = TypeVar("BaseNodeT", bound=BaseNode) + BinaryNodeT = TypeVar("BinaryNodeT", bound=BinaryNode) + DAGNodeT = TypeVar("DAGNodeT", bound=DAGNode) + T = TypeVar("T", bound=Union[BaseNode, DAGNode]) + +__all__ = [ + "inorder_iter", + "preorder_iter", + "postorder_iter", + "levelorder_iter", + "levelordergroup_iter", + "zigzag_iter", + "zigzaggroup_iter", + "dag_iterator", +] + + +def inorder_iter( + tree: BinaryNodeT, + filter_condition: Optional[Callable[[BinaryNodeT], bool]] = None, + max_depth: int = 0, +) -> Iterable[BinaryNodeT]: + """Iterate through all children of a tree. + + In-Order Iteration Algorithm, LNR + 1. Recursively traverse the current node's left subtree. + 2. 
Visit the current node. + 3. Recursively traverse the current node's right subtree. + + Examples: + >>> from bigtree import BinaryNode, list_to_binarytree, inorder_iter + >>> num_list = [1, 2, 3, 4, 5, 6, 7, 8] + >>> root = list_to_binarytree(num_list) + >>> root.show() + 1 + ├── 2 + │ ├── 4 + │ │ └── 8 + │ └── 5 + └── 3 + ├── 6 + └── 7 + + >>> [node.node_name for node in inorder_iter(root)] + ['8', '4', '2', '5', '1', '6', '3', '7'] + + >>> [node.node_name for node in inorder_iter(root, filter_condition=lambda x: x.node_name in ["1", "4", "3", "6", "7"])] + ['4', '1', '6', '3', '7'] + + >>> [node.node_name for node in inorder_iter(root, max_depth=3)] + ['4', '2', '5', '1', '6', '3', '7'] + + Args: + tree (BinaryNode): input tree + filter_condition (Optional[Callable[[BinaryNode], bool]]): function that takes in node as argument, optional + Return node if condition evaluates to `True` + max_depth (int): maximum depth of iteration, based on `depth` attribute, optional + + Returns: + (Iterable[BinaryNode]) + """ + if tree and (not max_depth or not tree.depth > max_depth): + yield from inorder_iter(tree.left, filter_condition, max_depth) + if not filter_condition or filter_condition(tree): + yield tree + yield from inorder_iter(tree.right, filter_condition, max_depth) + + +def preorder_iter( + tree: T, + filter_condition: Optional[Callable[[T], bool]] = None, + stop_condition: Optional[Callable[[T], bool]] = None, + max_depth: int = 0, +) -> Iterable[T]: + """Iterate through all children of a tree. + + Pre-Order Iteration Algorithm, NLR + 1. Visit the current node. + 2. Recursively traverse the current node's left subtree. + 3. Recursively traverse the current node's right subtree. + + It is topologically sorted because a parent node is processed before its child nodes. 
+ + Examples: + >>> from bigtree import Node, list_to_tree, preorder_iter + >>> path_list = ["a/b/d", "a/b/e/g", "a/b/e/h", "a/c/f"] + >>> root = list_to_tree(path_list) + >>> root.show() + a + ├── b + │ ├── d + │ └── e + │ ├── g + │ └── h + └── c + └── f + + >>> [node.node_name for node in preorder_iter(root)] + ['a', 'b', 'd', 'e', 'g', 'h', 'c', 'f'] + + >>> [node.node_name for node in preorder_iter(root, filter_condition=lambda x: x.node_name in ["a", "d", "e", "f", "g"])] + ['a', 'd', 'e', 'g', 'f'] + + >>> [node.node_name for node in preorder_iter(root, stop_condition=lambda x: x.node_name == "e")] + ['a', 'b', 'd', 'c', 'f'] + + >>> [node.node_name for node in preorder_iter(root, max_depth=3)] + ['a', 'b', 'd', 'e', 'c', 'f'] + + Args: + tree (Union[BaseNode, DAGNode]): input tree + filter_condition (Optional[Callable[[T], bool]]): function that takes in node as argument, optional + Return node if condition evaluates to `True` + stop_condition (Optional[Callable[[T], bool]]): function that takes in node as argument, optional + Stops iteration if condition evaluates to `True` + max_depth (int): maximum depth of iteration, based on `depth` attribute, optional + + Returns: + (Union[Iterable[BaseNode], Iterable[DAGNode]]) + """ + if ( + tree + and (not max_depth or not tree.get_attr("depth") > max_depth) + and (not stop_condition or not stop_condition(tree)) + ): + if not filter_condition or filter_condition(tree): + yield tree + for child in tree.children: + yield from preorder_iter(child, filter_condition, stop_condition, max_depth) # type: ignore + + +def postorder_iter( + tree: BaseNodeT, + filter_condition: Optional[Callable[[BaseNodeT], bool]] = None, + stop_condition: Optional[Callable[[BaseNodeT], bool]] = None, + max_depth: int = 0, +) -> Iterable[BaseNodeT]: + """Iterate through all children of a tree. + + Post-Order Iteration Algorithm, LRN + 1. Recursively traverse the current node's left subtree. + 2. 
Recursively traverse the current node's right subtree. + 3. Visit the current node. + + Examples: + >>> from bigtree import Node, list_to_tree, postorder_iter + >>> path_list = ["a/b/d", "a/b/e/g", "a/b/e/h", "a/c/f"] + >>> root = list_to_tree(path_list) + >>> root.show() + a + ├── b + │ ├── d + │ └── e + │ ├── g + │ └── h + └── c + └── f + + >>> [node.node_name for node in postorder_iter(root)] + ['d', 'g', 'h', 'e', 'b', 'f', 'c', 'a'] + + >>> [node.node_name for node in postorder_iter(root, filter_condition=lambda x: x.node_name in ["a", "d", "e", "f", "g"])] + ['d', 'g', 'e', 'f', 'a'] + + >>> [node.node_name for node in postorder_iter(root, stop_condition=lambda x: x.node_name == "e")] + ['d', 'b', 'f', 'c', 'a'] + + >>> [node.node_name for node in postorder_iter(root, max_depth=3)] + ['d', 'e', 'b', 'f', 'c', 'a'] + + Args: + tree (BaseNode): input tree + filter_condition (Optional[Callable[[BaseNode], bool]]): function that takes in node as argument, optional + Return node if condition evaluates to `True` + stop_condition (Optional[Callable[[BaseNode], bool]]): function that takes in node as argument, optional + Stops iteration if condition evaluates to `True` + max_depth (int): maximum depth of iteration, based on `depth` attribute, optional + + Returns: + (Iterable[BaseNode]) + """ + if ( + tree + and (not max_depth or not tree.depth > max_depth) + and (not stop_condition or not stop_condition(tree)) + ): + for child in tree.children: + yield from postorder_iter( + child, filter_condition, stop_condition, max_depth + ) + if not filter_condition or filter_condition(tree): + yield tree + + +def levelorder_iter( + tree: BaseNodeT, + filter_condition: Optional[Callable[[BaseNodeT], bool]] = None, + stop_condition: Optional[Callable[[BaseNodeT], bool]] = None, + max_depth: int = 0, +) -> Iterable[BaseNodeT]: + """Iterate through all children of a tree. + + Level-Order Iteration Algorithm + 1. Recursively traverse the nodes on same level. 
+ + Examples: + >>> from bigtree import Node, list_to_tree, levelorder_iter + >>> path_list = ["a/b/d", "a/b/e/g", "a/b/e/h", "a/c/f"] + >>> root = list_to_tree(path_list) + >>> root.show() + a + ├── b + │ ├── d + │ └── e + │ ├── g + │ └── h + └── c + └── f + + >>> [node.node_name for node in levelorder_iter(root)] + ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'] + + >>> [node.node_name for node in levelorder_iter(root, filter_condition=lambda x: x.node_name in ["a", "d", "e", "f", "g"])] + ['a', 'd', 'e', 'f', 'g'] + + >>> [node.node_name for node in levelorder_iter(root, stop_condition=lambda x: x.node_name == "e")] + ['a', 'b', 'c', 'd', 'f'] + + >>> [node.node_name for node in levelorder_iter(root, max_depth=3)] + ['a', 'b', 'c', 'd', 'e', 'f'] + + Args: + tree (BaseNode): input tree + filter_condition (Optional[Callable[[BaseNode], bool]]): function that takes in node as argument, optional + Return node if condition evaluates to `True` + stop_condition (Optional[Callable[[BaseNode], bool]]): function that takes in node as argument, optional + Stops iteration if condition evaluates to `True` + max_depth (int): maximum depth of iteration, based on `depth` attribute, defaults to None + + Returns: + (Iterable[BaseNode]) + """ + + def _levelorder_iter(trees: List[BaseNodeT]) -> Iterable[BaseNodeT]: + """Iterate through all children of a tree. 
+ + Args: + trees (List[BaseNode]): trees to get children for next level + + Returns: + (Iterable[BaseNode]) + """ + next_level = [] + for _tree in trees: + if _tree: + if (not max_depth or not _tree.depth > max_depth) and ( + not stop_condition or not stop_condition(_tree) + ): + if not filter_condition or filter_condition(_tree): + yield _tree + next_level.extend(list(_tree.children)) + if len(next_level): + yield from _levelorder_iter(next_level) + + yield from _levelorder_iter([tree]) + + +def levelordergroup_iter( + tree: BaseNodeT, + filter_condition: Optional[Callable[[BaseNodeT], bool]] = None, + stop_condition: Optional[Callable[[BaseNodeT], bool]] = None, + max_depth: int = 0, +) -> Iterable[Iterable[BaseNodeT]]: + """Iterate through all children of a tree. + + Level-Order Group Iteration Algorithm + 1. Recursively traverse the nodes on same level, returns nodes level by level in a nested list. + + Examples: + >>> from bigtree import Node, list_to_tree, levelordergroup_iter + >>> path_list = ["a/b/d", "a/b/e/g", "a/b/e/h", "a/c/f"] + >>> root = list_to_tree(path_list) + >>> root.show() + a + ├── b + │ ├── d + │ └── e + │ ├── g + │ └── h + └── c + └── f + + >>> [[node.node_name for node in group] for group in levelordergroup_iter(root)] + [['a'], ['b', 'c'], ['d', 'e', 'f'], ['g', 'h']] + + >>> [[node.node_name for node in group] for group in levelordergroup_iter(root, filter_condition=lambda x: x.node_name in ["a", "d", "e", "f", "g"])] + [['a'], [], ['d', 'e', 'f'], ['g']] + + >>> [[node.node_name for node in group] for group in levelordergroup_iter(root, stop_condition=lambda x: x.node_name == "e")] + [['a'], ['b', 'c'], ['d', 'f']] + + >>> [[node.node_name for node in group] for group in levelordergroup_iter(root, max_depth=3)] + [['a'], ['b', 'c'], ['d', 'e', 'f']] + + Args: + tree (BaseNode): input tree + filter_condition (Optional[Callable[[BaseNode], bool]]): function that takes in node as argument, optional + Return node if condition evaluates to 
`True` + stop_condition (Optional[Callable[[BaseNode], bool]]): function that takes in node as argument, optional + Stops iteration if condition evaluates to `True` + max_depth (int): maximum depth of iteration, based on `depth` attribute, defaults to None + + Returns: + (Iterable[Iterable[BaseNode]]) + """ + + def _levelordergroup_iter(trees: List[BaseNodeT]) -> Iterable[Iterable[BaseNodeT]]: + """Iterate through all children of a tree. + + Args: + trees (List[BaseNode]): trees to get children for next level + + Returns: + (Iterable[Iterable[BaseNode]]) + """ + current_tree = [] + next_level = [] + for _tree in trees: + if (not max_depth or not _tree.depth > max_depth) and ( + not stop_condition or not stop_condition(_tree) + ): + if not filter_condition or filter_condition(_tree): + current_tree.append(_tree) + next_level.extend([_child for _child in _tree.children if _child]) + yield tuple(current_tree) + if len(next_level) and (not max_depth or not next_level[0].depth > max_depth): + yield from _levelordergroup_iter(next_level) + + yield from _levelordergroup_iter([tree]) + + +def zigzag_iter( + tree: BaseNodeT, + filter_condition: Optional[Callable[[BaseNodeT], bool]] = None, + stop_condition: Optional[Callable[[BaseNodeT], bool]] = None, + max_depth: int = 0, +) -> Iterable[BaseNodeT]: + """Iterate through all children of a tree. + + ZigZag Iteration Algorithm + 1. Recursively traverse the nodes on same level, in a zigzag manner across different levels. 
+ + Examples: + >>> from bigtree import Node, list_to_tree, zigzag_iter + >>> path_list = ["a/b/d", "a/b/e/g", "a/b/e/h", "a/c/f"] + >>> root = list_to_tree(path_list) + >>> root.show() + a + ├── b + │ ├── d + │ └── e + │ ├── g + │ └── h + └── c + └── f + + >>> [node.node_name for node in zigzag_iter(root)] + ['a', 'c', 'b', 'd', 'e', 'f', 'h', 'g'] + + >>> [node.node_name for node in zigzag_iter(root, filter_condition=lambda x: x.node_name in ["a", "d", "e", "f", "g"])] + ['a', 'd', 'e', 'f', 'g'] + + >>> [node.node_name for node in zigzag_iter(root, stop_condition=lambda x: x.node_name == "e")] + ['a', 'c', 'b', 'd', 'f'] + + >>> [node.node_name for node in zigzag_iter(root, max_depth=3)] + ['a', 'c', 'b', 'd', 'e', 'f'] + + Args: + tree (BaseNode): input tree + filter_condition (Optional[Callable[[BaseNode], bool]]): function that takes in node as argument, optional + Return node if condition evaluates to `True` + stop_condition (Optional[Callable[[BaseNode], bool]]): function that takes in node as argument, optional + Stops iteration if condition evaluates to `True` + max_depth (int): maximum depth of iteration, based on `depth` attribute, defaults to None + + Returns: + (Iterable[BaseNode]) + """ + + def _zigzag_iter( + trees: List[BaseNodeT], reverse_indicator: bool = False + ) -> Iterable[BaseNodeT]: + """Iterate through all children of a tree. 
+ + Args: + trees (List[BaseNode]): trees to get children for next level + reverse_indicator (bool): indicator whether it is in reverse order + + Returns: + (Iterable[BaseNode]) + """ + next_level = [] + for _tree in trees: + if _tree: + if (not max_depth or not _tree.depth > max_depth) and ( + not stop_condition or not stop_condition(_tree) + ): + if not filter_condition or filter_condition(_tree): + yield _tree + next_level_nodes = list(_tree.children) + if reverse_indicator: + next_level_nodes = next_level_nodes[::-1] + next_level.extend(next_level_nodes) + if len(next_level): + yield from _zigzag_iter( + next_level[::-1], reverse_indicator=not reverse_indicator + ) + + yield from _zigzag_iter([tree]) + + +def zigzaggroup_iter( + tree: BaseNodeT, + filter_condition: Optional[Callable[[BaseNodeT], bool]] = None, + stop_condition: Optional[Callable[[BaseNodeT], bool]] = None, + max_depth: int = 0, +) -> Iterable[Iterable[BaseNodeT]]: + """Iterate through all children of a tree. + + ZigZag Group Iteration Algorithm + 1. Recursively traverse the nodes on same level, in a zigzag manner across different levels, + returns nodes level by level in a nested list. 
+ + Examples: + >>> from bigtree import Node, list_to_tree, zigzaggroup_iter + >>> path_list = ["a/b/d", "a/b/e/g", "a/b/e/h", "a/c/f"] + >>> root = list_to_tree(path_list) + >>> root.show() + a + ├── b + │ ├── d + │ └── e + │ ├── g + │ └── h + └── c + └── f + + >>> [[node.node_name for node in group] for group in zigzaggroup_iter(root)] + [['a'], ['c', 'b'], ['d', 'e', 'f'], ['h', 'g']] + + >>> [[node.node_name for node in group] for group in zigzaggroup_iter(root, filter_condition=lambda x: x.node_name in ["a", "d", "e", "f", "g"])] + [['a'], [], ['d', 'e', 'f'], ['g']] + + >>> [[node.node_name for node in group] for group in zigzaggroup_iter(root, stop_condition=lambda x: x.node_name == "e")] + [['a'], ['c', 'b'], ['d', 'f']] + + >>> [[node.node_name for node in group] for group in zigzaggroup_iter(root, max_depth=3)] + [['a'], ['c', 'b'], ['d', 'e', 'f']] + + Args: + tree (BaseNode): input tree + filter_condition (Optional[Callable[[BaseNode], bool]]): function that takes in node as argument, optional + Return node if condition evaluates to `True` + stop_condition (Optional[Callable[[BaseNode], bool]]): function that takes in node as argument, optional + Stops iteration if condition evaluates to `True` + max_depth (int): maximum depth of iteration, based on `depth` attribute, defaults to None + + Returns: + (Iterable[Iterable[BaseNode]]) + """ + + def _zigzaggroup_iter( + trees: List[BaseNodeT], reverse_indicator: bool = False + ) -> Iterable[Iterable[BaseNodeT]]: + """Iterate through all children of a tree. 
+ + Args: + trees (List[BaseNode]): trees to get children for next level + reverse_indicator (bool): indicator whether it is in reverse order + + Returns: + (Iterable[Iterable[BaseNode]]) + """ + current_tree = [] + next_level = [] + for _tree in trees: + if (not max_depth or not _tree.depth > max_depth) and ( + not stop_condition or not stop_condition(_tree) + ): + if not filter_condition or filter_condition(_tree): + current_tree.append(_tree) + next_level_nodes = [_child for _child in _tree.children if _child] + if reverse_indicator: + next_level_nodes = next_level_nodes[::-1] + next_level.extend(next_level_nodes) + yield tuple(current_tree) + if len(next_level) and (not max_depth or not next_level[0].depth > max_depth): + yield from _zigzaggroup_iter( + next_level[::-1], reverse_indicator=not reverse_indicator + ) + + yield from _zigzaggroup_iter([tree]) + + +def dag_iterator(dag: DAGNodeT) -> Iterable[Tuple[DAGNodeT, DAGNodeT]]: + """Iterate through all nodes of a Directed Acyclic Graph (DAG). + Note that node names must be unique. + Note that DAG must at least have two nodes to be shown on graph. + + 1. Visit the current node. + 2. Recursively traverse the current node's parents. + 3. Recursively traverse the current node's children. + + Examples: + >>> from bigtree import DAGNode, dag_iterator + >>> a = DAGNode("a", step=1) + >>> b = DAGNode("b", step=1) + >>> c = DAGNode("c", step=2, parents=[a, b]) + >>> d = DAGNode("d", step=2, parents=[a, c]) + >>> e = DAGNode("e", step=3, parents=[d]) + >>> [(parent.node_name, child.node_name) for parent, child in dag_iterator(a)] + [('a', 'c'), ('a', 'd'), ('b', 'c'), ('c', 'd'), ('d', 'e')] + + Args: + dag (DAGNode): input dag + + Returns: + (Iterable[Tuple[DAGNode, DAGNode]]) + """ + visited_nodes = set() + + def _dag_iterator(node: DAGNodeT) -> Iterable[Tuple[DAGNodeT, DAGNodeT]]: + """Iterate through all children of a DAG. 
+ + Args: + node (DAGNode): current node + + Returns: + (Iterable[Tuple[DAGNode, DAGNode]]) + """ + node_name = node.node_name + visited_nodes.add(node_name) + + # Parse upwards + for parent in node.parents: + parent_name = parent.node_name + if parent_name not in visited_nodes: + yield parent, node + + # Parse downwards + for child in node.children: + child_name = child.node_name + if child_name not in visited_nodes: + yield node, child + + # Parse upwards + for parent in node.parents: + parent_name = parent.node_name + if parent_name not in visited_nodes: + yield from _dag_iterator(parent) + + # Parse downwards + for child in node.children: + child_name = child.node_name + if child_name not in visited_nodes: + yield from _dag_iterator(child) + + yield from _dag_iterator(dag) diff --git a/python310/packages/bigtree/utils/plot.py b/python310/packages/bigtree/utils/plot.py new file mode 100644 index 0000000..b8decb2 --- /dev/null +++ b/python310/packages/bigtree/utils/plot.py @@ -0,0 +1,354 @@ +from typing import Optional, TypeVar + +from bigtree.node.basenode import BaseNode + +T = TypeVar("T", bound=BaseNode) + +__all__ = [ + "reingold_tilford", +] + + +def reingold_tilford( + tree_node: T, + sibling_separation: float = 1.0, + subtree_separation: float = 1.0, + level_separation: float = 1.0, + x_offset: float = 0.0, + y_offset: float = 0.0, +) -> None: + """ + Algorithm for drawing tree structure, retrieves `(x, y)` coordinates for a tree structure. + Adds `x` and `y` attributes to every node in the tree. Modifies tree in-place. + + This algorithm[1] is an improvement over Reingold Tilford algorithm[2]. + + According to Reingold Tilford's paper, a tree diagram should satisfy the following aesthetic rules, + + 1. Nodes at the same depth should lie along a straight line, and the straight lines defining the depths should be parallel. + 2. A left child should be positioned to the left of its parent node and a right child to the right. + 3. 
A parent should be centered over their children. + 4. A tree and its mirror image should produce drawings that are reflections of one another; a subtree should be drawn the same way regardless of where it occurs in the tree. + + Examples: + >>> from bigtree import reingold_tilford, list_to_tree + >>> path_list = ["a/b/d", "a/b/e/g", "a/b/e/h", "a/c/f"] + >>> root = list_to_tree(path_list) + >>> root.show() + a + ├── b + │ ├── d + │ └── e + │ ├── g + │ └── h + └── c + └── f + + >>> reingold_tilford(root) + >>> root.show(attr_list=["x", "y"]) + a [x=1.25, y=3.0] + ├── b [x=0.5, y=2.0] + │ ├── d [x=0.0, y=1.0] + │ └── e [x=1.0, y=1.0] + │ ├── g [x=0.5, y=0.0] + │ └── h [x=1.5, y=0.0] + └── c [x=2.0, y=2.0] + └── f [x=2.0, y=1.0] + + References + + - [1] Walker, J. (1991). Positioning Nodes for General Trees. https://www.drdobbs.com/positioning-nodes-for-general-trees/184402320?pgno=4 + - [2] Reingold, E., Tilford, J. (1981). Tidier Drawings of Trees. IEEE Transactions on Software Engineering. https://reingold.co/tidier-drawings.pdf + + Args: + tree_node (BaseNode): tree to compute (x, y) coordinate + sibling_separation (float): minimum distance between adjacent siblings of the tree + subtree_separation (float): minimum distance between adjacent subtrees of the tree + level_separation (float): fixed distance between adjacent levels of the tree + x_offset (float): graph offset of x-coordinates + y_offset (float): graph offset of y-coordinates + """ + _first_pass(tree_node, sibling_separation, subtree_separation) + x_adjustment = _second_pass(tree_node, level_separation, x_offset, y_offset) + _third_pass(tree_node, x_adjustment) + + +def _first_pass( + tree_node: T, sibling_separation: float, subtree_separation: float +) -> None: + """ + Performs post-order traversal of tree and assigns `x`, `mod` and `shift` values to each node. + Modifies tree in-place. 
+ + Notation: + - `lsibling`: left-sibling of node + - `lchild`: last child of node + - `fchild`: first child of node + - `midpoint`: midpoint of node wrt children, :math:`midpoint = (lchild.x + fchild.x) / 2` + - `sibling distance`: sibling separation + - `subtree distance`: subtree separation + + There are two parts in the first pass, + + 1. In the first part, we assign `x` and `mod` values to each node + + `x` value is the initial x-position of each node purely based on the node's position + - :math:`x = 0` for leftmost node and :math:`x = lsibling.x + sibling distance` for other nodes + - Special case when leftmost node has children, then it will try to center itself, :math:`x = midpoint` + + `mod` value is the amount to shift the subtree (all descendant nodes excluding itself) to make the children centered with itself + - :math:`mod = 0` for node does not have children (no need to shift subtree) or it is a leftmost node (parent is already centered, from above point) + - Special case when non-leftmost nodes have children, :math:`mod = x - midpoint` + + 2. In the second part, we assign `shift` value of nodes due to overlapping subtrees. + + For each node on the same level, ensure that the leftmost descendant does not intersect with the rightmost + descendant of any left sibling at every subsequent level. Intersection happens when the subtrees are not + at least `subtree distance` apart. + + If there are any intersections, shift the whole subtree by a new `shift` value, shift any left sibling by a + fraction of `shift` value, and shift any right sibling by `shift` + a multiple of the fraction of + `shift` value to keep nodes centralized at the level. 
+ + Args: + tree_node (BaseNode): tree to compute (x, y) coordinate + sibling_separation (float): minimum distance between adjacent siblings of the tree + subtree_separation (float): minimum distance between adjacent subtrees of the tree + """ + # Post-order iteration (LRN) + for child in tree_node.children: + _first_pass(child, sibling_separation, subtree_separation) + + _x = 0.0 + _mod = 0.0 + _shift = 0.0 + _midpoint = 0.0 + + if tree_node.is_root: + tree_node.set_attrs({"x": _get_midpoint_of_children(tree_node)}) + tree_node.set_attrs({"mod": _mod}) + tree_node.set_attrs({"shift": _shift}) + + else: + # First part - assign x and mod values + + if tree_node.children: + _midpoint = _get_midpoint_of_children(tree_node) + + # Non-leftmost node + if tree_node.left_sibling: + _x = tree_node.left_sibling.get_attr("x") + sibling_separation + if tree_node.children: + _mod = _x - _midpoint + # Leftmost node + else: + if tree_node.children: + _x = _midpoint + + tree_node.set_attrs({"x": _x}) + tree_node.set_attrs({"mod": _mod}) + tree_node.set_attrs({"shift": tree_node.get_attr("shift", _shift)}) + + # Second part - assign shift values due to overlapping subtrees + + parent_node = tree_node.parent + tree_node_idx = parent_node.children.index(tree_node) + if tree_node_idx: + for idx_node in range(tree_node_idx): + left_subtree = parent_node.children[idx_node] + _shift = max( + _shift, + _get_subtree_shift( + left_subtree=left_subtree, + right_subtree=tree_node, + left_idx=idx_node, + right_idx=tree_node_idx, + subtree_separation=subtree_separation, + ), + ) + + # Shift siblings (left siblings, itself, right siblings) accordingly + for multiple, sibling in enumerate(parent_node.children): + sibling.set_attrs( + { + "shift": sibling.get_attr("shift", 0) + + (_shift * multiple / tree_node_idx) + } + ) + + +def _get_midpoint_of_children(tree_node: BaseNode) -> float: + """Get midpoint of children of a node + + Args: + tree_node (BaseNode): tree node to obtain midpoint of their 
child/children + + Returns: + (float) + """ + if tree_node.children: + first_child_x: float = tree_node.children[0].get_attr("x") + tree_node.children[ + 0 + ].get_attr("shift") + last_child_x: float = tree_node.children[-1].get_attr("x") + tree_node.children[ + -1 + ].get_attr("shift") + return (last_child_x + first_child_x) / 2 + return 0.0 + + +def _get_subtree_shift( + left_subtree: T, + right_subtree: T, + left_idx: int, + right_idx: int, + subtree_separation: float, + left_cum_shift: float = 0, + right_cum_shift: float = 0, + cum_shift: float = 0, + initial_run: bool = True, +) -> float: + """Get shift amount to shift the right subtree towards the right such that it does not overlap with the left subtree + + Args: + left_subtree (BaseNode): left subtree, with right contour to be traversed + right_subtree (BaseNode): right subtree, with left contour to be traversed + left_idx (int): index of left subtree, to compute overlap for relative shift (constant across iteration) + right_idx (int): index of right subtree, to compute overlap for relative shift (constant across iteration) + subtree_separation (float): minimum distance between adjacent subtrees of the tree (constant across iteration) + left_cum_shift (float): cumulative `mod + shift` for left subtree from the ancestors, defaults to 0 + right_cum_shift (float): cumulative `mod + shift` for right subtree from the ancestors, defaults to 0 + cum_shift (float): cumulative shift amount for right subtree, defaults to 0 + initial_run (bool): indicates whether left_subtree and right_subtree are the main subtrees, defaults to True + + Returns: + (float) + """ + new_shift = 0.0 + + if not initial_run: + x_left = ( + left_subtree.get_attr("x") + left_subtree.get_attr("shift") + left_cum_shift + ) + x_right = ( + right_subtree.get_attr("x") + + right_subtree.get_attr("shift") + + right_cum_shift + + cum_shift + ) + new_shift = max( + (x_left + subtree_separation - x_right) / (1 - left_idx / right_idx), 0 + ) + + # 
Search for a left sibling of left_subtree that has children + while left_subtree and not left_subtree.children and left_subtree.left_sibling: + left_subtree = left_subtree.left_sibling + + # Search for a right sibling of right_subtree that has children + while ( + right_subtree and not right_subtree.children and right_subtree.right_sibling + ): + right_subtree = right_subtree.right_sibling + + if left_subtree.children and right_subtree.children: + # Iterate down the level, for the rightmost child of left_subtree and the leftmost child of right_subtree + return _get_subtree_shift( + left_subtree=left_subtree.children[-1], + right_subtree=right_subtree.children[0], + left_idx=left_idx, + right_idx=right_idx, + subtree_separation=subtree_separation, + left_cum_shift=( + left_cum_shift + + left_subtree.get_attr("mod") + + left_subtree.get_attr("shift") + ), + right_cum_shift=( + right_cum_shift + + right_subtree.get_attr("mod") + + right_subtree.get_attr("shift") + ), + cum_shift=cum_shift + new_shift, + initial_run=False, + ) + + return cum_shift + new_shift + + +def _second_pass( + tree_node: T, + level_separation: float, + x_offset: float, + y_offset: float, + cum_mod: Optional[float] = 0.0, + max_depth: Optional[int] = None, + x_adjustment: Optional[float] = 0.0, +) -> float: + """ + Performs pre-order traversal of tree and determines the final `x` and `y` values for each node. + Modifies tree in-place. 
+ + Notation: + - `depth`: maximum depth of tree + - `distance`: level separation + - `x'`: x offset + - `y'`: y offset + + Final position of each node + - :math:`x = node.x + node.shift + sum(ancestor.mod) + x'` + - :math:`y = (depth - node.depth) * distance + y'` + + Args: + tree_node (BaseNode): tree to compute (x, y) coordinate + level_separation (float): fixed distance between adjacent levels of the tree (constant across iteration) + x_offset (float): graph offset of x-coordinates (constant across iteration) + y_offset (float): graph offset of y-coordinates (constant across iteration) + cum_mod (Optional[float]): cumulative `mod + shift` for tree/subtree from the ancestors + max_depth (Optional[int]): maximum depth of tree (constant across iteration) + x_adjustment (Optional[float]): amount of x-adjustment for third pass, in case any x-coordinates goes below 0 + + Returns + (float) + """ + if not max_depth: + max_depth = tree_node.max_depth + + final_x: float = ( + tree_node.get_attr("x") + tree_node.get_attr("shift") + cum_mod + x_offset + ) + final_y: float = (max_depth - tree_node.depth) * level_separation + y_offset + tree_node.set_attrs({"x": final_x, "y": final_y}) + + # Pre-order iteration (NLR) + if tree_node.children: + return max( + [ + _second_pass( + child, + level_separation, + x_offset, + y_offset, + cum_mod + tree_node.get_attr("mod") + tree_node.get_attr("shift"), + max_depth, + x_adjustment, + ) + for child in tree_node.children + ] + ) + return max(x_adjustment, -final_x) + + +def _third_pass(tree_node: BaseNode, x_adjustment: float) -> None: + """Adjust all x-coordinates by an adjustment value so that every x-coordinate is greater than or equal to 0. + Modifies tree in-place. 
+ + Args: + tree_node (BaseNode): tree to compute (x, y) coordinate + x_adjustment (float): amount of adjustment for x-coordinates (constant across iteration) + """ + if x_adjustment: + tree_node.set_attrs({"x": tree_node.get_attr("x") + x_adjustment}) + + # Pre-order iteration (NLR) + for child in tree_node.children: + _third_pass(child, x_adjustment) diff --git a/python310/packages/bigtree/workflows/__init__.py b/python310/packages/bigtree/workflows/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python310/packages/bigtree/workflows/app_calendar.py b/python310/packages/bigtree/workflows/app_calendar.py new file mode 100644 index 0000000..d6482c1 --- /dev/null +++ b/python310/packages/bigtree/workflows/app_calendar.py @@ -0,0 +1,200 @@ +from __future__ import annotations + +import datetime as dt +from typing import Any, Optional, Union + +from bigtree.node.node import Node +from bigtree.tree.construct import add_path_to_tree +from bigtree.tree.export import tree_to_dataframe +from bigtree.tree.search import find_full_path, findall + +try: + import pandas as pd +except ImportError: # pragma: no cover + pd = None + + +class Calendar: + """ + Calendar Implementation with Big Tree. 
+ - Calendar has four levels - year, month, day, and event name (with event attributes) + + Examples: + *Initializing and Adding Events* + + >>> from bigtree import Calendar + >>> calendar = Calendar("My Calendar") + >>> calendar.add_event("Gym", "2023-01-01 18:00") + >>> calendar.add_event("Dinner", "2023-01-01", date_format="%Y-%m-%d", budget=20) + >>> calendar.add_event("Gym", "2023-01-02 18:00") + >>> calendar.show() + My Calendar + 2023-01-01 00:00:00 - Dinner (budget: 20) + 2023-01-01 18:00:00 - Gym + 2023-01-02 18:00:00 - Gym + + *Search for Events* + + >>> calendar.find_event("Gym") + 2023-01-01 18:00:00 - Gym + 2023-01-02 18:00:00 - Gym + + *Removing Events* + + >>> import datetime as dt + >>> calendar.delete_event("Gym", dt.date(2023, 1, 1)) + >>> calendar.show() + My Calendar + 2023-01-01 00:00:00 - Dinner (budget: 20) + 2023-01-02 18:00:00 - Gym + + *Export Calendar* + + >>> calendar.to_dataframe() + path name date time budget + 0 /My Calendar/2023/01/01/Dinner Dinner 2023-01-01 00:00:00 20.0 + 1 /My Calendar/2023/01/02/Gym Gym 2023-01-02 18:00:00 NaN + """ + + def __init__(self, name: str): + self.calendar = Node(name) + self.__sorted = True + + def add_event( + self, + event_name: str, + event_datetime: Union[str, dt.datetime], + date_format: str = "%Y-%m-%d %H:%M", + **kwargs: Any, + ) -> None: + """Add event to calendar + + Args: + event_name (str): event name to be added + event_datetime (Union[str, dt.datetime]): event date and time + date_format (str): specify datetime format if event_datetime is str + """ + if isinstance(event_datetime, str): + event_datetime = dt.datetime.strptime(event_datetime, date_format) + year, month, day, date, time = ( + event_datetime.year, + str(event_datetime.month).zfill(2), + str(event_datetime.day).zfill(2), + event_datetime.date(), + event_datetime.time(), + ) + event_path = f"{self.calendar.node_name}/{year}/{month}/{day}/{event_name}" + event_attr = {"date": date, "time": time, **kwargs} + if 
find_full_path(self.calendar, event_path): + print( + f"Event {event_name} exists on {date}, overwriting information for {event_name}" + ) + add_path_to_tree( + tree=self.calendar, + path=event_path, + node_attrs=event_attr, + ) + self.__sorted = False + + def delete_event( + self, event_name: str, event_date: Optional[dt.date] = None + ) -> None: + """Delete event from calendar + + Args: + event_name (str): event name to be deleted + event_date (dt.date): event date to be deleted + """ + if event_date: + year, month, day = ( + event_date.year, + str(event_date.month).zfill(2), + str(event_date.day).zfill(2), + ) + event_path = f"{self.calendar.node_name}/{year}/{month}/{day}/{event_name}" + event = find_full_path(self.calendar, event_path) + if event: + self._delete_event(event) + else: + print(f"Event {event_name} does not exist on {event_date}") + else: + for event in findall( + self.calendar, lambda node: node.node_name == event_name + ): + self._delete_event(event) + + def find_event(self, event_name: str) -> None: + """Find event by name, prints result to console + + Args: + event_name (str): event name + """ + if not self.__sorted: + self._sort() + for event in findall(self.calendar, lambda node: node.node_name == event_name): + self._show(event) + + def show(self) -> None: + """Show calendar, prints result to console""" + if not len(self.calendar.children): + raise Exception("Calendar is empty!") + if not self.__sorted: + self._sort() + print(self.calendar.node_name) + for event in self.calendar.leaves: + self._show(event) + + def to_dataframe(self) -> pd.DataFrame: + """ + Export calendar to DataFrame + + Returns: + (pd.DataFrame) + """ + if not len(self.calendar.children): + raise Exception("Calendar is empty!") + data = tree_to_dataframe(self.calendar, all_attrs=True, leaf_only=True) + compulsory_cols = ["path", "name", "date", "time"] + other_cols = list(set(data.columns) - set(compulsory_cols)) + return data[compulsory_cols + other_cols] + + def 
_delete_event(self, event: Node) -> None: + """Private method to delete event, delete parent node as well + + Args: + event (Node): event to be deleted + """ + if len(event.parent.children) == 1: + if event.parent.parent: + self._delete_event(event.parent) + event.parent.parent = None + else: + event.parent = None + + def _sort(self) -> None: + """Private method to sort calendar by event date, followed by event time""" + for day_event in findall(self.calendar, lambda node: node.depth <= 4): + if day_event.depth < 4: + day_event.sort(key=lambda attr: attr.node_name) + else: + day_event.sort(key=lambda attr: attr.time) + self.__sorted = True + + @staticmethod + def _show(event: Node) -> None: + """Private method to show event, handles the formatting of event + Prints result to console + + Args: + event (Node): event + """ + event_datetime = dt.datetime.combine( + event.get_attr("date"), event.get_attr("time") + ) + event_attrs = event.describe( + exclude_attributes=["date", "time", "name"], exclude_prefix="_" + ) + event_attrs_str = ", ".join([f"{attr[0]}: {attr[1]}" for attr in event_attrs]) + if event_attrs_str: + event_attrs_str = f" ({event_attrs_str})" + print(f"{event_datetime} - {event.node_name}{event_attrs_str}") diff --git a/python310/packages/bigtree/workflows/app_todo.py b/python310/packages/bigtree/workflows/app_todo.py new file mode 100644 index 0000000..f735ce7 --- /dev/null +++ b/python310/packages/bigtree/workflows/app_todo.py @@ -0,0 +1,261 @@ +from __future__ import annotations + +import json +import logging +from typing import Any, List, Union + +from bigtree.node.node import Node +from bigtree.tree.construct import nested_dict_to_tree +from bigtree.tree.export import print_tree, tree_to_nested_dict +from bigtree.tree.search import find_child_by_name, find_name + +logging.getLogger(__name__).addHandler(logging.NullHandler()) + + +class AppToDo: + """ + To-Do List Implementation with Big Tree. 
+ - To-Do List has three levels - app name, list name, and item name. + - If list name is not given, item will be assigned to a `General` list. + + Examples: + *Initializing and Adding Items* + + >>> from bigtree import AppToDo + >>> app = AppToDo("To Do App") + >>> app.add_item(item_name="Homework 1", list_name="School") + >>> app.add_item(item_name=["Milk", "Bread"], list_name="Groceries", description="Urgent") + >>> app.add_item(item_name="Cook") + >>> app.show() + To Do App + ├── School + │ └── Homework 1 + ├── Groceries + │ ├── Milk [description=Urgent] + │ └── Bread [description=Urgent] + └── General + └── Cook + + *Reorder List and Item* + + >>> app.prioritize_list(list_name="General") + >>> app.show() + To Do App + ├── General + │ └── Cook + ├── School + │ └── Homework 1 + └── Groceries + ├── Milk [description=Urgent] + └── Bread [description=Urgent] + + >>> app.prioritize_item(item_name="Bread") + >>> app.show() + To Do App + ├── General + │ └── Cook + ├── School + │ └── Homework 1 + └── Groceries + ├── Bread [description=Urgent] + └── Milk [description=Urgent] + + *Removing Items* + + >>> app.remove_item("Homework 1") + >>> app.show() + To Do App + ├── General + │ └── Cook + └── Groceries + ├── Bread [description=Urgent] + └── Milk [description=Urgent] + + *Exporting and Importing List* + + >>> app.save("assets/docstr/list.json") + >>> app2 = AppToDo.load("assets/docstr/list.json") + >>> app2.show() + To Do App + ├── General + │ └── Cook + └── Groceries + ├── Bread [description=Urgent] + └── Milk [description=Urgent] + """ + + def __init__( + self, + app_name: str = "", + ): + """Initialize To-Do app + + Args: + app_name (str): name of to-do app, optional + """ + self._root = Node(app_name) + + def add_list(self, list_name: str, **kwargs: Any) -> Node: + """Add list to app + + If list is present, return list node, else a new list will be created + + Args: + list_name (str): name of list + + Returns: + (Node) + """ + list_node = 
find_child_by_name(self._root, list_name) + if not list_node: + list_node = Node(list_name, parent=self._root, **kwargs) + logging.info(f"Created list {list_name}") + return list_node + + def prioritize_list(self, list_name: str) -> None: + """Prioritize list in app, shift it to be the first list + + Args: + list_name (str): name of list + """ + list_node = find_child_by_name(self._root, list_name) + if not list_node: + raise ValueError(f"List {list_name} not found") + current_children = list(self._root.children) + current_children.remove(list_node) + current_children.insert(0, list_node) + self._root.children = current_children # type: ignore + + def add_item( + self, item_name: Union[str, List[str]], list_name: str = "", **kwargs: Any + ) -> None: + """Add items to list + + Args: + item_name (str/List[str]): items to be added + list_name (str): list to add items to, optional + """ + if not isinstance(item_name, str) and not isinstance(item_name, list): + raise TypeError("Invalid data type for item") + if isinstance(item_name, str): + item_name = [item_name] + + # Get list to add to + if list_name: + list_node = self.add_list(list_name) + else: + list_node = self.add_list("General") + + # Add items to list + for _item in item_name: + _ = Node(_item, parent=list_node, **kwargs) + logging.info(f"Created item(s) {', '.join(item_name)}") + + def remove_item( + self, item_name: Union[str, List[str]], list_name: str = "" + ) -> None: + """Remove items from list + + Args: + item_name (str/List[str]): items to be added + list_name (str): list to add items to, optional + """ + if not isinstance(item_name, str) and not isinstance(item_name, list): + raise TypeError("Invalid data type for item") + if isinstance(item_name, str): + item_name = [item_name] + + # Check if items can be found + items_to_remove = [] + parent_to_check: set[Node] = set() + if list_name: + list_node = find_child_by_name(self._root, list_name) + if not list_node: + raise ValueError(f"List {list_name} 
does not exist!") + for _item in item_name: + item_node = find_child_by_name(list_node, _item) + if not item_node: + raise ValueError(f"Item {_item} does not exist!") + assert isinstance(item_node.parent, Node) # for mypy type checking + items_to_remove.append(item_node) + parent_to_check.add(item_node.parent) + else: + for _item in item_name: + item_node = find_name(self._root, _item) + if not item_node: + raise ValueError(f"Item {_item} does not exist!") + assert isinstance(item_node.parent, Node) # for mypy type checking + items_to_remove.append(item_node) + parent_to_check.add(item_node.parent) + + # Remove items + for item_to_remove in items_to_remove: + if item_to_remove.depth != 3: + raise ValueError( + f"Check item to remove {item_to_remove} is an item at item-level" + ) + item_to_remove.parent = None + logging.info( + f"Removed item(s) {', '.join(item.node_name for item in items_to_remove)}" + ) + + # Remove list if empty + for list_node in parent_to_check: + if not len(list(list_node.children)): + list_node.parent = None + logging.info(f"Removed list {list_node.node_name}") + + def prioritize_item(self, item_name: str) -> None: + """Prioritize item in list, shift it to be the first item in list + + Args: + item_name (str): name of item + """ + item_node = find_name(self._root, item_name) + if not item_node: + raise ValueError(f"Item {item_name} not found") + if item_node.depth != 3: + raise ValueError(f"{item_name} is not an item") + assert isinstance(item_node.parent, Node) # for mypy type checking + current_parent = item_node.parent + current_children = list(current_parent.children) + current_children.remove(item_node) + current_children.insert(0, item_node) + current_parent.children = current_children # type: ignore + + def show(self, **kwargs: Any) -> None: + """Print tree to console""" + print_tree(self._root, all_attrs=True, **kwargs) + + @staticmethod + def load(json_path: str) -> AppToDo: + """Load To-Do app from json + + Args: + json_path (str): 
json load path + + Returns: + (Self) + """ + if not json_path.endswith(".json"): + raise ValueError("Path should end with .json") + + with open(json_path, "r") as fp: + app_dict = json.load(fp) + _app = AppToDo("dummy") + AppToDo.__setattr__(_app, "_root", nested_dict_to_tree(app_dict["root"])) + return _app + + def save(self, json_path: str) -> None: + """Save To-Do app as json + + Args: + json_path (str): json save path + """ + if not json_path.endswith(".json"): + raise ValueError("Path should end with .json") + + node_dict = tree_to_nested_dict(self._root, all_attrs=True) + app_dict = {"root": node_dict} + with open(json_path, "w") as fp: + json.dump(app_dict, fp) diff --git a/python310/packages/elog.pth b/python310/packages/elog.pth new file mode 100644 index 0000000..92dcaa9 --- /dev/null +++ b/python310/packages/elog.pth @@ -0,0 +1 @@ +./elog-1.3.4-py3.7.egg diff --git a/python310/packages/elog/__init__.py b/python310/packages/elog/__init__.py new file mode 100644 index 0000000..f037dec --- /dev/null +++ b/python310/packages/elog/__init__.py @@ -0,0 +1,13 @@ +from elog.logbook import Logbook +from elog.logbook import LogbookError, LogbookAuthenticationError, LogbookServerProblem, LogbookMessageRejected, \ + LogbookInvalidMessageID, LogbookInvalidAttachmentType + + +def open(*args, **kwargs): + """ + Will return a Logbook object. All arguments are passed to the logbook constructor. + :param args: + :param kwargs: + :return: Logbook() instance + """ + return Logbook(*args, **kwargs) diff --git a/python310/packages/elog/logbook.py b/python310/packages/elog/logbook.py new file mode 100644 index 0000000..c2bfe26 --- /dev/null +++ b/python310/packages/elog/logbook.py @@ -0,0 +1,571 @@ +import requests +import urllib.parse +import os +import builtins +import re +from elog.logbook_exceptions import * +from datetime import datetime + + +class Logbook(object): + """ + Logbook provides methods to interface with logbook on location: "server:port/subdir/logbook". 
User can create, + edit, delete logbook messages. + """ + + def __init__(self, hostname, logbook='', port=None, user=None, password=None, subdir='', use_ssl=True, + encrypt_pwd=True): + """ + :param hostname: elog server hostname. If whole url is specified here, it will be parsed and arguments: + "logbook, port, subdir, use_ssl" will be overwritten by parsed values. + :param logbook: name of the logbook on the elog server + :param port: elog server port (if not specified will default to '80' if use_ssl=False or '443' if use_ssl=True + :param user: username (if authentication needed) + :param password: password (if authentication needed) Password will be encrypted with sha256 unless + encrypt_pwd=False (default: True) + :param subdir: subdirectory of logbooks locations + :param use_ssl: connect using ssl (ignored if url starts with 'http://'' or 'https://'? + :param encrypt_pwd: To avoid exposing password in the code, this flag can be set to False and password + will then be handled as it is (user needs to provide sha256 encrypted password with + salt= '' and rounds=5000) + :return: + """ + hostname = hostname.strip() + + # parse url to see if some parameters are defined with url + parsed_url = urllib.parse.urlsplit(hostname) + + # ---- handle SSL ----- + # hostname must be modified according to use_ssl flag. 
If hostname starts with https:// or http:// + # the use_ssl flag is ignored + url_scheme = parsed_url.scheme + if url_scheme == 'http': + use_ssl = False + + elif url_scheme == 'https': + use_ssl = True + + elif not url_scheme: + # add http or https + if use_ssl: + url_scheme = 'https' + else: + url_scheme = 'http' + + # ---- handle port ----- + # 1) by default use port defined in the url + # 2) remove any 'default' ports such as 80 for http and 443 for https + # 3) if port not defined in url and not 'default' add it to netloc + + netloc = parsed_url.netloc + if netloc == "" and "localhost" in hostname: + netloc = 'localhost' + netloc_split = netloc.split(':') + if len(netloc_split) > 1: + # port defined in url --> remove if needed + port = netloc_split[1] + if (port == 80 and not use_ssl) or (port == 443 and use_ssl): + netloc = netloc_split[0] + + else: + # add port info if needed + if port is not None and not (port == 80 and not use_ssl) and not (port == 443 and use_ssl): + netloc += ':{}'.format(port) + + # ---- handle subdir and logbook ----- + # parsed_url.path = /// + + # Remove last '/' for easier parsing + url_path = parsed_url.path + if url_path.endswith('/'): + url_path = url_path[:-1] + + splitted_path = url_path.split('/') + if url_path and len(splitted_path) > 1: + # If here ... then at least some part of path is defined. + + # If logbook defined --> treat path current path as subdir and add logbook at the end + # to define the full path. Else treat existing path as /. + # Put first and last '/' back on its place + if logbook: + url_path += '/{}'.format(logbook) + else: + logbook = splitted_path[-1] + + else: + # There is nothing. Use arguments. 
+ url_path = subdir + '/' + logbook + + # urllib.parse.quote replaces special characters with %xx escapes + # self._logbook_path = urllib.parse.quote('/' + url_path + '/').replace('//', '/') + self._logbook_path = ('/' + url_path + '/').replace('//', '/') + + self._url = url_scheme + '://' + netloc + self._logbook_path + self.logbook = logbook + self._user = user + self._password = _handle_pswd(password, encrypt_pwd) + + def post(self, message, msg_id=None, reply=False, attributes=None, attachments=None, encoding=None, + **kwargs): + """ + Posts message to the logbook. If msg_id is not specified new message will be created, otherwise existing + message will be edited, or a reply (if reply=True) to it will be created. This method returns the msg_id + of the newly created message. + + :param message: string with message text + :param msg_id: ID number of message to edit or reply. If not specified new message is created. + :param reply: If 'True' reply to existing message is created instead of editing it + :param attributes: Dictionary of attributes. Following attributes are used internally by the elog and will be + ignored: Text, Date, Encoding, Reply to, In reply to, Locked by, Attachment + :param attachments: list of: + - file like objects which read() will return bytes (if file_like_object.name is not + defined, default name "attachment" will be used. + - paths to the files + All items will be appended as attachment to the elog entry. In case of unknown + attachment an exception LogbookInvalidAttachment will be raised. + :param encoding: Defines encoding of the message. Can be: 'plain' -> plain text, 'html'->html-text, + 'ELCode' --> elog formatting syntax + :param kwargs: Anything in the kwargs will be interpreted as attribute. e.g.: logbook.post('Test text', + Author='Rok Vintar), "Author" will be sent as an attribute. If named same as one of the + attributes defined in "attributes", kwargs will have priority. 
+ + :return: msg_id + """ + + attributes = attributes or {} + attributes = {**attributes, **kwargs} # kwargs as attributes with higher priority + + attachments = attachments or [] + + if encoding is not None: + if encoding not in ['plain', 'HTML', 'ELCode']: + raise LogbookMessageRejected('Invalid message encoding. Valid options: plain, HTML, ELCode.') + attributes['Encoding'] = encoding + + attributes_to_edit = dict() + if msg_id: + # Message exists, we can continue + if reply: + # Verify that there is a message on the server, otherwise do not reply to it! + self._check_if_message_on_server(msg_id) # raises exception in case of none existing message + + attributes['reply_to'] = str(msg_id) + + else: # Edit existing + attributes['edit_id'] = str(msg_id) + attributes['skiplock'] = '1' + + # Handle existing attachments + msg_to_edit, attributes_to_edit, attach_to_edit = self.read(msg_id) + + i = 0 + for attachment in attach_to_edit: + if attachment: + # Existing attachments must be passed as regular arguments attachment with value= file name + # Read message returnes full urls to existing attachments: + # :[][/// + attributes['attachment' + str(i)] = os.path.basename(attachment) + i += 1 + + for attribute, data in attributes.items(): + new_data = attributes.get(attribute) + if new_data is not None: + attributes_to_edit[attribute] = new_data + else: + # As we create a new message, specify creation time if not already specified in attributes + if 'When' not in attributes: + attributes['When'] = int(datetime.now().timestamp()) + + if not attributes_to_edit: + attributes_to_edit = attributes + # Remove any attributes that should not be sent + _remove_reserved_attributes(attributes_to_edit) + + if attachments: + files_to_attach, objects_to_close = self._prepare_attachments(attachments) + else: + objects_to_close = list() + files_to_attach = list() + + # Make requests module think that Text is a "file". 
This is the only way to force requests to send data as + # multipart/form-data even if there are no attachments. Elog understands only multipart/form-data + files_to_attach.append(('Text', ('', message))) + + # Base attributes are common to all messages + self._add_base_msg_attributes(attributes_to_edit) + + # Keys in attributes cannot have certain characters like whitespaces or dashes for the http request + attributes_to_edit = _replace_special_characters_in_attribute_keys(attributes_to_edit) + + try: + response = requests.post(self._url, data=attributes_to_edit, files=files_to_attach, allow_redirects=False, + verify=False) + # Validate response. Any problems will raise an Exception. + resp_message, resp_headers, resp_msg_id = _validate_response(response) + + # Close file like objects that were opened by the elog (if path + for file_like_object in objects_to_close: + if hasattr(file_like_object, 'close'): + file_like_object.close() + + except requests.RequestException as e: + # Check if message on server. + self._check_if_message_on_server(msg_id) # raises exceptions if no message or no response from server + + # If here: message is on server but cannot be downloaded (should never happen) + raise LogbookServerProblem('Cannot access logbook server to post a message, ' + 'because of:\n' + + '{0}'.format(e)) + + # Any error before here should raise an exception, but check again for nay case. 
+ if not resp_msg_id or resp_msg_id < 1: + raise LogbookInvalidMessageID('Invalid message ID: ' + str(resp_msg_id) + ' returned') + return resp_msg_id + + def read(self, msg_id): + """ + Reads message from the logbook server and returns tuple of (message, attributes, attachments) where: + message: string with message body + attributes: dictionary of all attributes returned by the logbook + attachments: list of urls to attachments on the logbook server + + :param msg_id: ID of the message to be read + :return: message, attributes, attachments + """ + + request_headers = dict() + if self._user or self._password: + request_headers['Cookie'] = self._make_user_and_pswd_cookie() + + try: + self._check_if_message_on_server(msg_id) # raises exceptions if no message or no response from server + response = requests.get(self._url + str(msg_id) + '?cmd=download', headers=request_headers, + allow_redirects=False, verify=False) + + # Validate response. If problems Exception will be thrown. + resp_message, resp_headers, resp_msg_id = _validate_response(response) + + except requests.RequestException as e: + # If here: message is on server but cannot be downloaded (should never happen) + raise LogbookServerProblem('Cannot access logbook server to read the message with ID: ' + str(msg_id) + + 'because of:\n' + '{0}'.format(e)) + + # Parse message to separate message body, attributes and attachments + attributes = dict() + attachments = list() + + returned_msg = resp_message.decode('utf-8', 'ignore').splitlines() + delimiter_idx = returned_msg.index('========================================') + + message = '\n'.join(returned_msg[delimiter_idx + 1:]) + for line in returned_msg[0:delimiter_idx]: + line = line.split(': ') + data = ''.join(line[1:]) + if line[0] == 'Attachment': + attachments = data.split(',') + # Here are only attachment names, make a full url out of it, so they could be + # recognisable by others, and downloaded if needed + attachments = [self._url + '{0}'.format(i) 
for i in attachments] + else: + attributes[line[0]] = data + + return message, attributes, attachments + + def delete(self, msg_id): + """ + Deletes message thread (!!!message + all replies!!!) from logbook. + It also deletes all of attachments of corresponding messages from the server. + + :param msg_id: message to be deleted + :return: + """ + + request_headers = dict() + if self._user or self._password: + request_headers['Cookie'] = self._make_user_and_pswd_cookie() + + try: + self._check_if_message_on_server(msg_id) # check if something to delete + + response = requests.get(self._url + str(msg_id) + '?cmd=Delete&confirm=Yes', headers=request_headers, + allow_redirects=False, verify=False) + + _validate_response(response) # raises exception if any other error identified + + except requests.RequestException as e: + # If here: message is on server but cannot be downloaded (should never happen) + raise LogbookServerProblem('Cannot access logbook server to delete the message with ID: ' + str(msg_id) + + 'because of:\n' + '{0}'.format(e)) + + # Additional validation: If successfully deleted then status_code = 302. In case command was not executed at + # all (not English language --> no download command supported) status_code = 200 and the content is just a + # html page of this whole message. + if response.status_code == 200: + raise LogbookServerProblem('Cannot process delete command (only logbooks in English supported).') + + def search(self, search_term, n_results = 20, scope="subtext"): + """ + Searches the logbook and returns the message ids. + + """ + request_headers = dict() + if self._user or self._password: + request_headers['Cookie'] = self._make_user_and_pswd_cookie() + + # Putting n_results = 0 crashes the elog. also in the web-gui. 
+ n_results = 1 if n_results < 1 else n_results + + params = { + "mode": "full", + "reverse": "1", + "npp": n_results, + scope: search_term + } + + try: + response = requests.get(self._url, params=params, headers=request_headers, + allow_redirects=False, verify=False) + + # Validate response. If problems Exception will be thrown. + _validate_response(response) + resp_message = response + + except requests.RequestException as e: + # If here: message is on server but cannot be downloaded (should never happen) + raise LogbookServerProblem('Cannot access logbook server to read message ids ' + 'because of:\n' + '{0}'.format(e)) + + from lxml import html + tree = html.fromstring(resp_message.content) + message_ids = tree.xpath('(//tr/td[@class="list1" or @class="list2"][1])/a/@href') + message_ids = [int(m.split("/")[-1]) for m in message_ids] + return message_ids + + + def get_last_message_id(self): + ids = self.get_message_ids() + if len(ids) > 0: + return ids[0] + else: + return None + + def get_message_ids(self): + request_headers = dict() + if self._user or self._password: + request_headers['Cookie'] = self._make_user_and_pswd_cookie() + + try: + response = requests.get(self._url + 'page', headers=request_headers, + allow_redirects=False, verify=False) + + # Validate response. If problems Exception will be thrown. + _validate_response(response) + resp_message = response + + except requests.RequestException as e: + # If here: message is on server but cannot be downloaded (should never happen) + raise LogbookServerProblem('Cannot access logbook server to read message ids ' + 'because of:\n' + '{0}'.format(e)) + + from lxml import html + tree = html.fromstring(resp_message.content) + message_ids = tree.xpath('(//tr/td[@class="list1" or @class="list2"][1])/a/@href') + message_ids = [int(m.split("/")[-1]) for m in message_ids] + return message_ids + + def _check_if_message_on_server(self, msg_id): + """Try to load page for specific message. 
If there is a htm tag like then there is no + such message. + + :param msg_id: ID of message to be checked + :return: + """ + + request_headers = dict() + if self._user or self._password: + request_headers['Cookie'] = self._make_user_and_pswd_cookie() + try: + response = requests.get(self._url + str(msg_id), headers=request_headers, allow_redirects=False, + verify=False) + + # If there is no message code 200 will be returned (OK) and _validate_response will not recognise it + # but there will be some error in the html code. + resp_message, resp_headers, resp_msg_id = _validate_response(response) + # If there is no message, code 200 will be returned (OK) but there will be some error indication in + # the html code. + if re.findall('.*?', + resp_message.decode('utf-8', 'ignore'), + flags=re.DOTALL): + raise LogbookInvalidMessageID('Message with ID: ' + str(msg_id) + ' does not exist on logbook.') + + except requests.RequestException as e: + raise LogbookServerProblem('No response from the logbook server.\nDetails: ' + '{0}'.format(e)) + + def _add_base_msg_attributes(self, data): + """ + Adds base message attributes which are used by all messages. + :param data: dict of current attributes + :return: content string + """ + data['cmd'] = 'Submit' + data['exp'] = self.logbook + if self._user: + data['unm'] = self._user + if self._password: + data['upwd'] = self._password + + def _prepare_attachments(self, files): + """ + Parses attachments to content objects. Attachments can be: + - file like objects: must have method read() which returns bytes. If it has attribute .name it will be used + for attachment name, otherwise generic attribute name will be used. + - path to the file on disk + + Note that if attachment is is an url pointing to the existing Logbook server it will be ignored and no + exceptions will be raised. This can happen if attachments returned with read_method are resend. 
+ + :param files: list of file like objects or paths + :return: content string + """ + prepared = list() + i = 0 + objects_to_close = list() # objects that are created (opened) by elog must be later closed + for file_obj in files: + if hasattr(file_obj, 'read'): + i += 1 + attribute_name = 'attfile' + str(i) + + filename = attribute_name # If file like object has no name specified use this one + candidate_filename = os.path.basename(file_obj.name) + + if filename: # use only if not empty string + filename = candidate_filename + + elif isinstance(file_obj, str): + # Check if it is: + # - a path to the file --> open file and append + # - an url pointing to the existing Logbook server --> ignore + + filename = "" + attribute_name = "" + + if os.path.isfile(file_obj): + i += 1 + attribute_name = 'attfile' + str(i) + + file_obj = builtins.open(file_obj, 'rb') + filename = os.path.basename(file_obj.name) + + objects_to_close.append(file_obj) + + elif not file_obj.startswith(self._url): + raise LogbookInvalidAttachmentType('Invalid type of attachment: \"' + file_obj + '\".') + else: + raise LogbookInvalidAttachmentType('Invalid type of attachment[' + str(i) + '].') + + prepared.append((attribute_name, (filename, file_obj))) + + return prepared, objects_to_close + + def _make_user_and_pswd_cookie(self): + """ + prepares user name and password cookie. It is sent in header when posting a message. + :return: user name and password value for the Cookie header + """ + cookie = '' + if self._user: + cookie += 'unm=' + self._user + ';' + if self._password: + cookie += 'upwd=' + self._password + ';' + + return cookie + + +def _remove_reserved_attributes(attributes): + """ + Removes elog reserved attributes (from the attributes dict) that can not be sent. + + :param attributes: dictionary of attributes to be cleaned. 
+ :return: + """ + + if attributes: + attributes.get('$@MID@$', None) + attributes.pop('Date', None) + attributes.pop('Attachment', None) + attributes.pop('Text', None) # Remove this one because it will be send attachment like + + +def _replace_special_characters_in_attribute_keys(attributes): + """ + Replaces special characters in elog attribute keys by underscore, otherwise attribute values will be erased in + the http request. This is using the same replacement elog itself is using to handle these cases + + :param attributes: dictionary of attributes to be cleaned. + :return: attributes with replaced keys + """ + return {re.sub('[^0-9a-zA-Z]', '_', key): value for key, value in attributes.items()} + + +def _validate_response(response): + """ Validate response of the request.""" + + msg_id = None + + if response.status_code not in [200, 302]: + # 200 --> OK; 302 --> Found + # Html page is returned with error description (handling errors same way as on original client. Looks + # like there is no other way. + + err = re.findall('.*?', + response.content.decode('utf-8', 'ignore'), + flags=re.DOTALL) + + if len(err) > 0: + # Remove html tags + # If part of the message has: Please go back... remove this part since it is an instruction for + # the user when using browser. 
+ err = re.sub('(?:<.*?>)', '', err[0]) + if err: + raise LogbookMessageRejected('Rejected because of: ' + err) + else: + raise LogbookMessageRejected('Rejected because of unknown error.') + + # Other unknown errors + raise LogbookMessageRejected('Rejected because of unknown error.') + else: + location = response.headers.get('Location') + if location is not None: + if 'has moved' in location: + raise LogbookServerProblem('Logbook server has moved to another location.') + elif 'fail' in location: + raise LogbookAuthenticationError('Invalid username or password.') + else: + # returned locations is something like: '/// + # with urllib.parse.urlparse returns attribute path=// + msg_id = int(urllib.parse.urlsplit(location).path.split('/')[-1]) + + if b'form name=form1' in response.content or b'type=password' in response.content: + # Not to smart to check this way, but no other indication of this kind of error. + # C client does it the same way + raise LogbookAuthenticationError('Invalid username or password.') + + return response.content, response.headers, msg_id + + +def _handle_pswd(password, encrypt=True): + """ + Takes password string and returns password as needed by elog. If encrypt=True then password will be + sha256 encrypted (salt='', rounds=5000). Before returning password, any trailing $5$$ will be removed + independent off encrypt flag. + + :param password: password string + :param encrypt: encrypt password? 
+ :return: elog prepared password + """ + if encrypt and password: + from passlib.hash import sha256_crypt + return sha256_crypt.encrypt(password, salt='', rounds=5000)[4:] + elif password and password.startswith('$5$$'): + return password[4:] + else: + return password diff --git a/python310/packages/elog/logbook_exceptions.py b/python310/packages/elog/logbook_exceptions.py new file mode 100644 index 0000000..64bea05 --- /dev/null +++ b/python310/packages/elog/logbook_exceptions.py @@ -0,0 +1,28 @@ +class LogbookError(Exception): + """ Parent logbook exception.""" + pass + + +class LogbookAuthenticationError(LogbookError): + """ Raise when problem with username and password.""" + pass + + +class LogbookServerProblem(LogbookError): + """ Raise when problem accessing logbook server.""" + pass + + +class LogbookMessageRejected(LogbookError): + """ Raised when manipulating/creating message was rejected by the server or there was problem composing message.""" + pass + + +class LogbookInvalidMessageID(LogbookMessageRejected): + """ Raised when there is no message with specified ID on the server.""" + pass + + +class LogbookInvalidAttachmentType(LogbookMessageRejected): + """ Raised when passed attachment has invalid type.""" + pass diff --git a/python37/packages/bigtree/__init__.py b/python37/packages/bigtree/__init__.py new file mode 100644 index 0000000..e04a7ca --- /dev/null +++ b/python37/packages/bigtree/__init__.py @@ -0,0 +1,68 @@ +__version__ = "0.7.2" + +from bigtree.binarytree.construct import list_to_binarytree +from bigtree.dag.construct import dataframe_to_dag, dict_to_dag, list_to_dag +from bigtree.dag.export import dag_to_dataframe, dag_to_dict, dag_to_dot, dag_to_list +from bigtree.node.basenode import BaseNode +from bigtree.node.binarynode import BinaryNode +from bigtree.node.dagnode import DAGNode +from bigtree.node.node import Node +from bigtree.tree.construct import ( + add_dataframe_to_tree_by_name, + add_dataframe_to_tree_by_path, + 
def list_to_binarytree(
    heapq_list: List[Union[int, float]], node_type: Type[BinaryNode] = BinaryNode
) -> BinaryNode:
    """Build a binary tree out of a list of numbers (int or float) laid out in heapq format.

    The first number becomes the root; the number at position ``k`` (``k > 0``)
    is attached to the node created for position ``(k - 1) // 2``.

    >>> from bigtree import list_to_binarytree, print_tree, tree_to_dot
    >>> nums_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
    >>> root = list_to_binarytree(nums_list)
    >>> print_tree(root)
    1
    ├── 2
    │   ├── 4
    │   │   ├── 8
    │   │   └── 9
    │   └── 5
    │       └── 10
    └── 3
        ├── 6
        └── 7
    >>> graph = tree_to_dot(root, node_colour="gold")
    >>> graph.write_png("assets/binarytree.png")

    .. image:: https://github.com/kayjan/bigtree/raw/master/assets/binarytree.png

    Args:
        heapq_list (List[Union[int, float]]): list containing integer node names, ordered in heapq fashion
        node_type (Type[BinaryNode]): node type of tree to be created, defaults to BinaryNode

    Returns:
        (BinaryNode)
    """
    if not len(heapq_list):
        raise ValueError("Input list does not contain any data, check `heapq_list`")

    nodes = [node_type(heapq_list[0])]
    for position in range(1, len(heapq_list)):
        # Odd and even positions share one formula: both children of node k
        # (positions 2k + 1 and 2k + 2) floor-divide back to k.
        parent_node = nodes[(position - 1) // 2]
        nodes.append(node_type(heapq_list[position], parent=parent_node))
    return nodes[0]
+ + >>> from bigtree import list_to_dag, dag_iterator + >>> relations_list = [("a", "c"), ("a", "d"), ("b", "c"), ("c", "d"), ("d", "e")] + >>> dag = list_to_dag(relations_list) + >>> [(parent.node_name, child.node_name) for parent, child in dag_iterator(dag)] + [('a', 'd'), ('c', 'd'), ('d', 'e'), ('a', 'c'), ('b', 'c')] + + Args: + relations (list): list containing tuple of parent-child names + node_type (Type[DAGNode]): node type of DAG to be created, defaults to DAGNode + + Returns: + (DAGNode) + """ + if not len(relations): + raise ValueError("Input list does not contain any data, check `relations`") + + relation_data = pd.DataFrame(relations, columns=["parent", "child"]) + return dataframe_to_dag( + relation_data, child_col="child", parent_col="parent", node_type=node_type + ) + + +def dict_to_dag( + relation_attrs: dict, + parent_key: str = "parents", + node_type: Type[DAGNode] = DAGNode, +) -> DAGNode: + """Construct DAG from nested dictionary, ``key``: child name, ``value``: dict of parent names, attribute name and + attribute value. + Note that node names must be unique. + + >>> from bigtree import dict_to_dag, dag_iterator + >>> relation_dict = { + ... "a": {"step": 1}, + ... "b": {"step": 1}, + ... "c": {"parents": ["a", "b"], "step": 2}, + ... "d": {"parents": ["a", "c"], "step": 2}, + ... "e": {"parents": ["d"], "step": 3}, + ... 
} + >>> dag = dict_to_dag(relation_dict, parent_key="parents") + >>> [(parent.node_name, child.node_name) for parent, child in dag_iterator(dag)] + [('a', 'd'), ('c', 'd'), ('d', 'e'), ('a', 'c'), ('b', 'c')] + + Args: + relation_attrs (dict): dictionary containing node, node parents, and node attribute information, + key: child name, value: dict of parent names, node attribute and attribute value + parent_key (str): key of dictionary to retrieve list of parents name, defaults to "parent" + node_type (Type[DAGNode]): node type of DAG to be created, defaults to DAGNode + + Returns: + (DAGNode) + """ + if not len(relation_attrs): + raise ValueError("Dictionary does not contain any data, check `relation_attrs`") + + # Convert dictionary to dataframe + data = pd.DataFrame(relation_attrs).T.rename_axis("_tmp_child").reset_index() + assert ( + parent_key in data + ), f"Parent key {parent_key} not in dictionary, check `relation_attrs` and `parent_key`" + + data = data.explode(parent_key) + return dataframe_to_dag( + data, + child_col="_tmp_child", + parent_col=parent_key, + node_type=node_type, + ) + + +def dataframe_to_dag( + data: pd.DataFrame, + child_col: str = None, + parent_col: str = None, + attribute_cols: list = [], + node_type: Type[DAGNode] = DAGNode, +) -> DAGNode: + """Construct DAG from pandas DataFrame. + Note that node names must be unique. + + `child_col` and `parent_col` specify columns for child name and parent name to construct DAG. + `attribute_cols` specify columns for node attribute for child name + If columns are not specified, `child_col` takes first column, `parent_col` takes second column, and all other + columns are `attribute_cols`. + + >>> import pandas as pd + >>> from bigtree import dataframe_to_dag, dag_iterator + >>> relation_data = pd.DataFrame([ + ... ["a", None, 1], + ... ["b", None, 1], + ... ["c", "a", 2], + ... ["c", "b", 2], + ... ["d", "a", 2], + ... ["d", "c", 2], + ... ["e", "d", 3], + ... ], + ... 
columns=["child", "parent", "step"] + ... ) + >>> dag = dataframe_to_dag(relation_data) + >>> [(parent.node_name, child.node_name) for parent, child in dag_iterator(dag)] + [('a', 'd'), ('c', 'd'), ('d', 'e'), ('a', 'c'), ('b', 'c')] + + Args: + data (pandas.DataFrame): data containing path and node attribute information + child_col (str): column of data containing child name information, defaults to None + if not set, it will take the first column of data + parent_col (str): column of data containing parent name information, defaults to None + if not set, it will take the second column of data + attribute_cols (list): columns of data containing child node attribute information, + if not set, it will take all columns of data except `child_col` and `parent_col` + node_type (Type[DAGNode]): node type of DAG to be created, defaults to DAGNode + + Returns: + (DAGNode) + """ + if not len(data.columns): + raise ValueError("Data does not contain any columns, check `data`") + if not len(data): + raise ValueError("Data does not contain any rows, check `data`") + + if not child_col: + child_col = data.columns[0] + if not parent_col: + parent_col = data.columns[1] + if not len(attribute_cols): + attribute_cols = list(data.columns) + attribute_cols.remove(child_col) + attribute_cols.remove(parent_col) + + data_check = data.copy()[[child_col] + attribute_cols].drop_duplicates() + _duplicate_check = ( + data_check[child_col] + .value_counts() + .to_frame("counts") + .rename_axis(child_col) + .reset_index() + ) + _duplicate_check = _duplicate_check[_duplicate_check["counts"] > 1] + if len(_duplicate_check): + raise ValueError( + f"There exists duplicate child name with different attributes\nCheck {_duplicate_check}" + ) + if np.any(data[child_col].isnull()): + raise ValueError(f"Child name cannot be empty, check {child_col}") + + node_dict = dict() + parent_node = None + + for row in data.reset_index(drop=True).to_dict(orient="index").values(): + child_name = row[child_col] + 
parent_name = row[parent_col] + node_attrs = row.copy() + del node_attrs[child_col] + del node_attrs[parent_col] + node_attrs = {k: v for k, v in node_attrs.items() if not pd.isnull(v)} + child_node = node_dict.get(child_name) + if not child_node: + child_node = node_type(child_name) + node_dict[child_name] = child_node + child_node.set_attrs(node_attrs) + + if not pd.isnull(parent_name): + parent_node = node_dict.get(parent_name) + if not parent_node: + parent_node = node_type(parent_name) + node_dict[parent_name] = parent_node + child_node.parents = [parent_node] + + return parent_node diff --git a/python37/packages/bigtree/dag/export.py b/python37/packages/bigtree/dag/export.py new file mode 100644 index 0000000..34c2ad8 --- /dev/null +++ b/python37/packages/bigtree/dag/export.py @@ -0,0 +1,269 @@ +from typing import Any, Dict, List, Tuple, Union + +import pandas as pd + +from bigtree.node.dagnode import DAGNode +from bigtree.utils.iterators import dag_iterator + +__all__ = ["dag_to_list", "dag_to_dict", "dag_to_dataframe", "dag_to_dot"] + + +def dag_to_list( + dag: DAGNode, +) -> List[Tuple[str, str]]: + """Export DAG to list of tuple containing parent-child names + + >>> from bigtree import DAGNode, dag_to_list + >>> a = DAGNode("a", step=1) + >>> b = DAGNode("b", step=1) + >>> c = DAGNode("c", step=2, parents=[a, b]) + >>> d = DAGNode("d", step=2, parents=[a, c]) + >>> e = DAGNode("e", step=3, parents=[d]) + >>> dag_to_list(a) + [('a', 'c'), ('a', 'd'), ('b', 'c'), ('c', 'd'), ('d', 'e')] + + Args: + dag (DAGNode): DAG to be exported + + Returns: + (List[Tuple[str, str]]) + """ + relations = [] + for parent_node, child_node in dag_iterator(dag): + relations.append((parent_node.node_name, child_node.node_name)) + return relations + + +def dag_to_dict( + dag: DAGNode, + parent_key: str = "parents", + attr_dict: dict = {}, + all_attrs: bool = False, +) -> Dict[str, Any]: + """Export tree to dictionary. 
+ + Exported dictionary will have key as child name, and parent names and node attributes as a nested dictionary. + + >>> from bigtree import DAGNode, dag_to_dict + >>> a = DAGNode("a", step=1) + >>> b = DAGNode("b", step=1) + >>> c = DAGNode("c", step=2, parents=[a, b]) + >>> d = DAGNode("d", step=2, parents=[a, c]) + >>> e = DAGNode("e", step=3, parents=[d]) + >>> dag_to_dict(a, parent_key="parent", attr_dict={"step": "step no."}) + {'a': {'step no.': 1}, 'c': {'parent': ['a', 'b'], 'step no.': 2}, 'd': {'parent': ['a', 'c'], 'step no.': 2}, 'b': {'step no.': 1}, 'e': {'parent': ['d'], 'step no.': 3}} + + Args: + dag (DAGNode): DAG to be exported + parent_key (str): dictionary key for `node.parent.node_name`, defaults to `parents` + attr_dict (dict): dictionary mapping node attributes to dictionary key, + key: node attributes, value: corresponding dictionary key, optional + all_attrs (bool): indicator whether to retrieve all `Node` attributes + + Returns: + (dict) + """ + dag = dag.copy() + data_dict = {} + + for parent_node, child_node in dag_iterator(dag): + if parent_node.is_root: + data_parent = {} + if all_attrs: + data_parent.update( + parent_node.describe( + exclude_attributes=["name"], exclude_prefix="_" + ) + ) + else: + for k, v in attr_dict.items(): + data_parent[v] = parent_node.get_attr(k) + data_dict[parent_node.node_name] = data_parent + + if data_dict.get(child_node.node_name): + data_dict[child_node.node_name][parent_key].append(parent_node.node_name) + else: + data_child = {parent_key: [parent_node.node_name]} + if all_attrs: + data_child.update( + child_node.describe(exclude_attributes=["name"], exclude_prefix="_") + ) + else: + for k, v in attr_dict.items(): + data_child[v] = child_node.get_attr(k) + data_dict[child_node.node_name] = data_child + return data_dict + + +def dag_to_dataframe( + dag: DAGNode, + name_col: str = "name", + parent_col: str = "parent", + attr_dict: dict = {}, + all_attrs: bool = False, +) -> pd.DataFrame: + """Export 
DAG to pandas DataFrame. + + >>> from bigtree import DAGNode, dag_to_dataframe + >>> a = DAGNode("a", step=1) + >>> b = DAGNode("b", step=1) + >>> c = DAGNode("c", step=2, parents=[a, b]) + >>> d = DAGNode("d", step=2, parents=[a, c]) + >>> e = DAGNode("e", step=3, parents=[d]) + >>> dag_to_dataframe(a, name_col="name", parent_col="parent", attr_dict={"step": "step no."}) + name parent step no. + 0 a None 1 + 1 c a 2 + 2 d a 2 + 3 b None 1 + 4 c b 2 + 5 d c 2 + 6 e d 3 + + Args: + dag (DAGNode): DAG to be exported + name_col (str): column name for `node.node_name`, defaults to 'name' + parent_col (str): column name for `node.parent.node_name`, defaults to 'parent' + attr_dict (dict): dictionary mapping node attributes to column name, + key: node attributes, value: corresponding column in dataframe, optional + all_attrs (bool): indicator whether to retrieve all `Node` attributes + + Returns: + (pd.DataFrame) + """ + dag = dag.copy() + data_list = [] + + for parent_node, child_node in dag_iterator(dag): + if parent_node.is_root: + data_parent = {name_col: parent_node.node_name, parent_col: None} + if all_attrs: + data_parent.update( + parent_node.describe( + exclude_attributes=["name"], exclude_prefix="_" + ) + ) + else: + for k, v in attr_dict.items(): + data_parent[v] = parent_node.get_attr(k) + data_list.append(data_parent) + + data_child = {name_col: child_node.node_name, parent_col: parent_node.node_name} + if all_attrs: + data_child.update( + child_node.describe(exclude_attributes=["name"], exclude_prefix="_") + ) + else: + for k, v in attr_dict.items(): + data_child[v] = child_node.get_attr(k) + data_list.append(data_child) + return pd.DataFrame(data_list).drop_duplicates().reset_index(drop=True) + + +def dag_to_dot( + dag: Union[DAGNode, List[DAGNode]], + rankdir: str = "TB", + bg_colour: str = None, + node_colour: str = None, + edge_colour: str = None, + node_attr: str = None, + edge_attr: str = None, +): + r"""Export DAG tree or list of DAG trees to image. 
+ Note that node names must be unique. + Posible node attributes include style, fillcolor, shape. + + >>> from bigtree import DAGNode, dag_to_dot + >>> a = DAGNode("a", step=1) + >>> b = DAGNode("b", step=1) + >>> c = DAGNode("c", step=2, parents=[a, b]) + >>> d = DAGNode("d", step=2, parents=[a, c]) + >>> e = DAGNode("e", step=3, parents=[d]) + >>> dag_graph = dag_to_dot(a) + + Export to image, dot file, etc. + + >>> dag_graph.write_png("tree_dag.png") + >>> dag_graph.write_dot("tree_dag.dot") + + Export to string + + >>> dag_graph.to_string() + 'strict digraph G {\nrankdir=TB;\nc [label=c];\na [label=a];\na -> c;\nd [label=d];\na [label=a];\na -> d;\nc [label=c];\nb [label=b];\nb -> c;\nd [label=d];\nc [label=c];\nc -> d;\ne [label=e];\nd [label=d];\nd -> e;\n}\n' + + Args: + dag (Union[DAGNode, List[DAGNode]]): DAG or list of DAGs to be exported + rankdir (str): set direction of graph layout, defaults to 'TB', can be 'BT, 'LR', 'RL' + bg_colour (str): background color of image, defaults to None + node_colour (str): fill colour of nodes, defaults to None + edge_colour (str): colour of edges, defaults to None + node_attr (str): node attribute for style, overrides node_colour, defaults to None + Possible node attributes include {"style": "filled", "fillcolor": "gold"} + edge_attr (str): edge attribute for style, overrides edge_colour, defaults to None + Possible edge attributes include {"style": "bold", "label": "edge label", "color": "black"} + + Returns: + (pydot.Dot) + """ + try: + import pydot + except ImportError: # pragma: no cover + raise ImportError( + "pydot not available. 
Please perform a\n\npip install 'bigtree[image]'\n\nto install required dependencies" + ) + + # Get style + if bg_colour: + graph_style = dict(bgcolor=bg_colour) + else: + graph_style = dict() + + if node_colour: + node_style = dict(style="filled", fillcolor=node_colour) + else: + node_style = dict() + + if edge_colour: + edge_style = dict(color=edge_colour) + else: + edge_style = dict() + + _graph = pydot.Dot( + graph_type="digraph", strict=True, rankdir=rankdir, **graph_style + ) + + if not isinstance(dag, list): + dag = [dag] + + for _dag in dag: + if not isinstance(_dag, DAGNode): + raise ValueError( + "Tree should be of type `DAGNode`, or inherit from `DAGNode`" + ) + _dag = _dag.copy() + + for parent_node, child_node in dag_iterator(_dag): + child_name = child_node.name + child_node_style = node_style.copy() + if node_attr and child_node.get_attr(node_attr): + child_node_style.update(child_node.get_attr(node_attr)) + if edge_attr: + edge_style.update(child_node.get_attr(edge_attr)) + pydot_child = pydot.Node( + name=child_name, label=child_name, **child_node_style + ) + _graph.add_node(pydot_child) + + parent_name = parent_node.name + parent_node_style = node_style.copy() + if node_attr and parent_node.get_attr(node_attr): + parent_node_style.update(parent_node.get_attr(node_attr)) + pydot_parent = pydot.Node( + name=parent_name, label=parent_name, **parent_node_style + ) + _graph.add_node(pydot_parent) + + edge = pydot.Edge(parent_name, child_name, **edge_style) + _graph.add_edge(edge) + + return _graph diff --git a/python37/packages/bigtree/node/__init__.py b/python37/packages/bigtree/node/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python37/packages/bigtree/node/basenode.py b/python37/packages/bigtree/node/basenode.py new file mode 100644 index 0000000..e7331d5 --- /dev/null +++ b/python37/packages/bigtree/node/basenode.py @@ -0,0 +1,696 @@ +import copy +from typing import Any, Dict, Iterable, List + +from bigtree.utils.exceptions 
import CorruptedTreeError, LoopError, TreeError +from bigtree.utils.iterators import preorder_iter + + +class BaseNode: + """ + BaseNode extends any Python class to a tree node. + Nodes can have attributes if they are initialized from `Node`, *dictionary*, or *pandas DataFrame*. + + Nodes can be linked to each other with `parent` and `children` setter methods, + or using bitshift operator with the convention `parent_node >> child_node` or `child_node << parent_node`. + + >>> from bigtree import Node, print_tree + >>> root = Node("a", age=90) + >>> b = Node("b", age=65) + >>> c = Node("c", age=60) + >>> d = Node("d", age=40) + >>> root.children = [b, c] + >>> d.parent = b + >>> print_tree(root, attr_list=["age"]) + a [age=90] + ├── b [age=65] + │ └── d [age=40] + └── c [age=60] + + >>> from bigtree import Node + >>> root = Node("a", age=90) + >>> b = Node("b", age=65) + >>> c = Node("c", age=60) + >>> d = Node("d", age=40) + >>> root >> b + >>> root >> c + >>> d << b + >>> print_tree(root, attr_list=["age"]) + a [age=90] + ├── b [age=65] + │ └── d [age=40] + └── c [age=60] + + Directly passing `parent` argument. + + >>> from bigtree import Node + >>> root = Node("a") + >>> b = Node("b", parent=root) + >>> c = Node("c", parent=root) + >>> d = Node("d", parent=b) + + Directly passing `children` argument. + + >>> from bigtree import Node + >>> d = Node("d") + >>> c = Node("c") + >>> b = Node("b", children=[d]) + >>> a = Node("a", children=[b, c]) + + **BaseNode Creation** + + Node can be created by instantiating a `BaseNode` class or by using a *dictionary*. + If node is created with dictionary, all keys of dictionary will be stored as class attributes. + + >>> from bigtree import Node + >>> root = Node.from_dict({"name": "a", "age": 90}) + + **BaseNode Attributes** + + These are node attributes that have getter and/or setter methods. + + Get and set other `BaseNode` + + 1. ``parent``: Get/set parent node + 2. 
``children``: Get/set child nodes + + Get other `BaseNode` + + 1. ``ancestors``: Get ancestors of node excluding self, iterator + 2. ``descendants``: Get descendants of node excluding self, iterator + 3. ``leaves``: Get all leaf node(s) from self, iterator + 4. ``siblings``: Get siblings of self + 5. ``left_sibling``: Get sibling left of self + 6. ``right_sibling``: Get sibling right of self + + Get `BaseNode` configuration + + 1. ``node_path``: Get tuple of nodes from root + 2. ``is_root``: Get indicator if self is root node + 3. ``is_leaf``: Get indicator if self is leaf node + 4. ``root``: Get root node of tree + 5. ``depth``: Get depth of self + 6. ``max_depth``: Get maximum depth from root to leaf node + + **BaseNode Methods** + + These are methods available to be performed on `BaseNode`. + + Constructor methods + + 1. ``from_dict()``: Create BaseNode from dictionary + + `BaseNode` methods + + 1. ``describe()``: Get node information sorted by attributes, returns list of tuples + 2. ``get_attr(attr_name: str)``: Get value of node attribute + 3. ``set_attrs(attrs: dict)``: Set node attribute name(s) and value(s) + 4. ``go_to(node: BaseNode)``: Get a path from own node to another node from same tree + 5. ``copy()``: Deep copy BaseNode + 6. ``sort()``: Sort child nodes + + ---- + + """ + + def __init__(self, parent=None, children: List = None, **kwargs): + self.__parent = None + self.__children = [] + if children is None: + children = [] + self.parent = parent + self.children = children + if "parents" in kwargs: + raise ValueError( + "Attempting to set `parents` attribute, do you mean `parent`?" 
+ ) + self.__dict__.update(**kwargs) + + @property + def parent(self): + """Get parent node + + Returns: + (Self) + """ + return self.__parent + + @staticmethod + def __check_parent_type(new_parent): + """Check parent type + + Args: + new_parent (Self): parent node + """ + if not (isinstance(new_parent, BaseNode) or new_parent is None): + raise TypeError( + f"Expect input to be BaseNode type or NoneType, received input type {type(new_parent)}" + ) + + def __check_parent_loop(self, new_parent): + """Check parent type + + Args: + new_parent (Self): parent node + """ + if new_parent is not None: + if new_parent is self: + raise LoopError("Error setting parent: Node cannot be parent of itself") + if any( + ancestor is self + for ancestor in new_parent.ancestors + if new_parent.ancestors + ): + raise LoopError( + "Error setting parent: Node cannot be ancestor of itself" + ) + + @parent.setter + def parent(self, new_parent): + """Set parent node + + Args: + new_parent (Self): parent node + """ + self.__check_parent_type(new_parent) + self.__check_parent_loop(new_parent) + + current_parent = self.parent + current_child_idx = None + + # Assign new parent - rollback if error + self.__pre_assign_parent(new_parent) + try: + # Remove self from old parent + if current_parent is not None: + if not any( + child is self for child in current_parent.children + ): # pragma: no cover + raise CorruptedTreeError( + "Error setting parent: Node does not exist as children of its parent" + ) + current_child_idx = current_parent.__children.index(self) + current_parent.__children.remove(self) + + # Assign self to new parent + self.__parent = new_parent + if new_parent is not None: + new_parent.__children.append(self) + + self.__post_assign_parent(new_parent) + + except Exception as exc_info: + # Remove self from new parent + if new_parent is not None: + new_parent.__children.remove(self) + + # Reassign self to old parent + self.__parent = current_parent + if current_child_idx is not None: + 
current_parent.__children.insert(current_child_idx, self) + raise TreeError(exc_info) + + def __pre_assign_parent(self, new_parent): + """Custom method to check before attaching parent + Can be overriden with `_BaseNode__pre_assign_parent()` + + Args: + new_parent (Self): new parent to be added + """ + pass + + def __post_assign_parent(self, new_parent): + """Custom method to check after attaching parent + Can be overriden with `_BaseNode__post_assign_parent()` + + Args: + new_parent (Self): new parent to be added + """ + pass + + @property + def parents(self) -> None: + """Do not allow `parents` attribute to be accessed""" + raise ValueError( + "Attempting to access `parents` attribute, do you mean `parent`?" + ) + + @parents.setter + def parents(self, new_parent): + """Do not allow `parents` attribute to be set + + Args: + new_parent (Self): parent node + """ + raise ValueError("Attempting to set `parents` attribute, do you mean `parent`?") + + @property + def children(self) -> Iterable: + """Get child nodes + + Returns: + (Iterable[Self]) + """ + return tuple(self.__children) + + def __check_children_type(self, new_children: List): + """Check child type + + Args: + new_children (List[Self]): child node + """ + if not isinstance(new_children, list): + raise TypeError( + f"Children input should be list type, received input type {type(new_children)}" + ) + + def __check_children_loop(self, new_children: List): + """Check child loop + + Args: + new_children (List[Self]): child node + """ + seen_children = [] + for new_child in new_children: + # Check type + if not isinstance(new_child, BaseNode): + raise TypeError( + f"Expect input to be BaseNode type, received input type {type(new_child)}" + ) + + # Check for loop and tree structure + if new_child is self: + raise LoopError("Error setting child: Node cannot be child of itself") + if any(child is new_child for child in self.ancestors): + raise LoopError( + "Error setting child: Node cannot be ancestors of itself" + 
) + + # Check for duplicate children + if id(new_child) in seen_children: + raise TreeError( + "Error setting child: Node cannot be added multiple times as a child" + ) + else: + seen_children.append(id(new_child)) + + @children.setter + def children(self, new_children: List): + """Set child nodes + + Args: + new_children (List[Self]): child node + """ + self.__check_children_type(new_children) + self.__check_children_loop(new_children) + + current_new_children = { + new_child: (new_child.parent.__children.index(new_child), new_child.parent) + for new_child in new_children + if new_child.parent is not None + } + current_new_orphan = [ + new_child for new_child in new_children if new_child.parent is None + ] + current_children = list(self.children) + + # Assign new children - rollback if error + self.__pre_assign_children(new_children) + try: + # Remove old children from self + del self.children + + # Assign new children to self + self.__children = new_children + for new_child in new_children: + if new_child.parent: + new_child.parent.__children.remove(new_child) + new_child.__parent = self + self.__post_assign_children(new_children) + except Exception as exc_info: + # Reassign new children to their original parent + for child, idx_parent in current_new_children.items(): + child_idx, parent = idx_parent + child.__parent = parent + parent.__children.insert(child_idx, child) + for child in current_new_orphan: + child.__parent = None + + # Reassign old children to self + self.__children = current_children + for child in current_children: + child.__parent = self + raise TreeError(exc_info) + + @children.deleter + def children(self): + """Delete child node(s)""" + for child in self.children: + child.parent.__children.remove(child) + child.__parent = None + + def __pre_assign_children(self, new_children: List): + """Custom method to check before attaching children + Can be overriden with `_BaseNode__pre_assign_children()` + + Args: + new_children (List[Self]): new 
children to be added + """ + pass + + def __post_assign_children(self, new_children: List): + """Custom method to check after attaching children + Can be overriden with `_BaseNode__post_assign_children()` + + Args: + new_children (List[Self]): new children to be added + """ + pass + + @property + def ancestors(self) -> Iterable: + """Get iterator to yield all ancestors of self, does not include self + + Returns: + (Iterable[Self]) + """ + node = self.parent + while node is not None: + yield node + node = node.parent + + @property + def descendants(self) -> Iterable: + """Get iterator to yield all descendants of self, does not include self + + Returns: + (Iterable[Self]) + """ + yield from preorder_iter(self, filter_condition=lambda _node: _node != self) + + @property + def leaves(self) -> Iterable: + """Get iterator to yield all leaf nodes from self + + Returns: + (Iterable[Self]) + """ + yield from preorder_iter(self, filter_condition=lambda _node: _node.is_leaf) + + @property + def siblings(self) -> Iterable: + """Get siblings of self + + Returns: + (Iterable[Self]) + """ + if self.is_root: + return () + return tuple(child for child in self.parent.children if child is not self) + + @property + def left_sibling(self): + """Get sibling left of self + + Returns: + (Self) + """ + if self.parent: + children = self.parent.children + child_idx = children.index(self) + if child_idx: + return self.parent.children[child_idx - 1] + return None + + @property + def right_sibling(self): + """Get sibling right of self + + Returns: + (Self) + """ + if self.parent: + children = self.parent.children + child_idx = children.index(self) + if child_idx + 1 < len(children): + return self.parent.children[child_idx + 1] + return None + + @property + def node_path(self) -> Iterable: + """Get tuple of nodes starting from root + + Returns: + (Iterable[Self]) + """ + if self.is_root: + return [self] + return tuple(list(self.parent.node_path) + [self]) + + @property + def is_root(self) -> 
@property
def is_leaf(self) -> bool:
    """Get indicator if self is leaf node

    Returns:
        (bool): True when ``self.children`` yields no items
    """
    return not len(list(self.children))


@property
def root(self):
    """Get root node of tree

    Follows ``parent`` links iteratively, so very deep trees do not hit the
    interpreter recursion limit.

    Returns:
        (Self)
    """
    node = self
    while not node.is_root:
        node = node.parent
    return node


@property
def depth(self) -> int:
    """Get depth of self, indexing starts from 1

    Returns:
        (int): 1 for the root node, plus one for every ancestor
    """
    level = 1
    node = self
    while not node.is_root:
        node = node.parent
        level += 1
    return level


@property
def max_depth(self) -> int:
    """Get maximum depth from root to leaf node

    Returns:
        (int): largest ``depth`` among the nodes yielded by ``preorder_iter`` on the root
    """
    return max(node.depth for node in preorder_iter(self.root))


@classmethod
def from_dict(cls, input_dict: Dict[str, Any]):
    """Construct node from dictionary, all keys of dictionary will be stored as class attributes
    Input dictionary must have key `name` if not `Node` will not have any name

    >>> from bigtree import Node
    >>> a = Node.from_dict({"name": "a", "age": 90})

    Args:
        input_dict (Dict[str, Any]): dictionary with node information, key: attribute name, value: attribute value

    Returns:
        (Self)
    """
    return cls(**input_dict)


def describe(self, exclude_attributes: List[str] = None, exclude_prefix: str = ""):
    """Get node information sorted by attribute name, returns list of tuples

    >>> from bigtree.node.node import Node
    >>> a = Node('a', age=90)
    >>> a.describe()
    [('_BaseNode__children', []), ('_BaseNode__parent', None), ('_sep', '/'), ('age', 90), ('name', 'a')]
    >>> a.describe(exclude_prefix="_")
    [('age', 90), ('name', 'a')]
    >>> a.describe(exclude_prefix="_", exclude_attributes=["name"])
    [('age', 90)]

    Args:
        exclude_attributes (List[str]): list of attributes to exclude, defaults to no exclusion
        exclude_prefix (str): prefix of attributes to exclude, empty string disables prefix filtering

    Returns:
        (List[Tuple[str, Any]]): ``(attribute name, value)`` pairs from ``self.__dict__``
    """
    # `None` instead of a shared `[]` default avoids the mutable-default pitfall
    if exclude_attributes is None:
        exclude_attributes = []
    return [
        (key, value)
        for key, value in sorted(self.__dict__.items(), key=lambda item: item[0])
        if key not in exclude_attributes
        and (not exclude_prefix or not key.startswith(exclude_prefix))
    ]


def get_attr(self, attr_name: str) -> Any:
    """Get value of node attribute
    Returns None if attribute name does not exist

    >>> from bigtree.node.node import Node
    >>> a = Node('a', age=90)
    >>> a.get_attr("age")
    90

    Args:
        attr_name (str): attribute name

    Returns:
        (Any)
    """
    try:
        return self.__getattribute__(attr_name)
    except AttributeError:
        return None


def set_attrs(self, attrs: Dict[str, Any]):
    """Set node attributes

    >>> from bigtree.node.node import Node
    >>> a = Node('a')
    >>> a.set_attrs({"age": 90})
    >>> a
    Node(/a, age=90)

    Args:
        attrs (Dict[str, Any]): attribute dictionary,
            key: attribute name, value: attribute value
    """
    self.__dict__.update(attrs)


def go_to(self, node) -> Iterable:
    """Get path from current node to specified node from same tree

    >>> from bigtree import Node, print_tree
    >>> a = Node(name="a")
    >>> b = Node(name="b", parent=a)
    >>> c = Node(name="c", parent=a)
    >>> d = Node(name="d", parent=b)
    >>> e = Node(name="e", parent=b)
    >>> f = Node(name="f", parent=c)
    >>> g = Node(name="g", parent=e)
    >>> h = Node(name="h", parent=e)
    >>> print_tree(a)
    a
    ├── b
    │   ├── d
    │   └── e
    │       ├── g
    │       └── h
    └── c
        └── f
    >>> d.go_to(d)
    [Node(/a/b/d, )]
    >>> d.go_to(g)
    [Node(/a/b/d, ), Node(/a/b, ), Node(/a/b/e, ), Node(/a/b/e/g, )]
    >>> d.go_to(f)
    [Node(/a/b/d, ), Node(/a/b, ), Node(/a, ), Node(/a/c, ), Node(/a/c/f, )]

    Args:
        node (Self): node to travel to from current node, inclusive of start and end node

    Returns:
        (Iterable)

    Raises:
        TypeError: if `node` is not a BaseNode
        TreeError: if `node` does not share a root with self
    """
    if not isinstance(node, BaseNode):
        raise TypeError(
            f"Expect node to be BaseNode type, received input type {type(node)}"
        )
    if self.root != node.root:
        raise TreeError(
            f"Nodes are not from the same tree. Check {self} and {node}"
        )
    if self == node:
        return [self]
    # self_path climbs from self to the root; node_path descends from the root to node
    self_path = [self] + list(self.ancestors)
    node_path = ([node] + list(node.ancestors))[::-1]
    node_path_members = set(node_path)
    # The first node on the upward path that also lies on node_path is the
    # lowest common ancestor; both nodes share a root so one always exists.
    self_min_index = next(
        idx for idx, _node in enumerate(self_path) if _node in node_path_members
    )
    node_min_index = node_path.index(self_path[self_min_index])
    return self_path[:self_min_index] + node_path[node_min_index:]


def copy(self):
    """Deep copy self; clone BaseNode

    >>> from bigtree.node.node import Node
    >>> a = Node('a')
    >>> a_copy = a.copy()

    Returns:
        (Self)
    """
    return copy.deepcopy(self)


def sort(self, **kwargs):
    """Sort children, possible keyword arguments include ``key=lambda node: node.name``, ``reverse=True``

    >>> from bigtree import Node, print_tree
    >>> a = Node('a')
    >>> c = Node("c", parent=a)
    >>> b = Node("b", parent=a)
    >>> print_tree(a)
    a
    ├── c
    └── b
    >>> a.sort(key=lambda node: node.name)
    >>> print_tree(a)
    a
    ├── b
    └── c
    """
    children = list(self.children)
    children.sort(**kwargs)
    self.__children = children


def __copy__(self):
    """Shallow copy self

    The new object shares attribute values (including the children list)
    with the original, since only ``__dict__`` is copied one level deep.

    >>> import copy
    >>> from bigtree.node.node import Node
    >>> a = Node('a')
    >>> a_copy = copy.copy(a)

    Returns:
        (Self)
    """
    obj = type(self).__new__(self.__class__)
    obj.__dict__.update(self.__dict__)
    return obj


def __repr__(self):
    """Return ``ClassName(attr=value, ...)`` using the non-underscore attributes"""
    class_name = self.__class__.__name__
    node_dict = self.describe(exclude_prefix="_")
    node_description = ", ".join([f"{k}={v}" for k, v in node_dict])
    return f"{class_name}({node_description})"


def __rshift__(self, other):
    """Set children using >> bitshift operator for self >> other

    Args:
        other (Self): other node, children
    """
    other.parent = self


def __lshift__(self, other):
    """Set parent using << bitshift operator for self << other

    Args:
        other (Self): other node, parent
    """
    self.parent = other
class BinaryNode(Node):
    """
    BinaryNode is an extension of Node, and is able to extend to any Python class for Binary Tree implementation.
    Nodes can have attributes if they are initialized from `BinaryNode`, *dictionary*, or *pandas DataFrame*.

    BinaryNode can be linked to each other with `children`, `left`, or `right` setter methods.
    If initialized with `children`, it must be length 2, denoting left and right child.

    >>> from bigtree import BinaryNode, print_tree
    >>> a = BinaryNode(1)
    >>> b = BinaryNode(2)
    >>> c = BinaryNode(3)
    >>> d = BinaryNode(4)
    >>> a.children = [b, c]
    >>> b.right = d
    >>> print_tree(a)
    1
    ├── 2
    │   └── 4
    └── 3

    Directly passing `left`, `right`, or `children` argument.

    >>> from bigtree import BinaryNode
    >>> d = BinaryNode(4)
    >>> c = BinaryNode(3)
    >>> b = BinaryNode(2, right=d)
    >>> a = BinaryNode(1, children=[b, c])

    **BinaryNode Creation**

    Node can be created by instantiating a `BinaryNode` class or by using a *dictionary*.
    If node is created with dictionary, all keys of dictionary will be stored as class attributes.

    >>> from bigtree import BinaryNode
    >>> a = BinaryNode.from_dict({"name": "1"})
    >>> a
    BinaryNode(name=1, val=1)

    **BinaryNode Attributes**

    These are node attributes that have getter and/or setter methods.

    Get `BinaryNode` configuration

    1. ``left``: Get left children
    2. ``right``: Get right children

    ----

    """

    def __init__(
        self,
        name: Union[str, int] = "",
        left=None,
        right=None,
        parent=None,
        children: List = None,
        **kwargs,
    ):
        """Create a binary node and link it to the given parent / children

        Args:
            name (Union[str, int]): node name, must be convertible with ``int()``; stored as ``val`` (int) and ``name`` (str)
            left (Self): left child, optional
            right (Self): right child, optional
            parent (Self): parent node, optional
            children (List[Self]): ``[left, right]`` pair, must agree with `left` / `right` if both are given
            **kwargs: additional attributes stored on the node

        Raises:
            ValueError: on `parents` keyword, non-integer name, wrong number of children,
                or `left` / `right` disagreeing with `children`
        """
        # Reject the DAG-style keyword before touching any other node, so a
        # failed construction never leaves a half-built node linked in a tree.
        if "parents" in kwargs:
            raise ValueError(
                "Attempting to set `parents` attribute, do you mean `parent`?"
            )
        self.val = int(name)
        self.name = str(name)
        self._sep = "/"
        self.__parent = None
        self.__children = []
        if not children:
            children = []
        if len(children):
            if len(children) != 2:
                raise ValueError("Children input must have length 2")
            if left and left != children[0]:
                raise ValueError(
                    f"Attempting to set both left and children with mismatched values\n"
                    f"Check left {left} and children {children}"
                )
            if right and right != children[1]:
                raise ValueError(
                    f"Attempting to set both right and children with mismatched values\n"
                    f"Check right {right} and children {children}"
                )
        else:
            children = [left, right]
        self.parent = parent
        self.children = children
        self.__dict__.update(**kwargs)

    @property
    def left(self):
        """Get left children

        Returns:
            (Self)
        """
        return self.__children[0]

    @left.setter
    def left(self, left_child):
        """Set left children

        Args:
            left_child (Self): left child
        """
        self.children = [left_child, self.right]

    @property
    def right(self):
        """Get right children

        Returns:
            (Self)
        """
        return self.__children[1]

    @right.setter
    def right(self, right_child):
        """Set right children

        Args:
            right_child (Self): right child
        """
        self.children = [self.left, right_child]

    @property
    def parent(self):
        """Get parent node

        Returns:
            (Self)
        """
        return self.__parent

    @staticmethod
    def __check_parent_type(new_parent):
        """Check parent type

        Args:
            new_parent (Self): parent node

        Raises:
            TypeError: if `new_parent` is neither a BinaryNode nor None
        """
        if not (isinstance(new_parent, BinaryNode) or new_parent is None):
            raise TypeError(
                f"Expect input to be BinaryNode type or NoneType, received input type {type(new_parent)}"
            )

    @parent.setter
    def parent(self, new_parent):
        """Set parent node

        Self is detached from its current parent (its slot becomes None) and
        placed in the first empty slot of `new_parent`. Any failure restores
        the previous links before raising.

        Args:
            new_parent (Self): parent node

        Raises:
            TypeError: if `new_parent` has the wrong type
            TreeError: if the assignment fails, e.g. `new_parent` already has 2 children
        """
        self.__check_parent_type(new_parent)
        self._BaseNode__check_parent_loop(new_parent)

        current_parent = self.parent
        current_child_idx = None

        # Assign new parent - rollback if error
        self.__pre_assign_parent(new_parent)
        try:
            # Remove self from old parent
            if current_parent is not None:
                if not any(
                    child is self for child in current_parent.children
                ):  # pragma: no cover
                    raise CorruptedTreeError(
                        "Error setting parent: Node does not exist as children of its parent"
                    )
                current_child_idx = current_parent.__children.index(self)
                current_parent.__children[current_child_idx] = None

            # Assign self to new parent
            self.__parent = new_parent
            if new_parent is not None:
                inserted = False
                for child_idx, child in enumerate(new_parent.__children):
                    if not child:
                        # Only the first empty slot is filled
                        new_parent.__children[child_idx] = self
                        inserted = True
                        break
                if not inserted:
                    raise TreeError(f"Parent {new_parent} already has 2 children")

            self.__post_assign_parent(new_parent)

        except Exception as exc_info:
            # Remove self from new parent
            if new_parent is not None and self in new_parent.__children:
                child_idx = new_parent.__children.index(self)
                new_parent.__children[child_idx] = None

            # Reassign self to old parent
            self.__parent = current_parent
            if current_child_idx is not None:
                current_parent.__children[current_child_idx] = self
            raise TreeError(exc_info) from exc_info

    def __pre_assign_parent(self, new_parent):
        """Custom method to check before attaching parent
        Can be overridden with `_BinaryNode__pre_assign_parent()`

        Args:
            new_parent (Self): new parent to be added
        """
        pass

    def __post_assign_parent(self, new_parent):
        """Custom method to check after attaching parent
        Can be overridden with `_BinaryNode__post_assign_parent()`

        Args:
            new_parent (Self): new parent to be added
        """
        pass

    @property
    def children(self) -> Iterable:
        """Get child nodes

        Returns:
            (Iterable[Self]): tuple of the child slots, an empty slot is None
        """
        return tuple(self.__children)

    def __check_children_type(self, new_children: List) -> List:
        """Check child type

        Args:
            new_children (List[Self]): child node

        Returns:
            (List[Self]): `new_children`, or ``[None, None]`` if it was empty

        Raises:
            ValueError: if `new_children` is non-empty and not of length 2
        """
        if not len(new_children):
            new_children = [None, None]
        if len(new_children) != 2:
            raise ValueError("Children input must have length 2")
        return new_children

    def __check_children_loop(self, new_children: List):
        """Check child loop

        Args:
            new_children (List[Self]): child node

        Raises:
            TypeError: if a child is neither a BinaryNode nor None
            LoopError: if a child is self or one of self's ancestors
            TreeError: if the same node appears more than once
        """
        seen_children = []
        for new_child in new_children:
            # Check type
            if new_child is not None and not isinstance(new_child, BinaryNode):
                raise TypeError(
                    f"Expect input to be BinaryNode type or NoneType, received input type {type(new_child)}"
                )

            # Check for loop and tree structure
            if new_child is self:
                raise LoopError("Error setting child: Node cannot be child of itself")
            if any(child is new_child for child in self.ancestors):
                raise LoopError(
                    "Error setting child: Node cannot be ancestors of itself"
                )

            # Check for duplicate children
            if new_child is not None:
                if id(new_child) in seen_children:
                    raise TreeError(
                        "Error setting child: Node cannot be added multiple times as a child"
                    )
                else:
                    seen_children.append(id(new_child))

    @children.setter
    def children(self, new_children: List):
        """Set child nodes

        Current children are detached, then each new child is removed from
        its previous parent and attached to self. Any failure restores all
        previous links before raising.

        Args:
            new_children (List[Self]): child node

        Raises:
            TreeError: if the assignment fails part-way
        """
        self._BaseNode__check_children_type(new_children)
        # Copy so later mutation of the caller's list cannot alter this node's slots
        new_children = list(self.__check_children_type(new_children))
        self.__check_children_loop(new_children)

        # Remember where every incoming child currently lives, for rollback
        current_new_children = {
            new_child: (
                new_child.parent.__children.index(new_child),
                new_child.parent,
            )
            for new_child in new_children
            if new_child is not None and new_child.parent is not None
        }
        current_new_orphan = [
            new_child
            for new_child in new_children
            if new_child is not None and new_child.parent is None
        ]
        current_children = list(self.children)

        # Assign new children - rollback if error
        self.__pre_assign_children(new_children)
        try:
            # Remove old children from self
            del self.children

            # Assign new children to self
            self.__children = new_children
            for new_child in new_children:
                if new_child is not None:
                    if new_child.parent:
                        child_idx = new_child.parent.__children.index(new_child)
                        new_child.parent.__children[child_idx] = None
                    new_child.__parent = self
            self.__post_assign_children(new_children)
        except Exception as exc_info:
            # Reassign new children to their original parent
            for child, idx_parent in current_new_children.items():
                child_idx, parent = idx_parent
                child.__parent = parent
                parent.__children[child_idx] = child
            for child in current_new_orphan:
                child.__parent = None

            # Reassign old children to self
            self.__children = current_children
            for child in current_children:
                if child:
                    child.__parent = self
            raise TreeError(exc_info) from exc_info

    @children.deleter
    def children(self):
        """Delete child node(s)

        Every child is orphaned and both slots are reset to None. The slots
        are kept (rather than removed from the list) so that ``left`` /
        ``right`` stay readable and new children can still be attached.
        """
        for child in self.children:
            if child is not None:
                child.__parent = None
        self.__children = [None, None]

    def __pre_assign_children(self, new_children: List):
        """Custom method to check before attaching children
        Can be overridden with `_BinaryNode__pre_assign_children()`

        Args:
            new_children (List[Self]): new children to be added
        """
        pass

    def __post_assign_children(self, new_children: List):
        """Custom method to check after attaching children
        Can be overridden with `_BinaryNode__post_assign_children()`

        Args:
            new_children (List[Self]): new children to be added
        """
        pass

    @property
    def is_leaf(self) -> bool:
        """Get indicator if self is leaf node

        Returns:
            (bool): True when both child slots are empty
        """
        return not len([child for child in self.children if child])

    def sort(self, **kwargs):
        """Sort children, possible keyword arguments include ``key=lambda node: node.name``, ``reverse=True``

        Only has an effect when both child slots are filled; with fewer
        children the left/right positions are left untouched.

        >>> from bigtree import BinaryNode, print_tree
        >>> a = BinaryNode(1)
        >>> c = BinaryNode(3, parent=a)
        >>> b = BinaryNode(2, parent=a)
        >>> print_tree(a)
        1
        ├── 3
        └── 2
        >>> a.sort(key=lambda node: node.val)
        >>> print_tree(a)
        1
        ├── 2
        └── 3
        """
        children = [child for child in self.children if child]
        if len(children) == 2:
            children.sort(**kwargs)
            # Assigned only here so the two-slot layout is never shortened
            self.__children = children

    def __repr__(self):
        """Return ``ClassName(attr=value, ...)`` using the non-underscore attributes"""
        class_name = self.__class__.__name__
        node_dict = self.describe(exclude_prefix="_", exclude_attributes=[])
        node_description = ", ".join([f"{k}={v}" for k, v in node_dict])
        return f"{class_name}({node_description})"
class DAGNode:
    """
    Base DAGNode extends any Python class to a DAG node, for DAG implementation.
    In DAG implementation, a node can have multiple parents.
    Parents and children cannot be reassigned once assigned, as Nodes are allowed to have multiple parents and children.
    If each node only has one parent, use `Node` class.
    DAGNodes can have attributes if they are initialized from `DAGNode` or dictionary.

    DAGNode can be linked to each other with `parents` and `children` setter methods,
    or using bitshift operator with the convention `parent_node >> child_node` or `child_node << parent_node`.

    >>> from bigtree import DAGNode
    >>> a = DAGNode("a")
    >>> b = DAGNode("b")
    >>> c = DAGNode("c")
    >>> d = DAGNode("d")
    >>> c.parents = [a, b]
    >>> c.children = [d]

    >>> from bigtree import DAGNode
    >>> a = DAGNode("a")
    >>> b = DAGNode("b")
    >>> c = DAGNode("c")
    >>> d = DAGNode("d")
    >>> a >> c
    >>> b >> c
    >>> d << c

    Directly passing `parents` argument.

    >>> from bigtree import DAGNode
    >>> a = DAGNode("a")
    >>> b = DAGNode("b")
    >>> c = DAGNode("c", parents=[a, b])
    >>> d = DAGNode("d", parents=[c])

    Directly passing `children` argument.

    >>> from bigtree import DAGNode
    >>> d = DAGNode("d")
    >>> c = DAGNode("c", children=[d])
    >>> b = DAGNode("b", children=[c])
    >>> a = DAGNode("a", children=[c])

    **DAGNode Creation**

    Node can be created by instantiating a `DAGNode` class or by using a *dictionary*.
    If node is created with dictionary, all keys of dictionary will be stored as class attributes.

    >>> from bigtree import DAGNode
    >>> a = DAGNode.from_dict({"name": "a", "age": 90})

    **DAGNode Attributes**

    These are node attributes that have getter and/or setter methods.

    Get and set other `DAGNode`

    1. ``parents``: Get/set parent nodes
    2. ``children``: Get/set child nodes

    Get other `DAGNode`

    1. ``ancestors``: Get ancestors of node excluding self, iterator
    2. ``descendants``: Get descendants of node excluding self, iterator
    3. ``siblings``: Get siblings of self

    Get `DAGNode` configuration

    1. ``node_name``: Get node name, without accessing `name` directly
    2. ``is_root``: Get indicator if self is root node
    3. ``is_leaf``: Get indicator if self is leaf node

    **DAGNode Methods**

    These are methods available to be performed on `DAGNode`.

    Constructor methods

    1. ``from_dict()``: Create DAGNode from dictionary

    `DAGNode` methods

    1. ``describe()``: Get node information sorted by attributes, returns list of tuples
    2. ``get_attr(attr_name: str)``: Get value of node attribute
    3. ``set_attrs(attrs: dict)``: Set node attribute name(s) and value(s)
    4. ``go_to(node: DAGNode)``: Get all paths from own node to another node from same DAG
    5. ``copy()``: Deep copy DAGNode

    ----

    """

    def __init__(
        self, name: str = "", parents: List = None, children: List = None, **kwargs
    ):
        """Create a DAG node and link it to the given parents / children

        Args:
            name (str): node name
            parents (List[Self]): parent nodes, optional
            children (List[Self]): child nodes, optional
            **kwargs: additional attributes stored on the node

        Raises:
            ValueError: if the tree-style `parent` keyword is passed
        """
        # Reject the tree-style keyword before touching any other node, so a
        # failed construction never leaves a half-built node linked in a DAG.
        if "parent" in kwargs:
            raise ValueError(
                "Attempting to set `parent` attribute, do you mean `parents`?"
            )
        self.name = name
        self.__parents = []
        self.__children = []
        if parents is None:
            parents = []
        if children is None:
            children = []
        self.parents = parents
        self.children = children
        self.__dict__.update(**kwargs)

    @property
    def parent(self) -> None:
        """Do not allow `parent` attribute to be accessed"""
        raise ValueError(
            "Attempting to access `parent` attribute, do you mean `parents`?"
        )

    @parent.setter
    def parent(self, new_parent):
        """Do not allow `parent` attribute to be set

        Args:
            new_parent (Self): parent node
        """
        raise ValueError("Attempting to set `parent` attribute, do you mean `parents`?")

    @property
    def parents(self) -> Iterable:
        """Get parent nodes

        Returns:
            (Iterable[Self])
        """
        return tuple(self.__parents)

    @staticmethod
    def __check_parent_type(new_parents: List):
        """Check parent type

        Args:
            new_parents (List[Self]): parent nodes

        Raises:
            TypeError: if `new_parents` is not a list
        """
        if not isinstance(new_parents, list):
            raise TypeError(
                f"Parents input should be list type, received input type {type(new_parents)}"
            )

    def __check_parent_loop(self, new_parents: List):
        """Check parent loop

        Args:
            new_parents (List[Self]): parent nodes

        Raises:
            TypeError: if a parent is not a DAGNode
            LoopError: if a parent is self or has self among its ancestors
            TreeError: if the same node appears more than once
        """
        seen_parent = []
        for new_parent in new_parents:
            # Check type
            if not isinstance(new_parent, DAGNode):
                raise TypeError(
                    f"Expect input to be DAGNode type, received input type {type(new_parent)}"
                )

            # Check for loop and tree structure
            if new_parent is self:
                raise LoopError("Error setting parent: Node cannot be parent of itself")
            if new_parent.ancestors:
                if any(ancestor is self for ancestor in new_parent.ancestors):
                    raise LoopError(
                        "Error setting parent: Node cannot be ancestor of itself"
                    )

            # Check for duplicate parents
            if id(new_parent) in seen_parent:
                raise TreeError(
                    "Error setting parent: Node cannot be added multiple times as a parent"
                )
            else:
                seen_parent.append(id(new_parent))

    @parents.setter
    def parents(self, new_parents: List):
        """Set parent node

        Parents are only ever added; existing parents are kept. Any failure
        removes the parents added by this call before raising.

        Args:
            new_parents (List[Self]): parent nodes

        Raises:
            TreeError: if the assignment fails part-way
        """
        self.__check_parent_type(new_parents)
        self.__check_parent_loop(new_parents)

        current_parents = self.__parents.copy()

        # Assign new parents - rollback if error
        self.__pre_assign_parents(new_parents)
        try:
            # Assign self to new parent
            for new_parent in new_parents:
                if new_parent not in self.__parents:
                    self.__parents.append(new_parent)
                    new_parent.__children.append(self)

            self.__post_assign_parents(new_parents)
        except Exception as exc_info:
            # Remove self from new parent
            for new_parent in new_parents:
                if new_parent not in current_parents:
                    self.__parents.remove(new_parent)
                    new_parent.__children.remove(self)
            raise TreeError(
                f"{exc_info}, current parents {current_parents}, new parents {new_parents}"
            )

    def __pre_assign_parents(self, new_parents: List):
        """Custom method to check before attaching parent
        Can be overridden with `_DAGNode__pre_assign_parents()`

        Args:
            new_parents (List): new parents to be added
        """
        pass

    def __post_assign_parents(self, new_parents: List):
        """Custom method to check after attaching parent
        Can be overridden with `_DAGNode__post_assign_parents()`

        Args:
            new_parents (List): new parents to be added
        """
        pass

    @property
    def children(self) -> Iterable:
        """Get child nodes

        Returns:
            (Iterable[Self])
        """
        return tuple(self.__children)

    def __check_children_type(self, new_children: List):
        """Check child type

        Args:
            new_children (List[Self]): child node

        Raises:
            TypeError: if `new_children` is not a list
        """
        if not isinstance(new_children, list):
            raise TypeError(
                f"Children input should be list type, received input type {type(new_children)}"
            )

    def __check_children_loop(self, new_children: List):
        """Check child loop

        Args:
            new_children (List[Self]): child node

        Raises:
            TypeError: if a child is not a DAGNode
            LoopError: if a child is self or one of self's ancestors
            TreeError: if the same node appears more than once
        """
        seen_children = []
        for new_child in new_children:
            # Check type
            if not isinstance(new_child, DAGNode):
                raise TypeError(
                    f"Expect input to be DAGNode type, received input type {type(new_child)}"
                )

            # Check for loop and tree structure
            if new_child is self:
                raise LoopError("Error setting child: Node cannot be child of itself")
            if any(child is new_child for child in self.ancestors):
                raise LoopError(
                    "Error setting child: Node cannot be ancestors of itself"
                )

            # Check for duplicate children
            if id(new_child) in seen_children:
                raise TreeError(
                    "Error setting child: Node cannot be added multiple times as a child"
                )
            else:
                seen_children.append(id(new_child))

    @children.setter
    def children(self, new_children: List):
        """Set child nodes

        Children are only ever added; existing children are kept. Any failure
        removes the children added by this call before raising.

        Args:
            new_children (List[Self]): child node

        Raises:
            TreeError: if the assignment fails part-way
        """
        self.__check_children_type(new_children)
        self.__check_children_loop(new_children)

        current_children = list(self.children)

        # Assign new children - rollback if error
        self.__pre_assign_children(new_children)
        try:
            # Assign new children to self
            for new_child in new_children:
                if self not in new_child.__parents:
                    new_child.__parents.append(self)
                    self.__children.append(new_child)
            self.__post_assign_children(new_children)
        except Exception as exc_info:
            # Detach the children that were added by this call
            for new_child in new_children:
                if new_child not in current_children:
                    new_child.__parents.remove(self)
                    self.__children.remove(new_child)
            raise TreeError(exc_info)

    def __pre_assign_children(self, new_children: List):
        """Custom method to check before attaching children
        Can be overridden with `_DAGNode__pre_assign_children()`

        Args:
            new_children (List[Self]): new children to be added
        """
        pass

    def __post_assign_children(self, new_children: List):
        """Custom method to check after attaching children
        Can be overridden with `_DAGNode__post_assign_children()`

        Args:
            new_children (List[Self]): new children to be added
        """
        pass

    @property
    def ancestors(self) -> Iterable:
        """Get all ancestors of self, does not include self

        Returns:
            (Iterable[Self]): empty tuple for a root node, otherwise a list of
                unique ancestors in which each node follows its own ancestors
        """
        if not len(list(self.parents)):
            return ()

        def recursive_parent(node):
            for _node in node.parents:
                yield from recursive_parent(_node)
                yield _node

        ancestors = list(recursive_parent(self))
        # dict.fromkeys drops duplicates (shared ancestors) and keeps first-seen order
        return list(dict.fromkeys(ancestors))

    @property
    def descendants(self) -> Iterable:
        """Get all descendants of self, does not include self

        Returns:
            (Iterable[Self]): list of unique descendants in pre-order
        """
        descendants = list(
            preorder_iter(self, filter_condition=lambda _node: _node != self)
        )
        return list(dict.fromkeys(descendants))

    @property
    def siblings(self) -> Iterable:
        """Get siblings of self

        Returns:
            (Iterable[Self]): other children of every parent of self; a node
                sharing several parents with self appears once per shared parent
        """
        if self.is_root:
            return ()
        return tuple(
            child
            for parent in self.parents
            for child in parent.children
            if child is not self
        )

    @property
    def node_name(self) -> str:
        """Get node name

        Returns:
            (str)
        """
        return self.name

    @property
    def is_root(self) -> bool:
        """Get indicator if self is root node

        Returns:
            (bool)
        """
        return not len(list(self.parents))

    @property
    def is_leaf(self) -> bool:
        """Get indicator if self is leaf node

        Returns:
            (bool)
        """
        return not len(list(self.children))

    @classmethod
    def from_dict(cls, input_dict: Dict[str, Any]):
        """Construct node from dictionary, all keys of dictionary will be stored as class attributes
        Input dictionary must have key `name` if not `Node` will not have any name

        >>> from bigtree import DAGNode
        >>> a = DAGNode.from_dict({"name": "a", "age": 90})

        Args:
            input_dict (Dict[str, Any]): dictionary with node information, key: attribute name, value: attribute value

        Returns:
            (Self)
        """
        return cls(**input_dict)

    def describe(self, exclude_attributes: List[str] = None, exclude_prefix: str = ""):
        """Get node information sorted by attribute name, returns list of tuples

        Args:
            exclude_attributes (List[str]): list of attributes to exclude, defaults to no exclusion
            exclude_prefix (str): prefix of attributes to exclude, empty string disables prefix filtering

        Returns:
            (List[Tuple[str, Any]]): ``(attribute name, value)`` pairs from ``self.__dict__``
        """
        # `None` instead of a shared `[]` default avoids the mutable-default pitfall
        if exclude_attributes is None:
            exclude_attributes = []
        return [
            item
            for item in sorted(self.__dict__.items(), key=lambda item: item[0])
            if (item[0] not in exclude_attributes)
            and (not len(exclude_prefix) or not item[0].startswith(exclude_prefix))
        ]

    def get_attr(self, attr_name: str) -> Any:
        """Get value of node attribute
        Returns None if attribute name does not exist

        Args:
            attr_name (str): attribute name

        Returns:
            (Any)
        """
        try:
            return self.__getattribute__(attr_name)
        except AttributeError:
            return None

    def set_attrs(self, attrs: Dict[str, Any]):
        """Set node attributes

        >>> from bigtree.node.dagnode import DAGNode
        >>> a = DAGNode('a')
        >>> a.set_attrs({"age": 90})
        >>> a
        DAGNode(a, age=90)

        Args:
            attrs (Dict[str, Any]): attribute dictionary,
                key: attribute name, value: attribute value
        """
        self.__dict__.update(attrs)

    def go_to(self, node) -> Iterable[Iterable]:
        """Get list of possible paths from current node to specified node from same tree

        >>> from bigtree import DAGNode
        >>> a = DAGNode("a")
        >>> b = DAGNode("b")
        >>> c = DAGNode("c")
        >>> d = DAGNode("d")
        >>> a >> c
        >>> b >> c
        >>> c >> d
        >>> a >> d
        >>> a.go_to(a)
        [[DAGNode(a, )]]
        >>> a.go_to(c)
        [[DAGNode(a, ), DAGNode(c, )]]
        >>> a.go_to(d)
        [[DAGNode(a, ), DAGNode(c, ), DAGNode(d, )], [DAGNode(a, ), DAGNode(d, )]]
        >>> a.go_to(b)
        Traceback (most recent call last):
            ...
        bigtree.utils.exceptions.TreeError: It is not possible to go to DAGNode(b, )

        Args:
            node (Self): node to travel to from current node, inclusive of start and end node

        Returns:
            (Iterable[Iterable]): list of paths, each path a list of nodes

        Raises:
            TypeError: if `node` is not a DAGNode
            TreeError: if `node` is not a descendant of self
        """
        if not isinstance(node, DAGNode):
            raise TypeError(
                f"Expect node to be DAGNode type, received input type {type(node)}"
            )
        if self == node:
            # A single one-node path, so the result is always a list of paths
            return [[self]]
        if node not in self.descendants:
            raise TreeError(f"It is not possible to go to {node}")

        # Collected locally so no scratch attribute is left on the node
        paths = []

        def recursive_path(_node, _path):
            if _node:  # pragma: no cover
                _path.append(_node)
                if _node == node:
                    return _path
                for _child in _node.children:
                    # Each branch extends its own copy of the path so far
                    ans = recursive_path(_child, _path.copy())
                    if ans:
                        paths.append(ans)

        recursive_path(self, [])
        return paths

    def copy(self):
        """Deep copy self; clone DAGNode

        >>> from bigtree.node.dagnode import DAGNode
        >>> a = DAGNode('a')
        >>> a_copy = a.copy()

        Returns:
            (Self)
        """
        return copy.deepcopy(self)

    def __copy__(self):
        """Shallow copy self

        The new object shares attribute values (including the parent and
        children lists) with the original, since only ``__dict__`` is copied
        one level deep.

        >>> import copy
        >>> from bigtree.node.dagnode import DAGNode
        >>> a = DAGNode('a')
        >>> a_copy = copy.copy(a)

        Returns:
            (Self)
        """
        obj = type(self).__new__(self.__class__)
        obj.__dict__.update(self.__dict__)
        return obj

    def __rshift__(self, other):
        """Set children using >> bitshift operator for self >> other

        Args:
            other (Self): other node, children
        """
        other.parents = [self]

    def __lshift__(self, other):
        """Set parent using << bitshift operator for self << other

        Args:
            other (Self): other node, parent
        """
        self.parents = [other]

    def __repr__(self):
        """Return ``ClassName(name, attr=value, ...)`` using the non-underscore attributes"""
        class_name = self.__class__.__name__
        node_dict = self.describe(exclude_attributes=["name"])
        node_description = ", ".join(
            [f"{k}={v}" for k, v in node_dict if not k.startswith("_")]
        )
        return f"{class_name}({self.node_name}, {node_description})"
class Node(BaseNode):
    """
    Node is an extension of BaseNode, and is able to extend to any Python class.
    Nodes can have attributes if they are initialized from `Node`, *dictionary*, or *pandas DataFrame*.

    Nodes can be linked to each other with `parent` and `children` setter methods.

    >>> from bigtree import Node
    >>> a = Node("a")
    >>> b = Node("b")
    >>> c = Node("c")
    >>> d = Node("d")
    >>> b.parent = a
    >>> b.children = [c, d]

    Directly passing `parent` argument.

    >>> from bigtree import Node
    >>> a = Node("a")
    >>> b = Node("b", parent=a)
    >>> c = Node("c", parent=b)
    >>> d = Node("d", parent=b)

    Directly passing `children` argument.

    >>> from bigtree import Node
    >>> d = Node("d")
    >>> c = Node("c")
    >>> b = Node("b", children=[c, d])
    >>> a = Node("a", children=[b])

    **Node Creation**

    Node can be created by instantiating a `Node` class or by using a *dictionary*.
    If node is created with dictionary, all keys of dictionary will be stored as class attributes.

    >>> from bigtree import Node
    >>> a = Node.from_dict({"name": "a", "age": 90})

    **Node Attributes**

    These are node attributes that have getter and/or setter methods.

    Get and set `Node` configuration

    1. ``sep``: Get/set separator for path name

    Get `Node` configuration

    1. ``node_name``: Get node name, without accessing `name` directly
    2. ``path_name``: Get path name from root, separated by `sep`

    ----

    """

    def __init__(self, name: str = "", **kwargs):
        """Create a named node; remaining keyword arguments go to ``BaseNode.__init__``

        Args:
            name (str): node name, must be non-empty
            **kwargs: forwarded to the parent class constructor

        Raises:
            TreeError: if the node ends up without a name
        """
        self.name = name
        self._sep: str = "/"
        super().__init__(**kwargs)
        if self.node_name:
            return
        raise TreeError("Node must have a `name` attribute")

    @property
    def node_name(self) -> str:
        """Name of this node

        Returns:
            (str)
        """
        return self.name

    @property
    def sep(self) -> str:
        """Path separator, always read from the root node

        Returns:
            (str)
        """
        return self._sep if self.is_root else self.parent.sep

    @sep.setter
    def sep(self, value: str):
        """Change the path separator; the value is stored on the root node

        Args:
            value (str): separator to replace default separator
        """
        self.root._sep = value

    @property
    def path_name(self) -> str:
        """Path from the root to this node, each name prefixed by ``self.sep``

        Returns:
            (str)
        """
        own_segment = f"{self.sep}{self.name}"
        if self.is_root:
            return own_segment
        return f"{self.parent.path_name}{own_segment}"

    def __pre_assign_children(self, new_children: List):
        """Hook run before children are attached, no-op by default
        Can be overriden with `_Node__pre_assign_children()`

        Args:
            new_children (List[Self]): new children to be added
        """
        pass

    def __post_assign_children(self, new_children: List):
        """Hook run after children are attached, no-op by default
        Can be overriden with `_Node__post_assign_children()`

        Args:
            new_children (List[Self]): new children to be added
        """
        pass

    def __pre_assign_parent(self, new_parent):
        """Hook run before a parent is attached, no-op by default
        Can be overriden with `_Node__pre_assign_parent()`

        Args:
            new_parent (Self): new parent to be added
        """
        pass

    def __post_assign_parent(self, new_parent):
        """Hook run after a parent is attached, no-op by default
        Can be overriden with `_Node__post_assign_parent()`

        Args:
            new_parent (Self): new parent to be added
        """
        pass

    def _BaseNode__pre_assign_parent(self, new_parent):
        """Run the `Node` pre-hook, then refuse a parent that already has another child with this name

        Args:
            new_parent (Self): new parent to be added

        Raises:
            TreeError: if a different child of `new_parent` has the same ``node_name``
        """
        self.__pre_assign_parent(new_parent)
        if new_parent is None:
            return
        for sibling in new_parent.children:
            if sibling.node_name == self.node_name and sibling is not self:
                raise TreeError(
                    f"Error: Duplicate node with same path\n"
                    f"There exist a node with same path {new_parent.path_name}{self.sep}{self.node_name}"
                )

    def _BaseNode__post_assign_parent(self, new_parent):
        """Run the `Node` post-hook; no additional rules

        Args:
            new_parent (Self): new parent to be added
        """
        self.__post_assign_parent(new_parent)

    def _BaseNode__pre_assign_children(self, new_children: List):
        """Run the `Node` pre-hook, then refuse children that share a name

        Args:
            new_children (List[Self]): new children to be added

        Raises:
            TreeError: if two or more of `new_children` have the same ``node_name``
        """
        self.__pre_assign_children(new_children)
        name_counts = Counter(child.node_name for child in new_children)
        clashing_names = [name for name, count in name_counts.items() if count > 1]
        if clashing_names:
            clashing_paths = " and ".join(
                f"{self.path_name}{self.sep}{name}" for name in clashing_names
            )
            raise TreeError(
                f"Error: Duplicate node with same path\n"
                f"Attempting to add nodes same path {clashing_paths}"
            )

    def _BaseNode__post_assign_children(self, new_children: List):
        """Run the `Node` post-hook; no additional rules

        Args:
            new_children (List[Self]): new children to be added
        """
        self.__post_assign_children(new_children)

    def __repr__(self):
        """Return ``ClassName(path_name, attr=value, ...)`` using the non-underscore attributes except `name`"""
        visible_attrs = self.describe(exclude_prefix="_", exclude_attributes=["name"])
        rendered_attrs = ", ".join(f"{key}={value}" for key, value in visible_attrs)
        return f"{self.__class__.__name__}({self.path_name}, {rendered_attrs})"
b/python37/packages/bigtree/tree/construct.py @@ -0,0 +1,914 @@ +import re +from collections import OrderedDict +from typing import List, Tuple, Type + +import numpy as np +import pandas as pd + +from bigtree.node.node import Node +from bigtree.tree.export import tree_to_dataframe +from bigtree.tree.search import find_children, find_name +from bigtree.utils.exceptions import DuplicatedNodeError, TreeError + +__all__ = [ + "add_path_to_tree", + "add_dict_to_tree_by_path", + "add_dict_to_tree_by_name", + "add_dataframe_to_tree_by_path", + "add_dataframe_to_tree_by_name", + "str_to_tree", + "list_to_tree", + "list_to_tree_by_relation", + "dict_to_tree", + "nested_dict_to_tree", + "dataframe_to_tree", + "dataframe_to_tree_by_relation", +] + + +def add_path_to_tree( + tree: Node, + path: str, + sep: str = "/", + duplicate_name_allowed: bool = True, + node_attrs: dict = {}, +) -> Node: + """Add nodes and attributes to existing tree *in-place*, return node of added path. + Adds to existing tree from list of path strings. + + Path should contain `Node` name, separated by `sep`. + - For example: Path string "a/b" refers to Node("b") with parent Node("a"). + - Path separator `sep` is for the input `path` and can be different from that of existing tree. + + Path can start from root node `name`, or start with `sep`. + - For example: Path string can be "/a/b" or "a/b", if sep is "/". + + All paths should start from the same root node. + - For example: Path strings should be "a/b", "a/c", "a/b/d" etc. and should not start with another root node. 
+ + >>> from bigtree import add_path_to_tree, print_tree + >>> root = Node("a") + >>> add_path_to_tree(root, "a/b/c") + Node(/a/b/c, ) + >>> print_tree(root) + a + └── b + └── c + + Args: + tree (Node): existing tree + path (str): path to be added to tree + sep (str): path separator for input `path` + duplicate_name_allowed (bool): indicator if nodes with duplicated `Node` name is allowed, defaults to True + node_attrs (dict): attributes to add to node, key: attribute name, value: attribute value, optional + + Returns: + (Node) + """ + if not len(path): + raise ValueError("Path is empty, check `path`") + + tree_root = tree.root + tree_sep = tree_root.sep + node_type = tree_root.__class__ + branch = path.lstrip(sep).rstrip(sep).split(sep) + if branch[0] != tree_root.node_name: + raise TreeError( + f"Error: Path does not have same root node, expected {tree_root.node_name}, received {branch[0]}\n" + f"Check your input paths or verify that path separator `sep` is set correctly" + ) + + # Grow tree + node = tree_root + parent_node = tree_root + for idx in range(1, len(branch)): + node_name = branch[idx] + node_path = tree_sep.join(branch[: idx + 1]) + if not duplicate_name_allowed: + node = find_name(tree_root, node_name) + if node and not node.path_name.endswith(node_path): + raise DuplicatedNodeError( + f"Node {node_name} already exists, try setting `duplicate_name_allowed` to True " + f"to allow `Node` with same node name" + ) + else: + node = find_children(parent_node, node_name) + if not node: + node = node_type(branch[idx]) + node.parent = parent_node + parent_node = node + node.set_attrs(node_attrs) + return node + + +def add_dict_to_tree_by_path( + tree: Node, + path_attrs: dict, + sep: str = "/", + duplicate_name_allowed: bool = True, +) -> Node: + """Add nodes and attributes to tree *in-place*, return root of tree. + Adds to existing tree from nested dictionary, ``key``: path, ``value``: dict of attribute name and attribute value. 
+ + Path should contain `Node` name, separated by `sep`. + - For example: Path string "a/b" refers to Node("b") with parent Node("a"). + - Path separator `sep` is for the input `path` and can be different from that of existing tree. + + Path can start from root node `name`, or start with `sep`. + - For example: Path string can be "/a/b" or "a/b", if sep is "/". + + All paths should start from the same root node. + - For example: Path strings should be "a/b", "a/c", "a/b/d" etc. and should not start with another root node. + + >>> from bigtree import Node, add_dict_to_tree_by_path, print_tree + >>> root = Node("a") + >>> path_dict = { + ... "a": {"age": 90}, + ... "a/b": {"age": 65}, + ... "a/c": {"age": 60}, + ... "a/b/d": {"age": 40}, + ... "a/b/e": {"age": 35}, + ... "a/c/f": {"age": 38}, + ... "a/b/e/g": {"age": 10}, + ... "a/b/e/h": {"age": 6}, + ... } + >>> root = add_dict_to_tree_by_path(root, path_dict) + >>> print_tree(root) + a + ├── b + │ ├── d + │ └── e + │ ├── g + │ └── h + └── c + └── f + + Args: + tree (Node): existing tree + path_attrs (dict): dictionary containing node path and attribute information, + key: node path, value: dict of node attribute name and attribute value + sep (str): path separator for input `path_attrs` + duplicate_name_allowed (bool): indicator if nodes with duplicated `Node` name is allowed, defaults to True + + Returns: + (Node) + """ + if not len(path_attrs): + raise ValueError("Dictionary does not contain any data, check `path_attrs`") + + tree_root = tree.root + + for k, v in path_attrs.items(): + add_path_to_tree( + tree_root, + k, + sep=sep, + duplicate_name_allowed=duplicate_name_allowed, + node_attrs=v, + ) + return tree_root + + +def add_dict_to_tree_by_name( + tree: Node, path_attrs: dict, join_type: str = "left" +) -> Node: + """Add attributes to tree, return *new* root of tree. + Adds to existing tree from nested dictionary, ``key``: name, ``value``: dict of attribute name and attribute value. 
+ + Function can return all existing tree nodes or only tree nodes that are in the input dictionary keys. + Input dictionary keys that are not existing node names will be ignored. + Note that if multiple nodes have the same name, attributes will be added to all nodes sharing same name. + + >>> from bigtree import Node, add_dict_to_tree_by_name, print_tree + >>> root = Node("a") + >>> b = Node("b", parent=root) + >>> name_dict = { + ... "a": {"age": 90}, + ... "b": {"age": 65}, + ... } + >>> root = add_dict_to_tree_by_name(root, name_dict) + >>> print_tree(root, attr_list=["age"]) + a [age=90] + └── b [age=65] + + Args: + tree (Node): existing tree + path_attrs (dict): dictionary containing node name and attribute information, + key: node name, value: dict of node attribute name and attribute value + join_type (str): join type with attribute, default of 'left' takes existing tree nodes, + if join_type is set to 'inner' it will only take tree nodes that are in `path_attrs` key and drop others + + Returns: + (Node) + """ + if join_type not in ["inner", "left"]: + raise ValueError("`join_type` must be one of 'inner' or 'left'") + + if not len(path_attrs): + raise ValueError("Dictionary does not contain any data, check `path_attrs`") + + # Convert dictionary to dataframe + data = pd.DataFrame(path_attrs).T.rename_axis("NAME").reset_index() + return add_dataframe_to_tree_by_name(tree, data=data, join_type=join_type) + + +def add_dataframe_to_tree_by_path( + tree: Node, + data: pd.DataFrame, + path_col: str = "", + attribute_cols: list = [], + sep: str = "/", + duplicate_name_allowed: bool = True, +) -> Node: + """Add nodes and attributes to tree *in-place*, return root of tree. + + `path_col` and `attribute_cols` specify columns for node path and attributes to add to existing tree. + If columns are not specified, `path_col` takes first column and all other columns are `attribute_cols` + + Path in path column should contain `Node` name, separated by `sep`. 
+ - For example: Path string "a/b" refers to Node("b") with parent Node("a"). + - Path separator `sep` is for the input `path_col` and can be different from that of existing tree. + + Path in path column can start from root node `name`, or start with `sep`. + - For example: Path string can be "/a/b" or "a/b", if sep is "/". + + All paths should start from the same root node. + - For example: Path strings should be "a/b", "a/c", "a/b/d" etc. and should not start with another root node. + + >>> import pandas as pd + >>> from bigtree import add_dataframe_to_tree_by_path, print_tree + >>> root = Node("a") + >>> path_data = pd.DataFrame([ + ... ["a", 90], + ... ["a/b", 65], + ... ["a/c", 60], + ... ["a/b/d", 40], + ... ["a/b/e", 35], + ... ["a/c/f", 38], + ... ["a/b/e/g", 10], + ... ["a/b/e/h", 6], + ... ], + ... columns=["PATH", "age"] + ... ) + >>> root = add_dataframe_to_tree_by_path(root, path_data) + >>> print_tree(root, attr_list=["age"]) + a [age=90] + ├── b [age=65] + │ ├── d [age=40] + │ └── e [age=35] + │ ├── g [age=10] + │ └── h [age=6] + └── c [age=60] + └── f [age=38] + + Args: + tree (Node): existing tree + data (pandas.DataFrame): data containing node path and attribute information + path_col (str): column of data containing `path_name` information, + if not set, it will take the first column of data + attribute_cols (list): columns of data containing node attribute information, + if not set, it will take all columns of data except `path_col` + sep (str): path separator for input `path_col` + duplicate_name_allowed (bool): indicator if nodes with duplicated `Node` name is allowed, defaults to True + + Returns: + (Node) + """ + if not len(data.columns): + raise ValueError("Data does not contain any columns, check `data`") + if not len(data): + raise ValueError("Data does not contain any rows, check `data`") + + if not path_col: + path_col = data.columns[0] + if not len(attribute_cols): + attribute_cols = list(data.columns) + attribute_cols.remove(path_col) 
+ + tree_root = tree.root + data[path_col] = data[path_col].str.lstrip(sep).str.rstrip(sep) + data2 = data.copy()[[path_col] + attribute_cols].astype(str).drop_duplicates() + _duplicate_check = ( + data2[path_col] + .value_counts() + .to_frame("counts") + .rename_axis(path_col) + .reset_index() + ) + _duplicate_check = _duplicate_check[_duplicate_check["counts"] > 1] + if len(_duplicate_check): + raise ValueError( + f"There exists duplicate path with different attributes\nCheck {_duplicate_check}" + ) + + for row in data.to_dict(orient="index").values(): + node_attrs = row.copy() + del node_attrs[path_col] + node_attrs = {k: v for k, v in node_attrs.items() if not np.all(pd.isnull(v))} + add_path_to_tree( + tree_root, + row[path_col], + sep=sep, + duplicate_name_allowed=duplicate_name_allowed, + node_attrs=node_attrs, + ) + return tree_root + + +def add_dataframe_to_tree_by_name( + tree: Node, + data: pd.DataFrame, + name_col: str = "", + attribute_cols: list = [], + join_type: str = "left", +): + """Add attributes to tree, return *new* root of tree. + + `name_col` and `attribute_cols` specify columns for node name and attributes to add to existing tree. + If columns are not specified, the first column will be taken as name column and all other columns as attributes. + + Function can return all existing tree nodes or only tree nodes that are in the input data node names. + Input data node names that are not existing node names will be ignored. + Note that if multiple nodes have the same name, attributes will be added to all nodes sharing same name. + + >>> import pandas as pd + >>> from bigtree import add_dataframe_to_tree_by_name, print_tree + >>> root = Node("a") + >>> b = Node("b", parent=root) + >>> name_data = pd.DataFrame([ + ... ["a", 90], + ... ["b", 65], + ... ], + ... columns=["NAME", "age"] + ... 
) + >>> root = add_dataframe_to_tree_by_name(root, name_data) + >>> print_tree(root, attr_list=["age"]) + a [age=90] + └── b [age=65] + + Args: + tree (Node): existing tree + data (pandas.DataFrame): data containing node name and attribute information + name_col (str): column of data containing `name` information, + if not set, it will take the first column of data + attribute_cols (list): column(s) of data containing node attribute information, + if not set, it will take all columns of data except path_col + join_type (str): join type with attribute, default of 'left' takes existing tree nodes, + if join_type is set to 'inner' it will only take tree nodes with attributes and drop the other nodes + + Returns: + (Node) + """ + if join_type not in ["inner", "left"]: + raise ValueError("`join_type` must be one of 'inner' or 'left'") + + if not len(data.columns): + raise ValueError("Data does not contain any columns, check `data`") + if not len(data): + raise ValueError("Data does not contain any rows, check `data`") + + if not name_col: + name_col = data.columns[0] + if not len(attribute_cols): + attribute_cols = list(data.columns) + attribute_cols.remove(name_col) + + # Attribute data + path_col = "PATH" + data2 = data.copy()[[name_col] + attribute_cols].astype(str).drop_duplicates() + _duplicate_check = ( + data2[name_col] + .value_counts() + .to_frame("counts") + .rename_axis(name_col) + .reset_index() + ) + _duplicate_check = _duplicate_check[_duplicate_check["counts"] > 1] + if len(_duplicate_check): + raise ValueError( + f"There exists duplicate name with different attributes\nCheck {_duplicate_check}" + ) + + # Tree data + tree_root = tree.root + sep = tree_root.sep + node_type = tree_root.__class__ + data_tree = tree_to_dataframe( + tree_root, name_col=name_col, path_col=path_col, all_attrs=True + ) + common_cols = list(set(data_tree.columns).intersection(attribute_cols)) + data_tree = data_tree.drop(columns=common_cols) + + # Attribute data + data_tree_attrs 
= pd.merge(data_tree, data, on=name_col, how=join_type) + data_tree_attrs = data_tree_attrs.drop(columns=name_col) + + return dataframe_to_tree( + data_tree_attrs, path_col=path_col, sep=sep, node_type=node_type + ) + + +def str_to_tree( + tree_string: str, + tree_prefix_list: List[str] = [], + node_type: Type[Node] = Node, +) -> Node: + r"""Construct tree from tree string + + >>> from bigtree import str_to_tree, print_tree + >>> tree_str = 'a\n├── b\n│ ├── d\n│ └── e\n│ ├── g\n│ └── h\n└── c\n └── f' + >>> root = str_to_tree(tree_str, tree_prefix_list=["├──", "└──"]) + >>> print_tree(root) + a + ├── b + │ ├── d + │ └── e + │ ├── g + │ └── h + └── c + └── f + + Args: + tree_string (str): String to construct tree + tree_prefix_list (list): List of prefix to mark the end of tree branch/stem and start of node name, optional. + If not specified, it will infer unicode characters and whitespace as prefix. + node_type (Type[Node]): node type of tree to be created, defaults to Node + + Returns: + (Node) + """ + tree_string = tree_string.strip("\n") + if not len(tree_string): + raise ValueError("Tree string does not contain any data, check `tree_string`") + tree_list = tree_string.split("\n") + tree_root = node_type(tree_list[0]) + + # Infer prefix length + prefix_length = None + cur_parent = tree_root + for node_str in tree_list[1:]: + if len(tree_prefix_list): + node_name = re.split("|".join(tree_prefix_list), node_str)[-1].lstrip() + else: + node_name = node_str.encode("ascii", "ignore").decode("ascii").lstrip() + + # Find node parent + if not prefix_length: + prefix_length = node_str.index(node_name) + if not prefix_length: + raise ValueError( + f"Invalid prefix, prefix should be unicode character or whitespace, " + f"otherwise specify one or more prefixes in `tree_prefix_list`, check: {node_str}" + ) + node_prefix_length = node_str.index(node_name) + if node_prefix_length % prefix_length: + raise ValueError( + f"Tree string have different prefix length, check branch: 
{node_str}" + ) + while cur_parent.depth > node_prefix_length / prefix_length: + cur_parent = cur_parent.parent + + # Link node + child_node = node_type(node_name) + child_node.parent = cur_parent + cur_parent = child_node + + return tree_root + + +def list_to_tree( + paths: list, + sep: str = "/", + duplicate_name_allowed: bool = True, + node_type: Type[Node] = Node, +) -> Node: + """Construct tree from list of path strings. + + Path should contain `Node` name, separated by `sep`. + - For example: Path string "a/b" refers to Node("b") with parent Node("a"). + + Path can start from root node `name`, or start with `sep`. + - For example: Path string can be "/a/b" or "a/b", if sep is "/". + + All paths should start from the same root node. + - For example: Path strings should be "a/b", "a/c", "a/b/d" etc. and should not start with another root node. + + >>> from bigtree import list_to_tree, print_tree + >>> path_list = ["a/b", "a/c", "a/b/d", "a/b/e", "a/c/f", "a/b/e/g", "a/b/e/h"] + >>> root = list_to_tree(path_list) + >>> print_tree(root) + a + ├── b + │ ├── d + │ └── e + │ ├── g + │ └── h + └── c + └── f + + Args: + paths (list): list containing path strings + sep (str): path separator for input `paths` and created tree, defaults to `/` + duplicate_name_allowed (bool): indicator if nodes with duplicated `Node` name is allowed, defaults to True + node_type (Type[Node]): node type of tree to be created, defaults to Node + + Returns: + (Node) + """ + if not len(paths): + raise ValueError("Path list does not contain any data, check `paths`") + + # Remove duplicates + paths = list(OrderedDict.fromkeys(paths)) + + # Construct root node + root_name = paths[0].lstrip(sep).split(sep)[0] + root_node = node_type(root_name) + root_node.sep = sep + + for path in paths: + add_path_to_tree( + root_node, path, sep=sep, duplicate_name_allowed=duplicate_name_allowed + ) + root_node.sep = sep + return root_node + + +def list_to_tree_by_relation( + relations: List[Tuple[str, str]], + 
node_type: Type[Node] = Node, +) -> Node: + """Construct tree from list of tuple containing parent-child names. + + Note that node names must be unique since tree is created from parent-child names, + except for leaf nodes - names of leaf nodes may be repeated as there is no confusion. + + >>> from bigtree import list_to_tree_by_relation, print_tree + >>> relations_list = [("a", "b"), ("a", "c"), ("b", "d"), ("b", "e"), ("c", "f"), ("e", "g"), ("e", "h")] + >>> root = list_to_tree_by_relation(relations_list) + >>> print_tree(root) + a + ├── b + │ ├── d + │ └── e + │ ├── g + │ └── h + └── c + └── f + + Args: + relations (list): list containing tuple containing parent-child names + node_type (Type[Node]): node type of tree to be created, defaults to Node + + Returns: + (Node) + """ + if not len(relations): + raise ValueError("Path list does not contain any data, check `relations`") + + relation_data = pd.DataFrame(relations, columns=["parent", "child"]) + return dataframe_to_tree_by_relation( + relation_data, child_col="child", parent_col="parent", node_type=node_type + ) + + +def dict_to_tree( + path_attrs: dict, + sep: str = "/", + duplicate_name_allowed: bool = True, + node_type: Type[Node] = Node, +) -> Node: + """Construct tree from nested dictionary using path, + ``key``: path, ``value``: dict of attribute name and attribute value. + + Path should contain `Node` name, separated by `sep`. + - For example: Path string "a/b" refers to Node("b") with parent Node("a"). + + Path can start from root node `name`, or start with `sep`. + - For example: Path string can be "/a/b" or "a/b", if sep is "/". + + All paths should start from the same root node. + - For example: Path strings should be "a/b", "a/c", "a/b/d" etc. and should not start with another root node. + + >>> from bigtree import dict_to_tree, print_tree + >>> path_dict = { + ... "a": {"age": 90}, + ... "a/b": {"age": 65}, + ... "a/c": {"age": 60}, + ... "a/b/d": {"age": 40}, + ... "a/b/e": {"age": 35}, + ... 
"a/c/f": {"age": 38}, + ... "a/b/e/g": {"age": 10}, + ... "a/b/e/h": {"age": 6}, + ... } + >>> root = dict_to_tree(path_dict) + >>> print_tree(root, attr_list=["age"]) + a [age=90] + ├── b [age=65] + │ ├── d [age=40] + │ └── e [age=35] + │ ├── g [age=10] + │ └── h [age=6] + └── c [age=60] + └── f [age=38] + + Args: + path_attrs (dict): dictionary containing path and node attribute information, + key: path, value: dict of tree attribute and attribute value + sep (str): path separator of input `path_attrs` and created tree, defaults to `/` + duplicate_name_allowed (bool): indicator if nodes with duplicated `Node` name is allowed, defaults to True + node_type (Type[Node]): node type of tree to be created, defaults to Node + + Returns: + (Node) + """ + if not len(path_attrs): + raise ValueError("Dictionary does not contain any data, check `path_attrs`") + + # Convert dictionary to dataframe + data = pd.DataFrame(path_attrs).T.rename_axis("PATH").reset_index() + return dataframe_to_tree( + data, + sep=sep, + duplicate_name_allowed=duplicate_name_allowed, + node_type=node_type, + ) + + +def nested_dict_to_tree( + node_attrs: dict, + name_key: str = "name", + child_key: str = "children", + node_type: Type[Node] = Node, +) -> Node: + """Construct tree from nested recursive dictionary. + - ``key``: `name_key`, `child_key`, or any attributes key. + - ``value`` of `name_key` (str): node name. + - ``value`` of `child_key` (list): list of dict containing `name_key` and `child_key` (recursive). + + >>> from bigtree import nested_dict_to_tree, print_tree + >>> path_dict = { + ... "name": "a", + ... "age": 90, + ... "children": [ + ... {"name": "b", + ... "age": 65, + ... "children": [ + ... {"name": "d", "age": 40}, + ... {"name": "e", "age": 35, "children": [ + ... {"name": "g", "age": 10}, + ... ]}, + ... ]}, + ... ], + ... 
} + >>> root = nested_dict_to_tree(path_dict) + >>> print_tree(root, attr_list=["age"]) + a [age=90] + └── b [age=65] + ├── d [age=40] + └── e [age=35] + └── g [age=10] + + Args: + node_attrs (dict): dictionary containing node, children, and node attribute information, + key: `name_key` and `child_key` + value of `name_key` (str): node name + value of `child_key` (list): list of dict containing `name_key` and `child_key` (recursive) + name_key (str): key of node name, value is type str + child_key (str): key of child list, value is type list + node_type (Type[Node]): node type of tree to be created, defaults to Node + + Returns: + (Node) + """ + + def recursive_add_child(child_dict, parent_node=None): + child_dict = child_dict.copy() + node_name = child_dict.pop(name_key) + node_children = child_dict.pop(child_key, []) + node = node_type(node_name, parent=parent_node, **child_dict) + for _child in node_children: + recursive_add_child(_child, parent_node=node) + return node + + root_node = recursive_add_child(node_attrs) + return root_node + + +def dataframe_to_tree( + data: pd.DataFrame, + path_col: str = "", + attribute_cols: list = [], + sep: str = "/", + duplicate_name_allowed: bool = True, + node_type: Type[Node] = Node, +) -> Node: + """Construct tree from pandas DataFrame using path, return root of tree. + + `path_col` and `attribute_cols` specify columns for node path and attributes to construct tree. + If columns are not specified, `path_col` takes first column and all other columns are `attribute_cols`. + + Path in path column can start from root node `name`, or start with `sep`. + - For example: Path string can be "/a/b" or "a/b", if sep is "/". + + Path in path column should contain `Node` name, separated by `sep`. + - For example: Path string "a/b" refers to Node("b") with parent Node("a"). + + All paths should start from the same root node. + - For example: Path strings should be "a/b", "a/c", "a/b/d" etc. and should not start with another root node. 
+ + >>> import pandas as pd + >>> from bigtree import dataframe_to_tree, print_tree + >>> path_data = pd.DataFrame([ + ... ["a", 90], + ... ["a/b", 65], + ... ["a/c", 60], + ... ["a/b/d", 40], + ... ["a/b/e", 35], + ... ["a/c/f", 38], + ... ["a/b/e/g", 10], + ... ["a/b/e/h", 6], + ... ], + ... columns=["PATH", "age"] + ... ) + >>> root = dataframe_to_tree(path_data) + >>> print_tree(root, attr_list=["age"]) + a [age=90] + ├── b [age=65] + │ ├── d [age=40] + │ └── e [age=35] + │ ├── g [age=10] + │ └── h [age=6] + └── c [age=60] + └── f [age=38] + + Args: + data (pandas.DataFrame): data containing path and node attribute information + path_col (str): column of data containing `path_name` information, + if not set, it will take the first column of data + attribute_cols (list): columns of data containing node attribute information, + if not set, it will take all columns of data except `path_col` + sep (str): path separator of input `path_col` and created tree, defaults to `/` + duplicate_name_allowed (bool): indicator if nodes with duplicated `Node` name is allowed, defaults to True + node_type (Type[Node]): node type of tree to be created, defaults to Node + + Returns: + (Node) + """ + if not len(data.columns): + raise ValueError("Data does not contain any columns, check `data`") + if not len(data): + raise ValueError("Data does not contain any rows, check `data`") + + if not path_col: + path_col = data.columns[0] + if not len(attribute_cols): + attribute_cols = list(data.columns) + attribute_cols.remove(path_col) + + data[path_col] = data[path_col].str.lstrip(sep).str.rstrip(sep) + data2 = data.copy()[[path_col] + attribute_cols].astype(str).drop_duplicates() + _duplicate_check = ( + data2[path_col] + .value_counts() + .to_frame("counts") + .rename_axis(path_col) + .reset_index() + ) + _duplicate_check = _duplicate_check[_duplicate_check["counts"] > 1] + if len(_duplicate_check): + raise ValueError( + f"There exists duplicate path with different attributes\nCheck 
{_duplicate_check}" + ) + + root_name = data[path_col].values[0].split(sep)[0] + root_node = node_type(root_name) + add_dataframe_to_tree_by_path( + root_node, + data, + sep=sep, + duplicate_name_allowed=duplicate_name_allowed, + ) + root_node.sep = sep + return root_node + + +def dataframe_to_tree_by_relation( + data: pd.DataFrame, + child_col: str = "", + parent_col: str = "", + attribute_cols: list = [], + node_type: Type[Node] = Node, +) -> Node: + """Construct tree from pandas DataFrame using parent and child names, return root of tree. + + Note that node names must be unique since tree is created from parent-child names, + except for leaf nodes - names of leaf nodes may be repeated as there is no confusion. + + `child_col` and `parent_col` specify columns for child name and parent name to construct tree. + `attribute_cols` specify columns for node attribute for child name + If columns are not specified, `child_col` takes first column, `parent_col` takes second column, and all other + columns are `attribute_cols`. + + >>> import pandas as pd + >>> from bigtree import dataframe_to_tree_by_relation, print_tree + >>> relation_data = pd.DataFrame([ + ... ["a", None, 90], + ... ["b", "a", 65], + ... ["c", "a", 60], + ... ["d", "b", 40], + ... ["e", "b", 35], + ... ["f", "c", 38], + ... ["g", "e", 10], + ... ["h", "e", 6], + ... ], + ... columns=["child", "parent", "age"] + ... 
) + >>> root = dataframe_to_tree_by_relation(relation_data) + >>> print_tree(root, attr_list=["age"]) + a [age=90] + ├── b [age=65] + │ ├── d [age=40] + │ └── e [age=35] + │ ├── g [age=10] + │ └── h [age=6] + └── c [age=60] + └── f [age=38] + + Args: + data (pandas.DataFrame): data containing path and node attribute information + child_col (str): column of data containing child name information, defaults to None + if not set, it will take the first column of data + parent_col (str): column of data containing parent name information, defaults to None + if not set, it will take the second column of data + attribute_cols (list): columns of data containing node attribute information, + if not set, it will take all columns of data except `child_col` and `parent_col` + node_type (Type[Node]): node type of tree to be created, defaults to Node + + Returns: + (Node) + """ + if not len(data.columns): + raise ValueError("Data does not contain any columns, check `data`") + if not len(data): + raise ValueError("Data does not contain any rows, check `data`") + + if not child_col: + child_col = data.columns[0] + if not parent_col: + parent_col = data.columns[1] + if not len(attribute_cols): + attribute_cols = list(data.columns) + attribute_cols.remove(child_col) + attribute_cols.remove(parent_col) + + data_check = data.copy()[[child_col, parent_col]].drop_duplicates() + # Filter for child nodes that are parent of other nodes + data_check = data_check[data_check[child_col].isin(data_check[parent_col])] + _duplicate_check = ( + data_check[child_col] + .value_counts() + .to_frame("counts") + .rename_axis(child_col) + .reset_index() + ) + _duplicate_check = _duplicate_check[_duplicate_check["counts"] > 1] + if len(_duplicate_check): + raise ValueError( + f"There exists duplicate child with different parent where the child is also a parent node.\n" + f"Duplicated node names should not happen, but can only exist in leaf nodes to avoid confusion.\n" + f"Check {_duplicate_check}" + ) + + 
# If parent-child contains None -> root + root_row = data[data[parent_col].isnull()] + root_names = list(root_row[child_col]) + if not len(root_names): + root_names = list(set(data[parent_col]) - set(data[child_col])) + if len(root_names) != 1: + raise ValueError(f"Unable to determine root node\nCheck {root_names}") + root_name = root_names[0] + root_node = node_type(root_name) + + def retrieve_attr(row): + node_attrs = row.copy() + node_attrs["name"] = node_attrs[child_col] + del node_attrs[child_col] + del node_attrs[parent_col] + _node_attrs = {k: v for k, v in node_attrs.items() if not np.all(pd.isnull(v))} + return _node_attrs + + def recursive_create_child(parent_node): + child_rows = data[data[parent_col] == parent_node.node_name] + + for row in child_rows.to_dict(orient="index").values(): + child_node = node_type(**retrieve_attr(row)) + child_node.parent = parent_node + recursive_create_child(child_node) + + # Create root node attributes + if len(root_row): + row = list(root_row.to_dict(orient="index").values())[0] + root_node.set_attrs(retrieve_attr(row)) + recursive_create_child(root_node) + return root_node diff --git a/python37/packages/bigtree/tree/export.py b/python37/packages/bigtree/tree/export.py new file mode 100644 index 0000000..27e924e --- /dev/null +++ b/python37/packages/bigtree/tree/export.py @@ -0,0 +1,831 @@ +import collections +from typing import Any, Dict, List, Tuple, Union + +import pandas as pd + +from bigtree.node.node import Node +from bigtree.tree.search import find_path +from bigtree.utils.iterators import preorder_iter + +__all__ = [ + "print_tree", + "yield_tree", + "tree_to_dict", + "tree_to_nested_dict", + "tree_to_dataframe", + "tree_to_dot", + "tree_to_pillow", +] + + +available_styles = { + "ansi": ("| ", "|-- ", "`-- "), + "ascii": ("| ", "|-- ", "+-- "), + "const": ("\u2502 ", "\u251c\u2500\u2500 ", "\u2514\u2500\u2500 "), + "const_bold": ("\u2503 ", "\u2523\u2501\u2501 ", "\u2517\u2501\u2501 "), + "rounded": ("\u2502 ", 
"\u251c\u2500\u2500 ", "\u2570\u2500\u2500 "), + "double": ("\u2551 ", "\u2560\u2550\u2550 ", "\u255a\u2550\u2550 "), + "custom": ("", "", ""), +} + + +def print_tree( + tree: Node, + node_name_or_path: str = "", + max_depth: int = None, + attr_list: List[str] = None, + all_attrs: bool = False, + attr_omit_null: bool = True, + attr_bracket: List[str] = ["[", "]"], + style: str = "const", + custom_style: List[str] = [], +): + """Print tree to console, starting from `tree`. + + - Able to select which node to print from, resulting in a subtree, using `node_name_or_path` + - Able to customize for maximum depth to print, using `max_depth` + - Able to choose which attributes to show or show all attributes, using `attr_name_filter` and `all_attrs` + - Able to omit showing of attributes if it is null, using `attr_omit_null` + - Able to customize open and close brackets if attributes are shown, using `attr_bracket` + - Able to customize style, to choose from `ansi`, `ascii`, `const`, `rounded`, `double`, and `custom` style + - Default style is `const` style + - If style is set to custom, user can choose their own style for stem, branch and final stem icons + - Stem, branch, and final stem symbol should have the same number of characters + + **Printing tree** + + >>> from bigtree import Node, print_tree + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=60, parent=root) + >>> d = Node("d", age=40, parent=b) + >>> e = Node("e", age=35, parent=b) + >>> print_tree(root) + a + ├── b + │ ├── d + │ └── e + └── c + + **Printing Sub-tree** + + >>> print_tree(root, node_name_or_path="b") + b + ├── d + └── e + + >>> print_tree(root, max_depth=2) + a + ├── b + └── c + + **Printing Attributes** + + >>> print_tree(root, attr_list=["age"]) + a [age=90] + ├── b [age=65] + │ ├── d [age=40] + │ └── e [age=35] + └── c [age=60] + + >>> print_tree(root, attr_list=["age"], attr_bracket=["*(", ")"]) + a *(age=90) + ├── b *(age=65) + │ ├── d *(age=40) + 
│ └── e *(age=35) + └── c *(age=60) + + **Available Styles** + + >>> print_tree(root, style="ansi") + a + |-- b + | |-- d + | `-- e + `-- c + + >>> print_tree(root, style="ascii") + a + |-- b + | |-- d + | +-- e + +-- c + + >>> print_tree(root, style="const") + a + ├── b + │ ├── d + │ └── e + └── c + + >>> print_tree(root, style="const_bold") + a + ┣━━ b + ┃ ┣━━ d + ┃ ┗━━ e + ┗━━ c + + >>> print_tree(root, style="rounded") + a + ├── b + │ ├── d + │ ╰── e + ╰── c + + >>> print_tree(root, style="double") + a + ╠══ b + ║ ╠══ d + ║ ╚══ e + ╚══ c + + Args: + tree (Node): tree to print + node_name_or_path (str): node to print from, becomes the root node of printing + max_depth (int): maximum depth of tree to print, based on `depth` attribute, optional + attr_list (list): list of node attributes to print, optional + all_attrs (bool): indicator to show all attributes, overrides `attr_list` + attr_omit_null (bool): indicator whether to omit showing of null attributes, defaults to True + attr_bracket (List[str]): open and close bracket for `all_attrs` or `attr_list` + style (str): style of print, defaults to abstract style + custom_style (List[str]): style of stem, branch and final stem, used when `style` is set to 'custom' + """ + for pre_str, fill_str, _node in yield_tree( + tree=tree, + node_name_or_path=node_name_or_path, + max_depth=max_depth, + style=style, + custom_style=custom_style, + ): + # Get node_str (node name and attributes) + attr_str = "" + if all_attrs or attr_list: + if len(attr_bracket) != 2: + raise ValueError( + f"Expect open and close brackets in `attr_bracket`, received {attr_bracket}" + ) + attr_bracket_open, attr_bracket_close = attr_bracket + if all_attrs: + attrs = _node.describe(exclude_attributes=["name"], exclude_prefix="_") + attr_str_list = [f"{k}={v}" for k, v in attrs] + else: + if attr_omit_null: + attr_str_list = [ + f"{attr_name}={_node.get_attr(attr_name)}" + for attr_name in attr_list + if _node.get_attr(attr_name) + ] + else: + 
def yield_tree(
    tree: Node,
    node_name_or_path: str = "",
    max_depth: int = None,
    style: str = "const",
    custom_style: List[str] = [],
):
    """Generator method for customizing printing of tree, starting from `tree`.

    Yields one `(pre_str, fill_str, node)` tuple per node, in the order produced by `preorder_iter`.
    `pre_str` is the prefix drawn for the ancestors of the node and `fill_str` is the connector drawn
    directly in front of the node; both are empty strings for the node the printing starts from.
    The function works on `tree.copy()`, so the yielded nodes come from that copy and not from `tree`.

    - Able to select which node to print from, resulting in a subtree, using `node_name_or_path`
    - Able to customize for maximum depth to print, using `max_depth`
    - Able to customize style, to choose from `ansi`, `ascii`, `const`, `const_bold`, `rounded`, `double`,
      and `custom` style
        - Default style is `const` style
        - If style is set to custom, user can choose their own style for stem, branch and final stem icons
        - Stem, branch, and final stem symbol should have the same number of characters

    **Printing tree**

    >>> from bigtree import Node
    >>> from bigtree.tree.export import yield_tree
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("d", age=40, parent=b)
    >>> e = Node("e", age=35, parent=b)
    >>> for branch, stem, node in yield_tree(root):
    ...     print(f"{branch}{stem}{node.node_name}")
    a
    ├── b
    │   ├── d
    │   └── e
    └── c

    **Printing Sub-tree**

    >>> for branch, stem, node in yield_tree(root, node_name_or_path="b"):
    ...     print(f"{branch}{stem}{node.node_name}")
    b
    ├── d
    └── e

    >>> for branch, stem, node in yield_tree(root, max_depth=2):
    ...     print(f"{branch}{stem}{node.node_name}")
    a
    ├── b
    └── c

    **Available Styles**

    >>> for branch, stem, node in yield_tree(root, style="ansi"):
    ...     print(f"{branch}{stem}{node.node_name}")
    a
    |-- b
    |   |-- d
    |   `-- e
    `-- c

    >>> for branch, stem, node in yield_tree(root, style="ascii"):
    ...     print(f"{branch}{stem}{node.node_name}")
    a
    |-- b
    |   |-- d
    |   +-- e
    +-- c

    >>> for branch, stem, node in yield_tree(root, style="const"):
    ...     print(f"{branch}{stem}{node.node_name}")
    a
    ├── b
    │   ├── d
    │   └── e
    └── c

    >>> for branch, stem, node in yield_tree(root, style="const_bold"):
    ...     print(f"{branch}{stem}{node.node_name}")
    a
    ┣━━ b
    ┃   ┣━━ d
    ┃   ┗━━ e
    ┗━━ c

    >>> for branch, stem, node in yield_tree(root, style="rounded"):
    ...     print(f"{branch}{stem}{node.node_name}")
    a
    ├── b
    │   ├── d
    │   ╰── e
    ╰── c

    >>> for branch, stem, node in yield_tree(root, style="double"):
    ...     print(f"{branch}{stem}{node.node_name}")
    a
    ╠══ b
    ║   ╠══ d
    ║   ╚══ e
    ╚══ c

    **Printing Attributes**

    >>> for branch, stem, node in yield_tree(root, style="const"):
    ...     print(f"{branch}{stem}{node.node_name} [age={node.age}]")
    a [age=90]
    ├── b [age=65]
    │   ├── d [age=40]
    │   └── e [age=35]
    └── c [age=60]

    Args:
        tree (Node): tree to print
        node_name_or_path (str): node to print from, becomes the root node of printing, optional
        max_depth (int): maximum depth of tree to print, based on `depth` attribute, optional
        style (str): style of print, defaults to `const` style
        custom_style (List[str]): style of stem, branch and final stem, used when `style` is set to 'custom'

    Raises:
        ValueError: if `style` is not a key of `available_styles`, if `node_name_or_path` does not
            match any node, if `style` is 'custom' and `custom_style` does not have exactly three
            items, or if the stem, branch and final stem symbols differ in length
    """
    if style not in available_styles:
        raise ValueError(
            f"Choose one of {available_styles.keys()} style, use `custom` to define own style"
        )

    tree = tree.copy()
    if node_name_or_path:
        tree = find_path(tree, node_name_or_path)
        # Without this guard a missing node surfaces as an AttributeError on
        # `tree.is_root` below, which hides the real problem from the caller.
        if not tree:
            raise ValueError(
                f"Cannot find any node matching name or path {node_name_or_path}"
            )
    if not tree.is_root:
        # Detach the selected node so that it is the root of what gets printed
        tree.parent = None

    # Set style
    if style == "custom":
        if len(custom_style) != 3:
            raise ValueError(
                "Custom style selected, please specify the style of stem, branch, and final stem in `custom_style`"
            )
        style_stem, style_branch, style_stem_final = custom_style
    else:
        style_stem, style_branch, style_stem_final = available_styles[style]

    if not len(style_stem) == len(style_branch) == len(style_stem_final):
        raise ValueError(
            "`style_stem`, `style_branch`, and `style_stem_final` are of different length"
        )

    gap_str = " " * len(style_stem)
    # Relative depths whose branch still has siblings left to draw
    unclosed_depth = set()
    initial_depth = tree.depth
    for _node in preorder_iter(tree, max_depth=max_depth):
        pre_str = ""
        fill_str = ""
        if not _node.is_root:
            node_depth = _node.depth - initial_depth

            # Get fill_str (style_branch or style_stem_final)
            if _node.right_sibling:
                unclosed_depth.add(node_depth)
                fill_str = style_branch
            else:
                unclosed_depth.discard(node_depth)
                fill_str = style_stem_final

            # Get pre_str: a stem for every ancestor level that is still open, a gap otherwise
            pre_str = "".join(
                style_stem if _depth in unclosed_depth else gap_str
                for _depth in range(1, node_depth)
            )

        yield pre_str, fill_str, _node
+ + >>> from bigtree import Node, tree_to_dict + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=60, parent=root) + >>> d = Node("d", age=40, parent=b) + >>> e = Node("e", age=35, parent=b) + >>> tree_to_dict(root, name_key="name", parent_key="parent", attr_dict={"age": "person age"}) + {'/a': {'name': 'a', 'parent': None, 'person age': 90}, '/a/b': {'name': 'b', 'parent': 'a', 'person age': 65}, '/a/b/d': {'name': 'd', 'parent': 'b', 'person age': 40}, '/a/b/e': {'name': 'e', 'parent': 'b', 'person age': 35}, '/a/c': {'name': 'c', 'parent': 'a', 'person age': 60}} + + For a subset of a tree + + >>> tree_to_dict(c, name_key="name", parent_key="parent", attr_dict={"age": "person age"}) + {'/a/c': {'name': 'c', 'parent': 'a', 'person age': 60}} + + Args: + tree (Node): tree to be exported + name_key (str): dictionary key for `node.node_name`, defaults to 'name' + parent_key (str): dictionary key for `node.parent.node_name`, optional + attr_dict (dict): dictionary mapping node attributes to dictionary key, + key: node attributes, value: corresponding dictionary key, optional + all_attrs (bool): indicator whether to retrieve all `Node` attributes + max_depth (int): maximum depth to export tree, optional + skip_depth (int): number of initial depth to skip, optional + leaf_only (bool): indicator to retrieve only information from leaf nodes + + Returns: + (dict) + """ + tree = tree.copy() + data_dict = {} + + def recursive_append(node): + if node: + if ( + (not max_depth or node.depth <= max_depth) + and (not skip_depth or node.depth > skip_depth) + and (not leaf_only or node.is_leaf) + ): + data_child = {} + if name_key: + data_child[name_key] = node.node_name + if parent_key: + parent_name = None + if node.parent: + parent_name = node.parent.node_name + data_child[parent_key] = parent_name + if all_attrs: + data_child.update( + dict( + node.describe( + exclude_attributes=["name"], exclude_prefix="_" + ) + ) + ) + else: + for 
def tree_to_nested_dict(
    tree: Node,
    name_key: str = "name",
    child_key: str = "children",
    attr_dict: dict = {},
    all_attrs: bool = False,
    max_depth: int = None,
) -> Dict[str, Any]:
    """Export tree to nested dictionary.

    All descendants from `tree` will be exported, `tree` can be the root node or child node of tree.

    Exported dictionary will have key as node attribute names, and children as a nested recursive dictionary.
    The `child_key` entry is only present for nodes that have at least one exported child.

    >>> from bigtree import Node, tree_to_nested_dict
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("d", age=40, parent=b)
    >>> e = Node("e", age=35, parent=b)
    >>> tree_to_nested_dict(root, all_attrs=True)
    {'name': 'a', 'age': 90, 'children': [{'name': 'b', 'age': 65, 'children': [{'name': 'd', 'age': 40}, {'name': 'e', 'age': 35}]}, {'name': 'c', 'age': 60}]}

    Args:
        tree (Node): tree to be exported
        name_key (str): dictionary key for `node.node_name`, defaults to 'name'
        child_key (str): dictionary key for list of children, defaults to 'children'
        attr_dict (dict): dictionary mapping node attributes to dictionary key,
            key: node attributes, value: corresponding dictionary key, optional
        all_attrs (bool): indicator whether to retrieve all `Node` attributes, takes precedence over `attr_dict`
        max_depth (int): maximum depth to export tree, compared against `node.depth`, optional

    Returns:
        (dict): nested dictionary of `tree`; an empty dictionary if `tree` itself is deeper than `max_depth`
    """
    tree = tree.copy()

    def _export(node):
        """Return the nested dictionary for `node`, or None if `node` is not exported."""
        if not node or (max_depth and node.depth > max_depth):
            return None

        node_dict = {name_key: node.node_name}
        if all_attrs:
            node_dict.update(
                dict(node.describe(exclude_attributes=["name"], exclude_prefix="_"))
            )
        else:
            for attr_name, dict_key in attr_dict.items():
                node_dict[dict_key] = node.get_attr(attr_name)

        # Children that are missing or beyond `max_depth` export as None and are dropped
        child_dicts = [
            child_dict
            for child_dict in map(_export, node.children)
            if child_dict is not None
        ]
        if child_dicts:
            node_dict[child_key] = child_dicts
        return node_dict

    exported = _export(tree)
    # Nothing is exported when the starting node is already beyond `max_depth`
    return exported if exported is not None else {}
def tree_to_dot(
    tree: Union[Node, List[Node]],
    directed: bool = True,
    rankdir: str = "TB",
    bg_colour: str = None,
    node_colour: str = None,
    node_shape: str = None,
    edge_colour: str = None,
    node_attr: str = None,
    edge_attr: str = None,
):
    r"""Export tree or list of trees to image.
    Possible node attributes include style, fillcolor, shape.

    Every tree is drawn from its root (`tree.root`), also when a child node is passed in.

    >>> from bigtree import Node, tree_to_dot
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("d", age=40, parent=b)
    >>> e = Node("e", age=35, parent=b)
    >>> graph = tree_to_dot(root)

    Export to image, dot file, etc.

    >>> graph.write_png("tree.png")
    >>> graph.write_dot("tree.dot")

    Export to string

    >>> graph.to_string()
    'strict digraph G {\nrankdir=TB;\na0 [label=a];\nb0 [label=b];\na0 -> b0;\nd0 [label=d];\nb0 -> d0;\ne0 [label=e];\nb0 -> e0;\nc0 [label=c];\na0 -> c0;\n}\n'

    Defining node and edge attributes

    >>> class CustomNode(Node):
    ...     def __init__(self, name, node_shape="", edge_label="", **kwargs):
    ...         super().__init__(name, **kwargs)
    ...         self.node_shape = node_shape
    ...         self.edge_label = edge_label
    ...
    ...     @property
    ...     def edge_attr(self):
    ...         if self.edge_label:
    ...             return {"label": self.edge_label}
    ...         return {}
    ...
    ...     @property
    ...     def node_attr(self):
    ...         if self.node_shape:
    ...             return {"shape": self.node_shape}
    ...         return {}
    >>>
    >>>
    >>> root = CustomNode("a", node_shape="circle")
    >>> b = CustomNode("b", edge_label="child", parent=root)
    >>> c = CustomNode("c", edge_label="child", parent=root)
    >>> d = CustomNode("d", node_shape="square", edge_label="child", parent=b)
    >>> e = CustomNode("e", node_shape="square", edge_label="child", parent=b)
    >>> graph = tree_to_dot(root, node_colour="gold", node_shape="diamond", node_attr="node_attr", edge_attr="edge_attr")
    >>> graph.write_png("assets/custom_tree.png")

    .. image:: https://github.com/kayjan/bigtree/raw/master/assets/custom_tree.png

    Args:
        tree (Node/List[Node]): tree or list of trees to be exported
        directed (bool): indicator whether graph should be directed or undirected, defaults to True
        rankdir (str): set direction of graph layout, defaults to 'TB' (top to bottom), can be 'BT' (bottom to top),
            'LR' (left to right), 'RL' (right to left)
        bg_colour (str): background color of image, defaults to None
        node_colour (str): fill colour of nodes, defaults to None
        node_shape (str): shape of nodes, defaults to None
            Possible node_shape include "circle", "square", "diamond", "triangle"
        edge_colour (str): colour of edges, defaults to None
        node_attr (str): `Node` attribute for node style, overrides `node_colour` and `node_shape`, defaults to None.
            Possible node style (attribute value) include {"style": "filled", "fillcolor": "gold", "shape": "diamond"}
            Nodes where the attribute is missing or empty keep the default node style
        edge_attr (str): `Node` attribute for edge style, overrides `edge_colour`, defaults to None.
            Possible edge style (attribute value) include {"style": "bold", "label": "edge label", "color": "black"}
            Nodes where the attribute is missing or empty keep the default edge style

    Returns:
        (pydot.Dot)

    Raises:
        ImportError: if `pydot` is not installed
        ValueError: if any tree is not an instance of `Node`
    """
    try:
        import pydot
    except ImportError:  # pragma: no cover
        raise ImportError(
            "pydot not available. Please perform a\n\npip install 'bigtree[image]'\n\nto install required dependencies"
        )

    # Get style
    graph_style = dict(bgcolor=bg_colour) if bg_colour else dict()
    node_style = dict(style="filled", fillcolor=node_colour) if node_colour else dict()
    if node_shape:
        node_style["shape"] = node_shape
    edge_style = dict(color=edge_colour) if edge_colour else dict()

    tree = tree.copy()

    _graph = pydot.Dot(
        graph_type="digraph" if directed else "graph",
        strict=True,
        rankdir=rankdir,
        **graph_style,
    )

    if not isinstance(tree, list):
        tree = [tree]

    for _tree in tree:
        if not isinstance(_tree, Node):
            raise ValueError("Tree should be of type `Node`, or inherit from `Node`")

        # Maps a node label to the paths that carry it, so that nodes sharing
        # a label still get distinct graph names (label + index of the path)
        name_dict = collections.defaultdict(list)

        def recursive_create_node_and_edges(parent_name, child_node):
            _node_style = node_style.copy()
            _edge_style = edge_style.copy()

            child_label = child_node.node_name
            if child_node.path_name not in name_dict[child_label]:  # pragma: no cover
                name_dict[child_label].append(child_node.path_name)
            child_name = child_label + str(
                name_dict[child_label].index(child_node.path_name)
            )

            # Per-node overrides are only applied when the attribute holds a
            # value; `dict.update(None)` would raise TypeError otherwise
            node_override = child_node.get_attr(node_attr) if node_attr else None
            if node_override:
                _node_style.update(node_override)
            edge_override = child_node.get_attr(edge_attr) if edge_attr else None
            if edge_override:
                _edge_style.update(edge_override)

            node = pydot.Node(name=child_name, label=child_label, **_node_style)
            _graph.add_node(node)
            if parent_name is not None:
                edge = pydot.Edge(parent_name, child_name, **_edge_style)
                _graph.add_edge(edge)
            for child in child_node.children:
                if child:
                    recursive_create_node_and_edges(child_name, child)

        recursive_create_node_and_edges(None, _tree.root)
    return _graph
"assets/DejaVuSans.ttf", + font_size: int = 12, + font_colour: Union[Tuple[float, float, float], str] = "black", + bg_colour: Union[Tuple[float, float, float], str] = "white", + **kwargs, +): + """Export tree to image (JPG, PNG). + Image will be similar format as `print_tree`, accepts additional keyword arguments as input to `yield_tree` + + >>> from bigtree import Node, tree_to_pillow + >>> root = Node("a", age=90) + >>> b = Node("b", age=65, parent=root) + >>> c = Node("c", age=60, parent=root) + >>> d = Node("d", age=40, parent=b) + >>> e = Node("e", age=35, parent=b) + >>> pillow_image = tree_to_pillow(root) + + Export to image (PNG, JPG) file, etc. + + >>> pillow_image.save("tree_pillow.png") + >>> pillow_image.save("tree_pillow.jpg") + + Args: + tree (Node): tree to be exported + width (int): width of image, optional as width of image is calculated automatically + height (int): height of image, optional as height of image is calculated automatically + start_pos (Tuple[float, float]): start position of text, (x-offset, y-offset), defaults to (10, 10) + font_family (str): file path of font family, requires .ttf file, defaults to DejaVuSans + font_size (int): font size, defaults to 12 + font_colour (Union[List[int], str]): font colour, accepts tuple of RGB values or string, defaults to black + bg_colour (Union[List[int], str]): background of image, accepts tuple of RGB values or string, defaults to white + + Returns: + (PIL.Image.Image) + """ + try: + from PIL import Image, ImageDraw, ImageFont + except ImportError: # pragma: no cover + raise ImportError( + "Pillow not available. 
Please perform a\n\npip install 'bigtree[image]'\n\nto install required dependencies" + ) + + # Initialize font + font = ImageFont.truetype(font_family, font_size) + + # Initialize text + image_text = [] + for branch, stem, node in yield_tree(tree, **kwargs): + image_text.append(f"{branch}{stem}{node.node_name}\n") + + # Calculate image dimension from text, otherwise override with argument + def get_list_of_text_dimensions(text_list): + """Get list dimensions + + Args: + text_list (List[str]): list of texts + + Returns: + (List[Iterable[int]]): list of (left, top, right, bottom) bounding box + """ + _image = Image.new("RGB", (0, 0)) + _draw = ImageDraw.Draw(_image) + return [_draw.textbbox((0, 0), text_line, font=font) for text_line in text_list] + + text_dimensions = get_list_of_text_dimensions(image_text) + text_height = sum( + [text_dimension[3] + text_dimension[1] for text_dimension in text_dimensions] + ) + text_width = max( + [text_dimension[2] + text_dimension[0] for text_dimension in text_dimensions] + ) + image_text = "".join(image_text) + width = max(width, text_width + 2 * start_pos[0]) + height = max(height, text_height + 2 * start_pos[1]) + + # Initialize and draw image + image = Image.new("RGB", (width, height), bg_colour) + image_draw = ImageDraw.Draw(image) + image_draw.text(start_pos, image_text, font=font, fill=font_colour) + return image diff --git a/python37/packages/bigtree/tree/helper.py b/python37/packages/bigtree/tree/helper.py new file mode 100644 index 0000000..503cf21 --- /dev/null +++ b/python37/packages/bigtree/tree/helper.py @@ -0,0 +1,201 @@ +from typing import Optional, Type + +import numpy as np + +from bigtree.node.basenode import BaseNode +from bigtree.node.binarynode import BinaryNode +from bigtree.node.node import Node +from bigtree.tree.construct import dataframe_to_tree +from bigtree.tree.export import tree_to_dataframe +from bigtree.tree.search import find_path +from bigtree.utils.exceptions import NotFoundError + +__all__ = 
["clone_tree", "prune_tree", "get_tree_diff"] + + +def clone_tree(tree: BaseNode, node_type: Type[BaseNode]) -> BaseNode: + """Clone tree to another `Node` type. + If the same type is needed, simply do a tree.copy(). + + >>> from bigtree import BaseNode, Node, clone_tree + >>> root = BaseNode(name="a") + >>> b = BaseNode(name="b", parent=root) + >>> clone_tree(root, Node) + Node(/a, ) + + Args: + tree (BaseNode): tree to be cloned, must inherit from BaseNode + node_type (Type[BaseNode]): type of cloned tree + + Returns: + (BaseNode) + """ + if not isinstance(tree, BaseNode): + raise ValueError( + "Tree should be of type `BaseNode`, or inherit from `BaseNode`" + ) + + # Start from root + root_info = dict(tree.root.describe(exclude_prefix="_")) + root_node = node_type(**root_info) + + def recursive_add_child(_new_parent_node, _parent_node): + for _child in _parent_node.children: + if _child: + child_info = dict(_child.describe(exclude_prefix="_")) + child_node = node_type(**child_info) + child_node.parent = _new_parent_node + recursive_add_child(child_node, _child) + + recursive_add_child(root_node, tree.root) + return root_node + + +def prune_tree(tree: Node, prune_path: str, sep: str = "/") -> Node: + """Prune tree to leave only the prune path, returns the root of a *copy* of the original tree. + + All siblings along the prune path will be removed. + Prune path name should be unique, can be full path or partial path (trailing part of path) or node name. + + Path should contain `Node` name, separated by `sep`. + - For example: Path string "a/b" refers to Node("b") with parent Node("a"). 
+ + >>> from bigtree import Node, prune_tree, print_tree + >>> root = Node("a") + >>> b = Node("b", parent=root) + >>> c = Node("c", parent=root) + >>> print_tree(root) + a + ├── b + └── c + + >>> root_pruned = prune_tree(root, "a/b") + >>> print_tree(root_pruned) + a + └── b + + Args: + tree (Node): existing tree + prune_path (str): prune path, all siblings along the prune path will be removed + sep (str): path separator + + Returns: + (Node) + """ + prune_path = prune_path.replace(sep, tree.sep) + tree_copy = tree.copy() + child = find_path(tree_copy, prune_path) + if not child: + raise NotFoundError( + f"Cannot find any node matching path_name ending with {prune_path}" + ) + + if isinstance(child.parent, BinaryNode): + while child.parent: + child.parent.children = [child, None] + child = child.parent + return tree_copy + + while child.parent: + child.parent.children = [child] + child = child.parent + return tree_copy + + +def get_tree_diff( + tree: Node, other_tree: Node, only_diff: bool = True +) -> Optional[Node]: + """Get difference of `tree` to `other_tree`, changes are relative to `tree`. + + (+) and (-) will be added relative to `tree`. + - For example: (+) refers to nodes that are in `other_tree` but not `tree`. + - For example: (-) refers to nodes that are in `tree` but not `other_tree`. + + Note that only leaf nodes are compared and have (+) or (-) indicator. Intermediate parent nodes are not compared. + + Function can return all original tree nodes and differences, or only the differences. 
+ + >>> from bigtree import Node, get_tree_diff, print_tree + >>> root = Node("a") + >>> b = Node("b", parent=root) + >>> c = Node("c", parent=root) + >>> d = Node("d", parent=b) + >>> e = Node("e", parent=root) + >>> print_tree(root) + a + ├── b + │ └── d + ├── c + └── e + + >>> root_other = Node("a") + >>> b_other = Node("b", parent=root_other) + >>> c_other = Node("c", parent=b_other) + >>> d_other = Node("d", parent=root_other) + >>> e_other = Node("e", parent=root_other) + >>> print_tree(root_other) + a + ├── b + │ └── c + ├── d + └── e + + >>> tree_diff = get_tree_diff(root, root_other) + >>> print_tree(tree_diff) + a + ├── b + │ ├── c (+) + │ └── d (-) + ├── c (-) + └── d (+) + + >>> tree_diff = get_tree_diff(root, root_other, only_diff=False) + >>> print_tree(tree_diff) + a + ├── b + │ ├── c (+) + │ └── d (-) + ├── c (-) + ├── d (+) + └── e + + Args: + tree (Node): tree to be compared against + other_tree (Node): tree to be compared with + only_diff (bool): indicator to show all nodes or only nodes that are different (+/-), defaults to True + + Returns: + (Node) + """ + tree = tree.copy() + other_tree = other_tree.copy() + name_col = "name" + path_col = "PATH" + indicator_col = "Exists" + + data = tree_to_dataframe(tree, name_col=name_col, path_col=path_col, leaf_only=True) + data_other = tree_to_dataframe( + other_tree, name_col=name_col, path_col=path_col, leaf_only=True + ) + data_both = data[[path_col, name_col]].merge( + data_other[[path_col, name_col]], how="outer", indicator=indicator_col + ) + + data_both[name_col] = np.where( + data_both[indicator_col] == "left_only", + data_both[name_col] + " (-)", + np.where( + data_both[indicator_col] == "right_only", + data_both[name_col] + " (+)", + data_both[name_col], + ), + ) + + if only_diff: + data_both = data_both.query(f"{indicator_col} != 'both'") + data_both = data_both.drop(columns=indicator_col).sort_values(path_col) + if len(data_both): + return dataframe_to_tree( + data_both, + 
node_type=tree.__class__, + ) diff --git a/python37/packages/bigtree/tree/modify.py b/python37/packages/bigtree/tree/modify.py new file mode 100644 index 0000000..10b9658 --- /dev/null +++ b/python37/packages/bigtree/tree/modify.py @@ -0,0 +1,856 @@ +import logging +from typing import List, Optional + +from bigtree.node.node import Node +from bigtree.tree.search import find_path +from bigtree.utils.exceptions import NotFoundError, TreeError + +logging.getLogger(__name__).addHandler(logging.NullHandler()) + + +__all__ = [ + "shift_nodes", + "copy_nodes", + "copy_nodes_from_tree_to_tree", + "copy_or_shift_logic", +] + + +def shift_nodes( + tree: Node, + from_paths: List[str], + to_paths: List[str], + sep: str = "/", + skippable: bool = False, + overriding: bool = False, + merge_children: bool = False, + merge_leaves: bool = False, + delete_children: bool = False, +): + """Shift nodes from `from_paths` to `to_paths` *in-place*. + + - Creates intermediate nodes if to path is not present + - Able to skip nodes if from path is not found, defaults to False (from-nodes must be found; not skippable). + - Able to override existing node if it exists, defaults to False (to-nodes must not exist; not overridden). + - Able to merge children and remove intermediate parent node, defaults to False (nodes are shifted; not merged). + - Able to merge only leaf nodes and remove all intermediate nodes, defaults to False (nodes are shifted; not merged) + - Able to shift node only and delete children, defaults to False (nodes are shifted together with children). + + For paths in `from_paths` and `to_paths`, + - Path name can be with or without leading tree path separator symbol. + - Path name can be partial path (trailing part of path) or node name. + - Path name must be unique to one node. + + For paths in `to_paths`, + - Can set to empty string or None to delete the path in `from_paths`, note that ``copy`` must be set to False. 
+ + If ``merge_children=True``, + - If `to_path` is not present, it shifts children of `from_path`. + - If `to_path` is present, and ``overriding=False``, original and new children are merged. + - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new children are retained. + + If ``merge_leaves=True``, + - If `to_path` is not present, it shifts leaves of `from_path`. + - If `to_path` is present, and ``overriding=False``, original children and leaves are merged. + - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new leaves are retained, + original node in `from_path` is retained. + + >>> from bigtree import Node, shift_nodes, print_tree + >>> root = Node("a") + >>> b = Node("b", parent=root) + >>> c = Node("c", parent=root) + >>> d = Node("d", parent=root) + >>> print_tree(root) + a + ├── b + ├── c + └── d + + >>> shift_nodes(root, ["a/c", "a/d"], ["a/b/c", "a/dummy/d"]) + >>> print_tree(root) + a + ├── b + │ └── c + └── dummy + └── d + + To delete node, + + >>> root = Node("a") + >>> b = Node("b", parent=root) + >>> c = Node("c", parent=root) + >>> print_tree(root) + a + ├── b + └── c + + >>> shift_nodes(root, ["a/b"], [None]) + >>> print_tree(root) + a + └── c + + In overriding case, + + >>> root = Node("a") + >>> b = Node("b", parent=root) + >>> c = Node("c", parent=root) + >>> d = Node("d", parent=c) + >>> c2 = Node("c", parent=b) + >>> e = Node("e", parent=c2) + >>> print_tree(root) + a + ├── b + │ └── c + │ └── e + └── c + └── d + + >>> shift_nodes(root, ["a/b/c"], ["a/c"], overriding=True) + >>> print_tree(root) + a + ├── b + └── c + └── e + + In ``merge_children`` case, child nodes are shifted instead of the parent node. + - If the path already exists, child nodes are merged with existing children. + - If same node is shifted, the child nodes of the node are merged with the node's parent. 
+ + >>> root = Node("a") + >>> b = Node("b", parent=root) + >>> c = Node("c", parent=root) + >>> d = Node("d", parent=c) + >>> c2 = Node("c", parent=b) + >>> e = Node("e", parent=c2) + >>> z = Node("z", parent=b) + >>> y = Node("y", parent=z) + >>> f = Node("f", parent=root) + >>> g = Node("g", parent=f) + >>> h = Node("h", parent=g) + >>> print_tree(root) + a + ├── b + │ ├── c + │ │ └── e + │ └── z + │ └── y + ├── c + │ └── d + └── f + └── g + └── h + + >>> shift_nodes(root, ["a/b/c", "z", "a/f"], ["a/c", "a/z", "a/f"], merge_children=True) + >>> print_tree(root) + a + ├── b + ├── c + │ ├── d + │ └── e + ├── y + └── g + └── h + + In ``merge_leaves`` case, leaf nodes are shifted instead of the parent node. + - If the path already exists, leaf nodes are merged with existing children. + - If same node is shifted, the leaf nodes of the node are merged with the node's parent. + + >>> root = Node("a") + >>> b = Node("b", parent=root) + >>> c = Node("c", parent=root) + >>> d = Node("d", parent=c) + >>> c2 = Node("c", parent=b) + >>> e = Node("e", parent=c2) + >>> z = Node("z", parent=b) + >>> y = Node("y", parent=z) + >>> f = Node("f", parent=root) + >>> g = Node("g", parent=f) + >>> h = Node("h", parent=g) + >>> print_tree(root) + a + ├── b + │ ├── c + │ │ └── e + │ └── z + │ └── y + ├── c + │ └── d + └── f + └── g + └── h + + >>> shift_nodes(root, ["a/b/c", "z", "a/f"], ["a/c", "a/z", "a/f"], merge_leaves=True) + >>> print_tree(root) + a + ├── b + │ ├── c + │ └── z + ├── c + │ ├── d + │ └── e + ├── f + │ └── g + ├── y + └── h + + In ``delete_children`` case, only the node is shifted without its accompanying children/descendants. 
def copy_nodes(
    tree: Node,
    from_paths: List[str],
    to_paths: List[str],
    sep: str = "/",
    skippable: bool = False,
    overriding: bool = False,
    merge_children: bool = False,
    merge_leaves: bool = False,
    delete_children: bool = False,
):
    """Copy nodes from `from_paths` to `to_paths` within the same tree, *in-place*.

    This is a thin wrapper that forwards to `copy_or_shift_logic` with
    ``copy=True`` and ``to_tree=None``.

    - Intermediate nodes are created if the to path is not present.
    - ``skippable``: skip pairs whose from path is not found, defaults to False
      (from-nodes must be found).
    - ``overriding``: replace an existing to-node, defaults to False
      (to-nodes must not exist).
    - ``merge_children``: copy the children and drop the intermediate parent node,
      defaults to False (nodes are copied; not merged).
    - ``merge_leaves``: copy only the leaf nodes and drop all intermediate nodes,
      defaults to False (nodes are copied; not merged).
    - ``delete_children``: copy the node only, without its children, defaults to False
      (nodes are copied together with children).

    For paths in `from_paths` and `to_paths`,
      - Path name can be with or without leading tree path separator symbol.
      - Path name can be partial path (trailing part of path) or node name.
      - Path name must be unique to one node.

    If ``merge_children=True``,
      - If `to_path` is not present, it copies children of `from_path`.
      - If `to_path` is present, and ``overriding=False``, original and new children are merged.
      - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new children are retained.

    If ``merge_leaves=True``,
      - If `to_path` is not present, it copies leaves of `from_path`.
      - If `to_path` is present, and ``overriding=False``, original children and leaves are merged.
      - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new leaves are retained.

    >>> from bigtree import Node, copy_nodes, print_tree
    >>> root = Node("a")
    >>> b = Node("b", parent=root)
    >>> c = Node("c", parent=root)
    >>> d = Node("d", parent=root)
    >>> print_tree(root)
    a
    ├── b
    ├── c
    └── d

    >>> copy_nodes(root, ["a/c", "a/d"], ["a/b/c", "a/dummy/d"])
    >>> print_tree(root)
    a
    ├── b
    │   └── c
    ├── c
    ├── d
    └── dummy
        └── d

    In overriding case,

    >>> root = Node("a")
    >>> b = Node("b", parent=root)
    >>> c = Node("c", parent=root)
    >>> d = Node("d", parent=c)
    >>> c2 = Node("c", parent=b)
    >>> e = Node("e", parent=c2)
    >>> print_tree(root)
    a
    ├── b
    │   └── c
    │       └── e
    └── c
        └── d

    >>> copy_nodes(root, ["a/b/c"], ["a/c"], overriding=True)
    >>> print_tree(root)
    a
    ├── b
    │   └── c
    │       └── e
    └── c
        └── e

    In ``merge_children`` case, child nodes are copied instead of the parent node.
      - If the path already exists, child nodes are merged with existing children.
      - If same node is copied, the child nodes of the node are merged with the node's parent.

    >>> root = Node("a")
    >>> b = Node("b", parent=root)
    >>> c = Node("c", parent=root)
    >>> d = Node("d", parent=c)
    >>> c2 = Node("c", parent=b)
    >>> e = Node("e", parent=c2)
    >>> z = Node("z", parent=b)
    >>> y = Node("y", parent=z)
    >>> f = Node("f", parent=root)
    >>> g = Node("g", parent=f)
    >>> h = Node("h", parent=g)
    >>> print_tree(root)
    a
    ├── b
    │   ├── c
    │   │   └── e
    │   └── z
    │       └── y
    ├── c
    │   └── d
    └── f
        └── g
            └── h

    >>> copy_nodes(root, ["a/b/c", "z", "a/f"], ["a/c", "a/z", "a/f"], merge_children=True)
    >>> print_tree(root)
    a
    ├── b
    │   ├── c
    │   │   └── e
    │   └── z
    │       └── y
    ├── c
    │   ├── d
    │   └── e
    ├── y
    └── g
        └── h

    In ``merge_leaves`` case, leaf nodes are copied instead of the parent node.
      - If the path already exists, leaf nodes are merged with existing children.
      - If same node is copied, the leaf nodes of the node are merged with the node's parent.

    >>> root = Node("a")
    >>> b = Node("b", parent=root)
    >>> c = Node("c", parent=root)
    >>> d = Node("d", parent=c)
    >>> c2 = Node("c", parent=b)
    >>> e = Node("e", parent=c2)
    >>> z = Node("z", parent=b)
    >>> y = Node("y", parent=z)
    >>> f = Node("f", parent=root)
    >>> g = Node("g", parent=f)
    >>> h = Node("h", parent=g)
    >>> print_tree(root)
    a
    ├── b
    │   ├── c
    │   │   └── e
    │   └── z
    │       └── y
    ├── c
    │   └── d
    └── f
        └── g
            └── h

    >>> copy_nodes(root, ["a/b/c", "z", "a/f"], ["a/c", "a/z", "a/f"], merge_leaves=True)
    >>> print_tree(root)
    a
    ├── b
    │   ├── c
    │   │   └── e
    │   └── z
    │       └── y
    ├── c
    │   ├── d
    │   └── e
    ├── f
    │   └── g
    │       └── h
    ├── y
    └── h

    In ``delete_children`` case, only the node is copied without its accompanying children/descendants.

    >>> root = Node("a")
    >>> b = Node("b", parent=root)
    >>> c = Node("c", parent=root)
    >>> d = Node("d", parent=c)
    >>> c2 = Node("c", parent=b)
    >>> e = Node("e", parent=c2)
    >>> z = Node("z", parent=b)
    >>> y = Node("y", parent=z)
    >>> print_tree(root)
    a
    ├── b
    │   ├── c
    │   │   └── e
    │   └── z
    │       └── y
    └── c
        └── d

    >>> copy_nodes(root, ["a/b/z"], ["a/z"], delete_children=True)
    >>> print_tree(root)
    a
    ├── b
    │   ├── c
    │   │   └── e
    │   └── z
    │       └── y
    ├── c
    │   └── d
    └── z

    Args:
        tree (Node): tree to modify
        from_paths (list): original paths to copy nodes from
        to_paths (list): new paths to copy nodes to
        sep (str): path separator for input paths, applies to `from_path` and `to_path`
        skippable (bool): indicator to skip if from path is not found, defaults to False
        overriding (bool): indicator to override existing to path if there is clashes, defaults to False
        merge_children (bool): indicator to merge children and remove intermediate parent node, defaults to False
        merge_leaves (bool): indicator to merge leaf nodes and remove intermediate parent node(s), defaults to False
        delete_children (bool): indicator to copy node only without children, defaults to False
    """
    # Behaviour switches are forwarded untouched; only `copy` and `to_tree`
    # are fixed by this wrapper (copy within the same tree).
    behaviour_flags = dict(
        skippable=skippable,
        overriding=overriding,
        merge_children=merge_children,
        merge_leaves=merge_leaves,
        delete_children=delete_children,
    )
    return copy_or_shift_logic(
        tree=tree,
        from_paths=from_paths,
        to_paths=to_paths,
        sep=sep,
        copy=True,
        to_tree=None,
        **behaviour_flags,
    )  # pragma: no cover
+ - If `to_path` is present, and ``overriding=False``, original children and leaves are merged. + - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new leaves are retained. + + >>> from bigtree import Node, copy_nodes_from_tree_to_tree, print_tree + >>> root = Node("a") + >>> b = Node("b", parent=root) + >>> c = Node("c", parent=root) + >>> d = Node("d", parent=c) + >>> e = Node("e", parent=root) + >>> f = Node("f", parent=e) + >>> g = Node("g", parent=f) + >>> print_tree(root) + a + ├── b + ├── c + │ └── d + └── e + └── f + └── g + + >>> root_other = Node("aa") + >>> copy_nodes_from_tree_to_tree(root, root_other, ["a/b", "a/c", "a/e"], ["aa/b", "aa/b/c", "aa/dummy/e"]) + >>> print_tree(root_other) + aa + ├── b + │ └── c + │ └── d + └── dummy + └── e + └── f + └── g + + In overriding case, + + >>> root_other = Node("aa") + >>> c = Node("c", parent=root_other) + >>> e = Node("e", parent=c) + >>> print_tree(root_other) + aa + └── c + └── e + + >>> copy_nodes_from_tree_to_tree(root, root_other, ["a/b", "a/c"], ["aa/b", "aa/c"], overriding=True) + >>> print_tree(root_other) + aa + ├── b + └── c + └── d + + In ``merge_children`` case, child nodes are copied instead of the parent node. + - If the path already exists, child nodes are merged with existing children. + + >>> root_other = Node("aa") + >>> c = Node("c", parent=root_other) + >>> e = Node("e", parent=c) + >>> print_tree(root_other) + aa + └── c + └── e + + >>> copy_nodes_from_tree_to_tree(root, root_other, ["a/c", "e"], ["a/c", "a/e"], merge_children=True) + >>> print_tree(root_other) + aa + ├── c + │ ├── e + │ └── d + └── f + └── g + + In ``merge_leaves`` case, leaf nodes are copied instead of the parent node. + - If the path already exists, leaf nodes are merged with existing children. 
def copy_or_shift_logic(
    tree: Node,
    from_paths: List[str],
    to_paths: List[str],
    sep: str = "/",
    copy: bool = False,
    skippable: bool = False,
    overriding: bool = False,
    merge_children: bool = False,
    merge_leaves: bool = False,
    delete_children: bool = False,
    to_tree: Optional[Node] = None,
):
    """Shift or copy nodes from `from_paths` to `to_paths` *in-place*.

    - Creates intermediate nodes if to path is not present
    - Able to copy node, defaults to False (nodes are shifted; not copied).
    - Able to skip nodes if from path is not found, defaults to False (from-nodes must be found; not skippable)
    - Able to override existing node if it exists, defaults to False (to-nodes must not exist; not overridden)
    - Able to merge children and remove intermediate parent node, defaults to False (nodes are shifted; not merged)
    - Able to merge only leaf nodes and remove all intermediate nodes, defaults to False (nodes are shifted; not merged)
    - Able to shift/copy node only and delete children, defaults to False (nodes are shifted/copied together with children).
    - Able to shift/copy nodes from one tree to another tree, defaults to None (shifting/copying happens within same tree)

    For paths in `from_paths` and `to_paths`,
      - Path name can be with or without leading tree path separator symbol.
      - Path name can be partial path (trailing part of path) or node name.
      - Path name must be unique to one node.

    For paths in `to_paths`,
      - Can set to empty string or None to delete the path in `from_paths`, note that ``copy`` must be set to False.

    If ``merge_children=True``,
      - If `to_path` is not present, it shifts/copies children of `from_path`.
      - If `to_path` is present, and ``overriding=False``, original and new children are merged.
      - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new children are retained.

    If ``merge_leaves=True``,
      - If `to_path` is not present, it shifts/copies leaves of `from_path`.
      - If `to_path` is present, and ``overriding=False``, original children and leaves are merged.
      - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new leaves are retained,
        original non-leaf nodes in `from_path` are retained.

    Args:
        tree (Node): tree to modify
        from_paths (list): original paths to shift nodes from
        to_paths (list): new paths to shift nodes to
        sep (str): path separator for input paths, applies to `from_path` and `to_path`
        copy (bool): indicator to copy node, defaults to False
        skippable (bool): indicator to skip if from path is not found, defaults to False
        overriding (bool): indicator to override existing to path if there is clashes, defaults to False
        merge_children (bool): indicator to merge children and remove intermediate parent node, defaults to False
        merge_leaves (bool): indicator to merge leaf nodes and remove intermediate parent node(s), defaults to False
        delete_children (bool): indicator to shift/copy node only without children, defaults to False
        to_tree (Node): tree to copy to, defaults to None

    Raises:
        ValueError: if both merge modes are requested, if the path arguments are not
            lists of equal length, or if a from/to pair does not end in the same node name.
        NotFoundError: if a from path is not found and ``skippable`` is False, or if no
            existing ancestor of a to path can be found.
        TreeError: if a node is shifted onto itself without a merge mode, or if the
            to path already exists and neither ``overriding`` nor a merge mode is set.
    """
    # ---- Argument validation -------------------------------------------
    if merge_children and merge_leaves:
        raise ValueError(
            "Invalid shifting, can only specify one type of merging, check `merge_children` and `merge_leaves`"
        )
    if not (isinstance(from_paths, list) and isinstance(to_paths, list)):
        raise ValueError(
            "Invalid type, `from_paths` and `to_paths` should be list type"
        )
    if len(from_paths) != len(to_paths):
        raise ValueError(
            f"Paths are different length, input `from_paths` have {len(from_paths)} entries, "
            f"while output `to_paths` have {len(to_paths)} entries"
        )
    for from_path, to_path in zip(from_paths, to_paths):
        # A falsy to_path means "delete"; otherwise both paths must name the same node.
        if to_path:
            if from_path.split(sep)[-1] != to_path.split(sep)[-1]:
                raise ValueError(
                    f"Unable to assign from_path {from_path} to to_path {to_path}\n"
                    f"Verify that `sep` is defined correctly for path\n"
                    f"Alternatively, check that `from_path` and `to_path` is reassigning the same node"
                )

    # Destination lookups, the node class used for intermediate nodes and the
    # separator for to-paths all come from `to_tree` when one is supplied.
    dest_tree = to_tree if to_tree else tree
    node_type = dest_tree.__class__
    tree_sep = dest_tree.sep

    for from_path, to_path in zip(from_paths, to_paths):
        # Per-pair copy of the flag: the override branch below downgrades a
        # merge to a plain override for THIS pair only. Rebinding the
        # function-level flag would silently disable merging for every
        # following pair.
        merge_children_pair = merge_children

        from_path = from_path.replace(sep, tree.sep)
        from_node = find_path(tree, from_path)

        # From node not found
        if not from_node:
            if not skippable:
                raise NotFoundError(
                    f"Unable to find from_path {from_path}\n"
                    f"Set `skippable` to True to skip shifting for nodes not found"
                )
            logging.info(f"Unable to find from_path {from_path}")
            continue

        # From node found
        # Node to be deleted
        if not to_path:
            to_node = None
        # Node to be copied/shifted
        else:
            to_path = to_path.replace(sep, tree_sep)
            to_node = find_path(dest_tree, to_path)

            # To node found
            if to_node:
                if from_node == to_node:
                    if merge_children_pair:
                        parent = to_node.parent
                        to_node.parent = None
                        to_node = parent
                    elif merge_leaves:
                        to_node = to_node.parent
                    else:
                        raise TreeError(
                            f"Attempting to shift the same node {from_node.node_name} back to the same position\n"
                            f"Check from path {from_path} and to path {to_path}\n"
                            f"Alternatively, set `merge_children` or `merge_leaves` to True if intermediate node is to be removed"
                        )
                elif merge_children_pair:
                    # Specify override to remove existing node, else children are merged
                    if not overriding:
                        logging.info(
                            f"Path {to_path} already exists and children are merged"
                        )
                    else:
                        logging.info(
                            f"Path {to_path} already exists and its children be overridden by the merge"
                        )
                        # Detach the existing node and move the whole from-node
                        # under its parent instead (plain override for this pair).
                        parent = to_node.parent
                        to_node.parent = None
                        to_node = parent
                        merge_children_pair = False
                elif merge_leaves:
                    # Specify override to remove existing node, else leaves are merged
                    if not overriding:
                        logging.info(
                            f"Path {to_path} already exists and leaves are merged"
                        )
                    else:
                        logging.info(
                            f"Path {to_path} already exists and its leaves be overridden by the merge"
                        )
                        del to_node.children
                else:
                    if not overriding:
                        raise TreeError(
                            f"Path {to_path} already exists and unable to override\n"
                            f"Set `overriding` to True to perform overrides\n"
                            f"Alternatively, set `merge_children` to True if nodes are to be merged"
                        )
                    logging.info(
                        f"Path {to_path} already exists and will be overridden"
                    )
                    parent = to_node.parent
                    to_node.parent = None
                    to_node = parent

            # To node not found
            else:
                # Find parent node
                to_path_list = to_path.split(tree_sep)
                idx = 1
                to_path_parent = tree_sep.join(to_path_list[:-idx])
                to_node = find_path(dest_tree, to_path_parent)

                # Walk up until an existing ancestor is found. The candidate
                # path must be re-joined with `tree_sep` (the separator
                # `to_path` was normalised to and split on), not the
                # caller-supplied `sep`.
                while (not to_node) and (idx + 1 < len(to_path_list)):
                    idx += 1
                    to_path_parent = tree_sep.join(to_path_list[:-idx])
                    to_node = find_path(dest_tree, to_path_parent)
                if not to_node:
                    raise NotFoundError(
                        f"Unable to find to_path {to_path}\n"
                        f"Please specify valid path to shift node to"
                    )
                # Create intermediate parent node(s), if applicable
                for depth in range(len(to_path_list) - idx, len(to_path_list) - 1):
                    intermediate_child_node = node_type(to_path_list[depth])
                    intermediate_child_node.parent = to_node
                    to_node = intermediate_child_node

        # Reassign from_node to new parent
        if copy:
            logging.debug(f"Copying {from_node.node_name}")
            from_node = from_node.copy()
        if merge_children_pair:
            logging.debug(
                f"Reassigning children from {from_node.node_name} to {to_node.node_name}"
            )
            for children in from_node.children:
                if delete_children:
                    del children.children
                children.parent = to_node
            from_node.parent = None
        elif merge_leaves:
            logging.debug(
                f"Reassigning leaf nodes from {from_node.node_name} to {to_node.node_name}"
            )
            for children in from_node.leaves:
                children.parent = to_node
        else:
            if delete_children:
                del from_node.children
            from_node.parent = to_node
def findall(
    tree: BaseNode,
    condition: Callable,
    max_depth: int = None,
    min_count: int = None,
    max_count: int = None,
) -> tuple:
    """
    Collect every node of the tree for which `condition` holds, in pre-order.

    >>> from bigtree import Node, findall
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("d", age=40, parent=c)
    >>> findall(root, lambda node: node.age > 62)
    (Node(/a, age=90), Node(/a/b, age=65))

    Args:
        tree (BaseNode): tree to search
        condition (Callable): function that takes in node as argument, node is kept if it evaluates to `True`
        max_depth (int): maximum depth to search for, based on `depth` attribute, defaults to None
        min_count (int): checks for minimum number of occurrence,
            raise SearchError if fewer than `min_count` results are found, defaults to None
        max_count (int): checks for maximum number of occurrence,
            raise SearchError if more than `max_count` results are found, defaults to None

    Returns:
        (tuple)
    """
    matches = tuple(
        preorder_iter(tree, filter_condition=condition, max_depth=max_depth)
    )
    found = len(matches)

    # A falsy bound (None or 0) disables the corresponding check.
    too_few = bool(min_count) and found < min_count
    if too_few:
        raise SearchError(
            f"Expected more than {min_count} element(s), found {found} elements\n{matches}"
        )

    too_many = bool(max_count) and found > max_count
    if too_many:
        raise SearchError(
            f"Expected less than {max_count} element(s), found {found} elements\n{matches}"
        )

    return matches
def find_full_path(tree: Node, path_name: str) -> Node:
    """
    Resolve a full path to a single node by walking down from the root.
      - Path name can be with or without leading tree path separator symbol.
      - Path name must be full path, works similar to `find_path` but faster.

    >>> from bigtree import Node, find_full_path
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("d", age=40, parent=c)
    >>> find_full_path(root, "/a/c/d")
    Node(/a/c/d, age=40)

    Args:
        tree (Node): tree to search
        path_name (str): value to match (full path) of path_name attribute

    Returns:
        (Node): the node at the path; a falsy value (whatever `find_children`
        returned) when some path segment has no matching child

    Raises:
        ValueError: if the first path segment is not the root node's name
    """
    separator = tree.sep
    # Leading/trailing separators are optional, drop them before splitting.
    path_name = path_name.strip(separator)
    root_name, *descendant_names = path_name.split(separator)

    root = tree.root
    if root_name != root.node_name:
        raise ValueError(
            f"Path {path_name} does not match the root node name {root.node_name}"
        )

    # Descend one level per remaining segment; stop at the first missing child.
    current = root
    for segment in descendant_names:
        current = find_children(current, segment)
        if not current:
            return current
    return current
def find_paths(tree: Node, path_name: str) -> tuple:
    """
    Return all nodes whose path ends with `path_name`.
      - Path name can be with or without leading tree path separator symbol.
      - Path name can be partial path (trailing part of path) or node name.

    >>> from bigtree import Node, find_paths
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("c", age=40, parent=c)
    >>> find_paths(root, "/a/c")
    (Node(/a/c, age=60),)
    >>> find_paths(root, "/c")
    (Node(/a/c, age=60), Node(/a/c/c, age=40))

    Args:
        tree (Node): tree to search
        path_name (str): value to match (full path) or trailing part (partial path) of path_name attribute

    Returns:
        (tuple)
    """
    # Only trailing separators are removed; a leading one stays part of the suffix.
    suffix = path_name.rstrip(tree.sep)

    def _path_ends_with_suffix(node) -> bool:
        # NOTE(review): this is a plain string-suffix test, so a suffix given
        # without a leading separator can also match the tail of a longer node
        # name - confirm whether that is intended.
        return node.path_name.endswith(suffix)

    return findall(tree, _path_ends_with_suffix)
def find_attrs(
    tree: BaseNode, attr_name: str, attr_value: Any, max_depth: int = None
) -> tuple:
    """
    Search tree for node(s) matching custom attribute.

    Nodes that do not carry `attr_name` at all are treated as non-matching
    rather than aborting the whole search with ``AttributeError``.

    >>> from bigtree import Node, find_attrs
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=65, parent=root)
    >>> d = Node("d", age=40, parent=c)
    >>> find_attrs(root, "age", 65)
    (Node(/a/b, age=65), Node(/a/c, age=65))

    Args:
        tree (BaseNode): tree to search
        attr_name (str): attribute name to perform matching
        attr_value (Any): value to match for attr_name attribute
        max_depth (int): maximum depth to search for, based on `depth` attribute, defaults to None

    Returns:
        (tuple)
    """
    # Private sentinel so that "attribute absent" can never be confused with
    # any real attribute value, including None.
    missing = object()

    def _has_matching_attr(node):
        value = getattr(node, attr_name, missing)
        return value is not missing and value == attr_value

    return findall(tree, _has_matching_attr, max_depth)
def inorder_iter(
    tree,
    filter_condition: Callable = None,
    max_depth: int = None,
) -> Iterable:
    """Iterate through a binary tree in in-order sequence.

    In-Order Iteration Algorithm, LNR
        1. Recursively traverse the current node's left subtree.
        2. Visit the current node.
        3. Recursively traverse the current node's right subtree.

    >>> from bigtree import BinaryNode, list_to_binarytree, inorder_iter, print_tree
    >>> num_list = [1, 2, 3, 4, 5, 6, 7, 8]
    >>> root = list_to_binarytree(num_list)
    >>> print_tree(root)
    1
    ├── 2
    │   ├── 4
    │   │   └── 8
    │   └── 5
    └── 3
        ├── 6
        └── 7

    >>> [node.node_name for node in inorder_iter(root)]
    ['8', '4', '2', '5', '1', '6', '3', '7']

    >>> [node.node_name for node in inorder_iter(root, filter_condition=lambda x: x.node_name in ["1", "4", "3", "6", "7"])]
    ['4', '1', '6', '3', '7']

    >>> [node.node_name for node in inorder_iter(root, max_depth=3)]
    ['4', '2', '5', '1', '6', '3', '7']

    Args:
        tree (BaseNode): input tree
        filter_condition (Callable): function that takes in node as argument, optional
            Yields node only if condition evaluates to `True`
        max_depth (int): maximum depth of iteration, based on `depth` attribute, optional

    Returns:
        (Iterable[BaseNode])
    """
    # Empty slot (e.g. a missing left/right child): nothing to yield.
    if not tree:
        return
    # A falsy max_depth (None or 0) means "no depth limit".
    if max_depth and tree.depth > max_depth:
        return

    yield from inorder_iter(tree.left, filter_condition, max_depth)

    # The filter only decides whether this node is yielded; both subtrees are
    # traversed regardless of its outcome.
    is_selected = True if not filter_condition else filter_condition(tree)
    if is_selected:
        yield tree

    yield from inorder_iter(tree.right, filter_condition, max_depth)
+ + >>> from bigtree import Node, list_to_tree, preorder_iter, print_tree + >>> path_list = ["a/b/d", "a/b/e/g", "a/b/e/h", "a/c/f"] + >>> root = list_to_tree(path_list) + >>> print_tree(root) + a + ├── b + │ ├── d + │ └── e + │ ├── g + │ └── h + └── c + └── f + + >>> [node.node_name for node in preorder_iter(root)] + ['a', 'b', 'd', 'e', 'g', 'h', 'c', 'f'] + + >>> [node.node_name for node in preorder_iter(root, filter_condition=lambda x: x.node_name in ["a", "d", "e", "f", "g"])] + ['a', 'd', 'e', 'g', 'f'] + + >>> [node.node_name for node in preorder_iter(root, stop_condition=lambda x: x.node_name=="e")] + ['a', 'b', 'd', 'c', 'f'] + + >>> [node.node_name for node in preorder_iter(root, max_depth=3)] + ['a', 'b', 'd', 'e', 'c', 'f'] + + Args: + tree (BaseNode): input tree + filter_condition (Callable): function that takes in node as argument, optional + Returns node if condition evaluates to `True` + stop_condition (Callable): function that takes in node as argument, optional + Stops iteration if condition evaluates to `True` + max_depth (int): maximum depth of iteration, based on `depth` attribute, optional + + Returns: + (Iterable[BaseNode]) + """ + if ( + tree + and (not max_depth or not tree.depth > max_depth) + and (not stop_condition or not stop_condition(tree)) + ): + if not filter_condition or filter_condition(tree): + yield tree + for child in tree.children: + yield from preorder_iter(child, filter_condition, stop_condition, max_depth) + + +def postorder_iter( + tree, + filter_condition: Callable = None, + stop_condition: Callable = None, + max_depth: int = None, +) -> Iterable: + """Iterate through all children of a tree. + + Post-Order Iteration Algorithm, LRN + 1. Recursively traverse the current node's left subtree. + 2. Recursively traverse the current node's right subtree. + 3. Visit the current node. 
+ + >>> from bigtree import Node, list_to_tree, postorder_iter, print_tree + >>> path_list = ["a/b/d", "a/b/e/g", "a/b/e/h", "a/c/f"] + >>> root = list_to_tree(path_list) + >>> print_tree(root) + a + ├── b + │ ├── d + │ └── e + │ ├── g + │ └── h + └── c + └── f + + >>> [node.node_name for node in postorder_iter(root)] + ['d', 'g', 'h', 'e', 'b', 'f', 'c', 'a'] + + >>> [node.node_name for node in postorder_iter(root, filter_condition=lambda x: x.node_name in ["a", "d", "e", "f", "g"])] + ['d', 'g', 'e', 'f', 'a'] + + >>> [node.node_name for node in postorder_iter(root, stop_condition=lambda x: x.node_name=="e")] + ['d', 'b', 'f', 'c', 'a'] + + >>> [node.node_name for node in postorder_iter(root, max_depth=3)] + ['d', 'e', 'b', 'f', 'c', 'a'] + + Args: + tree (BaseNode): input tree + filter_condition (Callable): function that takes in node as argument, optional + Returns node if condition evaluates to `True` + stop_condition (Callable): function that takes in node as argument, optional + Stops iteration if condition evaluates to `True` + max_depth (int): maximum depth of iteration, based on `depth` attribute, optional + + Returns: + (Iterable[BaseNode]) + """ + if ( + tree + and (not max_depth or not tree.depth > max_depth) + and (not stop_condition or not stop_condition(tree)) + ): + for child in tree.children: + yield from postorder_iter( + child, filter_condition, stop_condition, max_depth + ) + if not filter_condition or filter_condition(tree): + yield tree + + +def levelorder_iter( + tree, + filter_condition: Callable = None, + stop_condition: Callable = None, + max_depth: int = None, +) -> Iterable: + """Iterate through all children of a tree. + + Level Order Algorithm + 1. Recursively traverse the nodes on same level. 
+ + >>> from bigtree import Node, list_to_tree, levelorder_iter, print_tree + >>> path_list = ["a/b/d", "a/b/e/g", "a/b/e/h", "a/c/f"] + >>> root = list_to_tree(path_list) + >>> print_tree(root) + a + ├── b + │ ├── d + │ └── e + │ ├── g + │ └── h + └── c + └── f + + >>> [node.node_name for node in levelorder_iter(root)] + ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h'] + + >>> [node.node_name for node in levelorder_iter(root, filter_condition=lambda x: x.node_name in ["a", "d", "e", "f", "g"])] + ['a', 'd', 'e', 'f', 'g'] + + >>> [node.node_name for node in levelorder_iter(root, stop_condition=lambda x: x.node_name=="e")] + ['a', 'b', 'c', 'd', 'f'] + + >>> [node.node_name for node in levelorder_iter(root, max_depth=3)] + ['a', 'b', 'c', 'd', 'e', 'f'] + + Args: + tree (BaseNode): input tree + filter_condition (Callable): function that takes in node as argument, optional + Returns node if condition evaluates to `True` + stop_condition (Callable): function that takes in node as argument, optional + Stops iteration if condition evaluates to `True` + max_depth (int): maximum depth of iteration, based on `depth` attribute, defaults to None + + Returns: + (Iterable[BaseNode]) + """ + if not isinstance(tree, List): + tree = [tree] + next_level = [] + for _tree in tree: + if _tree: + if (not max_depth or not _tree.depth > max_depth) and ( + not stop_condition or not stop_condition(_tree) + ): + if not filter_condition or filter_condition(_tree): + yield _tree + next_level.extend(list(_tree.children)) + if len(next_level): + yield from levelorder_iter( + next_level, filter_condition, stop_condition, max_depth + ) + + +def levelordergroup_iter( + tree, + filter_condition: Callable = None, + stop_condition: Callable = None, + max_depth: int = None, +) -> Iterable[Iterable]: + """Iterate through all children of a tree. + + Level Order Group Algorithm + 1. Recursively traverse the nodes on same level, returns nodes level by level in a nested list. 
+ + >>> from bigtree import Node, list_to_tree, levelordergroup_iter, print_tree + >>> path_list = ["a/b/d", "a/b/e/g", "a/b/e/h", "a/c/f"] + >>> root = list_to_tree(path_list) + >>> print_tree(root) + a + ├── b + │ ├── d + │ └── e + │ ├── g + │ └── h + └── c + └── f + + >>> [[node.node_name for node in group] for group in levelordergroup_iter(root)] + [['a'], ['b', 'c'], ['d', 'e', 'f'], ['g', 'h']] + + >>> [[node.node_name for node in group] for group in levelordergroup_iter(root, filter_condition=lambda x: x.node_name in ["a", "d", "e", "f", "g"])] + [['a'], [], ['d', 'e', 'f'], ['g']] + + >>> [[node.node_name for node in group] for group in levelordergroup_iter(root, stop_condition=lambda x: x.node_name=="e")] + [['a'], ['b', 'c'], ['d', 'f']] + + >>> [[node.node_name for node in group] for group in levelordergroup_iter(root, max_depth=3)] + [['a'], ['b', 'c'], ['d', 'e', 'f']] + + Args: + tree (BaseNode): input tree + filter_condition (Callable): function that takes in node as argument, optional + Returns node if condition evaluates to `True` + stop_condition (Callable): function that takes in node as argument, optional + Stops iteration if condition evaluates to `True` + max_depth (int): maximum depth of iteration, based on `depth` attribute, defaults to None + + Returns: + (Iterable[Iterable]) + """ + if not isinstance(tree, List): + tree = [tree] + + current_tree = [] + next_tree = [] + for _tree in tree: + if (not max_depth or not _tree.depth > max_depth) and ( + not stop_condition or not stop_condition(_tree) + ): + if not filter_condition or filter_condition(_tree): + current_tree.append(_tree) + next_tree.extend([_child for _child in _tree.children if _child]) + yield tuple(current_tree) + if len(next_tree) and (not max_depth or not next_tree[0].depth > max_depth): + yield from levelordergroup_iter( + next_tree, filter_condition, stop_condition, max_depth + ) + + +def dag_iterator(dag) -> Iterable[Tuple]: + """Iterate through all nodes of a Directed 
Acyclic Graph (DAG). + Note that node names must be unique. + Note that DAG must at least have two nodes to be shown on graph. + + 1. Visit the current node. + 2. Recursively traverse the current node's parents. + 3. Recursively traverse the current node's children. + + >>> from bigtree import DAGNode, dag_iterator + >>> a = DAGNode("a", step=1) + >>> b = DAGNode("b", step=1) + >>> c = DAGNode("c", step=2, parents=[a, b]) + >>> d = DAGNode("d", step=2, parents=[a, c]) + >>> e = DAGNode("e", step=3, parents=[d]) + >>> [(parent.node_name, child.node_name) for parent, child in dag_iterator(a)] + [('a', 'c'), ('a', 'd'), ('b', 'c'), ('c', 'd'), ('d', 'e')] + + Args: + dag (DAGNode): input dag + + Returns: + (Iterable[Tuple[DAGNode, DAGNode]]) + """ + visited_nodes = set() + + def recursively_parse_dag(node): + node_name = node.node_name + visited_nodes.add(node_name) + + # Parse upwards + for parent in node.parents: + parent_name = parent.node_name + if parent_name not in visited_nodes: + yield parent, node + + # Parse downwards + for child in node.children: + child_name = child.node_name + if child_name not in visited_nodes: + yield node, child + + # Parse upwards + for parent in node.parents: + parent_name = parent.node_name + if parent_name not in visited_nodes: + yield from recursively_parse_dag(parent) + + # Parse downwards + for child in node.children: + child_name = child.node_name + if child_name not in visited_nodes: + yield from recursively_parse_dag(child) + + yield from recursively_parse_dag(dag) diff --git a/python37/packages/bigtree/workflows/__init__.py b/python37/packages/bigtree/workflows/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python37/packages/bigtree/workflows/app_todo.py b/python37/packages/bigtree/workflows/app_todo.py new file mode 100644 index 0000000..93795e2 --- /dev/null +++ b/python37/packages/bigtree/workflows/app_todo.py @@ -0,0 +1,249 @@ +import json +import logging +from typing import List, Union + +from 
bigtree.node.node import Node +from bigtree.tree.construct import dict_to_tree +from bigtree.tree.export import print_tree, tree_to_dict +from bigtree.tree.search import find_children, find_name + +logging.getLogger(__name__).addHandler(logging.NullHandler()) + + +class AppToDo: + """ + To-Do List Implementation with Big Tree. + - To-Do List has three levels - app name, list name, and item name. + - If list name is not given, item will be assigned to a `General` list. + + *Initializing and Adding Items* + + >>> from bigtree import AppToDo + >>> app = AppToDo("To Do App") + >>> app.add_item(item_name="Homework 1", list_name="School") + >>> app.add_item(item_name=["Milk", "Bread"], list_name="Groceries", description="Urgent") + >>> app.add_item(item_name="Cook") + >>> app.show() + To Do App + ├── School + │ └── Homework 1 + ├── Groceries + │ ├── Milk [description=Urgent] + │ └── Bread [description=Urgent] + └── General + └── Cook + + *Reorder List and Item* + + >>> app.prioritize_list(list_name="General") + >>> app.show() + To Do App + ├── General + │ └── Cook + ├── School + │ └── Homework 1 + └── Groceries + ├── Milk [description=Urgent] + └── Bread [description=Urgent] + + >>> app.prioritize_item(item_name="Bread") + >>> app.show() + To Do App + ├── General + │ └── Cook + ├── School + │ └── Homework 1 + └── Groceries + ├── Bread [description=Urgent] + └── Milk [description=Urgent] + + *Removing Items* + + >>> app.remove_item("Homework 1") + >>> app.show() + To Do App + ├── General + │ └── Cook + └── Groceries + ├── Bread [description=Urgent] + └── Milk [description=Urgent] + + *Exporting and Importing List* + + >>> app.save("list.json") + >>> app2 = AppToDo.load("list.json") + >>> app2.show() + To Do App + ├── General + │ └── Cook + └── Groceries + ├── Bread [description=Urgent] + └── Milk [description=Urgent] + """ + + def __init__( + self, + app_name: str = "", + ): + """Initialize To-Do app + + Args: + app_name (str): name of to-do app, optional + """ + 
self._root = Node(app_name) + + def add_list(self, list_name: str, **kwargs) -> Node: + """Add list to app + + If list is present, return list node, else a new list will be created + + Args: + list_name (str): name of list + + Returns: + (Node) + """ + list_node = find_children(self._root, list_name) + if not list_node: + list_node = Node(list_name, parent=self._root, **kwargs) + logging.info(f"Created list {list_name}") + return list_node + + def prioritize_list(self, list_name: str): + """Prioritize list in app, shift it to be the first list + + Args: + list_name (str): name of list + """ + list_node = find_children(self._root, list_name) + if not list_node: + raise ValueError(f"List {list_name} not found") + current_children = list(self._root.children) + current_children.remove(list_node) + current_children.insert(0, list_node) + self._root.children = current_children + + def add_item(self, item_name: Union[str, List[str]], list_name: str = "", **kwargs): + """Add items to list + + Args: + item_name (str/List[str]): items to be added + list_name (str): list to add items to, optional + """ + if not isinstance(item_name, str) and not isinstance(item_name, list): + raise TypeError("Invalid data type for item") + + # Get list to add to + if list_name: + list_node = self.add_list(list_name) + else: + list_node = self.add_list("General") + + # Add items to list + if isinstance(item_name, str): + _ = Node(item_name, parent=list_node, **kwargs) + logging.info(f"Created item {item_name}") + elif isinstance(item_name, list): + for _item in item_name: + _ = Node(_item, parent=list_node, **kwargs) + logging.info(f"Created items {', '.join(item_name)}") + + def remove_item(self, item_name: Union[str, List[str]], list_name: str = ""): + """Remove items from list + + Args: + item_name (str/List[str]): items to be added + list_name (str): list to add items to, optional + """ + if not isinstance(item_name, str) and not isinstance(item_name, list): + raise TypeError("Invalid data 
type for item") + + # Check if items can be found + items_to_remove = [] + parent_to_check = set() + if list_name: + list_node = find_children(self._root, list_name) + if isinstance(item_name, str): + item_node = find_children(list_node, item_name) + items_to_remove.append(item_node) + parent_to_check.add(item_node.parent) + elif isinstance(item_name, list): + for _item in item_name: + item_node = find_children(list_node, _item) + items_to_remove.append(item_node) + parent_to_check.add(item_node.parent) + else: + if isinstance(item_name, str): + item_node = find_name(self._root, item_name) + items_to_remove.append(item_node) + parent_to_check.add(item_node.parent) + elif isinstance(item_name, list): + for _item in item_name: + item_node = find_name(self._root, _item) + items_to_remove.append(item_node) + parent_to_check.add(item_node.parent) + + # Remove items + for item_node in items_to_remove: + item_node.parent = None + logging.info( + f"Removed items {', '.join(item.node_name for item in items_to_remove)}" + ) + + # Remove list if empty + for list_node in parent_to_check: + if not len(list(list_node.children)): + list_node.parent = None + logging.info(f"Removed list {list_node.node_name}") + + def prioritize_item(self, item_name: str): + """Prioritize item in list, shift it to be the first item in list + + Args: + item_name (str): name of item + """ + item_node = find_name(self._root, item_name) + if not item_node: + raise ValueError(f"Item {item_node} not found") + current_parent = item_node.parent + current_children = list(current_parent.children) + current_children.remove(item_node) + current_children.insert(0, item_node) + current_parent.children = current_children + + def show(self, **kwargs): + """Print tree to console""" + print_tree(self._root, all_attrs=True, **kwargs) + + @staticmethod + def load(json_path: str): + """Load To-Do app from json + + Args: + json_path (str): json load path + + Returns: + (Self) + """ + if not json_path.endswith(".json"): 
+ raise ValueError("Path should end with .json") + + with open(json_path, "r") as fp: + app_dict = json.load(fp) + _app = AppToDo("dummy") + AppToDo.__setattr__(_app, "_root", dict_to_tree(app_dict["root"])) + return _app + + def save(self, json_path: str): + """Save To-Do app as json + + Args: + json_path (str): json save path + """ + if not json_path.endswith(".json"): + raise ValueError("Path should end with .json") + + node_dict = tree_to_dict(self._root, all_attrs=True) + app_dict = {"root": node_dict} + with open(json_path, "w") as fp: + json.dump(app_dict, fp) diff --git a/python37/packages/bigtree_info b/python37/packages/bigtree_info new file mode 100644 index 0000000000000000000000000000000000000000..4e31788730588949f9a8c4abff03bb0108925099 GIT binary patch literal 40960 zcmeHw`)(Uav!DMS;Jia0If!M2qrSZ2>D7>JzTsDpOY^Ing()Zx+MJAuyC}z@`d^Ym}uen_Ag(y7Z z814i$_I!OL#0#T0vYk6?y?8Npz9qfBcTbIfk&oJU(Ep^|EbWz=rRSIw5xJPp-$DOE zc4H$K(Lb{R`WN!W%nOlzj?N!~@vofzCzh|9y00fcSR>DNoi`$z%4nzhzm1bDJZ})-z{`T)5OnQ&#gw~nU06gzfTN(VW3 zQPYhp8jSz7C^R<5Ci>+7~S@TrF#?MyP|LDBZnmp25mv+wEGs74|E65 zF&$619f1XR)d9A=l%^%|rvXWQmSo4abLG9!lH&7}NcysRzerlQoOJ@=`RlK#@qz6h zj@wbIyM+>0-|be!_w(N`M7$J8kh`8O2fy1kI_g07OBo*fww&fTW3x3HVNfM+{{zP<4Nnp zYL!~M3?jL|zqgl8{|C>X^sQ~E;v{rplB@n(@Q%BU{*nVLsV8NZT)6A@-O+Ztk0rVy z^uE8n0;uA|61tF3SA>MOa$^xK^nOzB+g<0)z&6dkg|V*d_&>ydLPfA%UcN;0!O(Re zCOoa#vqpf3|2g4VT~bsYp(zv;hU<8cXtY?d{D_7LJS6i#eIXlR4+7C3bE0HSjJ|D% zgONV$iPgQ*!E4C8xFLddyAM-@8D;uOLI5dhFq>HRwj-dF)j-iKlM-udr><$Ot%2tR zzFuDL=$=JImzN0=pMlsgNCi`+EL93B1eK-HE13HF|d66#f`f4hA4wIgl_wP-Ika1BVBZPl=Lg3Ysw!*ZQU?O3O@51bt#Y z)L|iBo2VP&=g!8_tRyO?Eyg&msDq;Hz;*hQXM-J5Qv<%%)|w+7i-IXMMZwz0N>)dv zMGVL_^uA(~nhOtm!6Se>CL?aqGvGD$EO{_;$HN$4JYHKnaZS53nPa6!&+hn&A{}C@ zzH3NB9g7cS0`u`#HM(7QIC*4k40yIS*1BL)1GetCw9GV6na~9KH=uZpUc}@!NIn?% zeH%Qi`fOvFnF3w}t(OGxx^L@!W_~yc!SWF|dVh?^W6v5TO{-%&XaxCT+GLujnd}pI zBFx>u{oa#+(gK+pvs{Cc+ryDRVU(a-e3TJ55kez8XyD_uwL_~v1Uv1I9d?^SH176Y zhp0Y-cOj(_8#DpujmC;TRDjtWSU%(gR1YMIVTrco-&&R<{9BiydD;eb3?Bu0Y>u&N 
zSjMdANN~G-79kL8vNlQfwP>+If~+l)OAtnDbJ9i)E&xQFL#*ohV%2~< ztM^e|zk$ce_AP40#&wsHV|7cNk(%RiuuavLYM5s{8k54A*IzN{A>O1qnNnjNn847U zSg%wZQ$S$HDQUV0Ud5Rcj;!OKU=_qkke?^PLfsGJBEZS4>Ss=%1rX}-`^K^WZGgQP zT>$FCGNEPnbd9f;&>iZ0PIJWvaRB0sU|7LJB*g~dJFpPdFf~>(kt~R}304`_ydw7W z+P`y|NAd#Teq@zvFfuE^zTO94k?hx+E>;B>h)ZR|gO?%scO6>%Mn${5ZBSfxk!Mm(49Gdr(vAt4*`^^1*d4-|3N7ijP_AVyZ$wFSr z(Mxw0m}`=3-HVGbO*;%MW6y=fY6O;C7pj8dD71uKuO`|2^36jt~lF*?V z#7uy#o&^&OHa9q$G`jsAh+@$u)fDu@l(tyNF@e-#wwCR%c48fa3Lxv1qyfCDo?sJ1 zn&Fpn8C|mHf62NQYVQ)x2n5ndak-b!(6zL<+YrBOJh{-jx{q=ehEfDOH!#A%a6*A? zIhek9`&KYu{&qP>pe+$Vgg)JV(MPY#`FJEn+vwF1)sU zw?(AXMlMw1HrHBR(N|!~bgk{J^lPmhmbO)CvG7_m!crqBMO`y2HC1UIr3^b2;0RSd zw-dtnqM0d)M3l5OEgIP>jYZm0Es_A+pI0!Ll`pRm3DU)9Ij}GI^2hK0`;XuMGyaLT zEG`yg@qdNB{^R%G)k9N0%%|nUpx97tP^=+@3CC|cjuJHrqC^W$3BtD9D_ys5uDq7D zQ@7BlRtx$b`0_w^Z=DcICIqVR!v5#aO4sw!sE+;N*pEU9cZX<3`fU(znSl&sK8?zM zJz!U93 z4m^2L&7pgC{YZ}ACJvunL{pEZSCV?v2Kncfi3+ArILZd&VTs`Xe@Z4n1H5A){5yO$ z@tNKY$(T>&q4;Q~hdn#LV3luwHnCpkgi7?7MZj~-hYd(+pRmb5f_yiyqm(m9etSaN ziNpuWA`%wDa}d~_abG%y9FdTL5Q-d0%&Yhaxla_qy&ruv0urlI$i~05+LP9Z;=T~| zaZwy{O}z3dq~3aaodhX97$p8?i62lqCI><1KYpJPe_R$yy@j2{N4EM|Du9*t%Ikzs zpAGq$RKN;EAO!a4@eQT+5OlnXr$gI|&KUjBF@La+of}0rFcg3KV^&$M|D* zksnur##m?SDIO>$Di-2p3jj$NzlK1h=-}>|bL4ZpehC0c_Crb3h|Ia5L&b-v9x~km zg#28*#{ojgdvmd%M={)d2#M{68GS+P4LXEk)pKEV6~azvQ&DJDds@gS+gyzl`3gf* zTWG!0hEs>~7$P>DD~b#8Y4t)?VcibR)LxJeu}b_Eupm?o$i9^h8H05XieOL&yh269 z9L1Shtf63Ab3OU05CFTK&=fEfo`n{{+}KyC&4JMG)D*9=hJAoSKO+@CLVEDka(iR&GAF z4q$=7=SFA)F1K`=BW1B@g9Z^&3LVrqbWARmrj6t2V zvm6LTm3&kM&Vq$CFZ;H@Mf5yZ2tnX^G)H!J?>FHBSDRdkSrgNmH8BIKc;hw^W45OR z+hi@$b}e_*A}O&=<|e?9*Am&Yva`T;1%YRVOc@#9S_pvwK=!Wj2h9yiC8;sDgqEK- z50RezsklRksp+`Gv{i1&hdC9o2*(%;O3cl5Ze(&5wb~@j8f=tgRB! 
zDT)(EYFo&1isMAIj7BY=QsgFj8jR0UZF8z^P;LY;DcShC_mAKI3;wCq@ZW%gs7$U2S@u6wJ-GmbZ};E`d%Xx2QxGfYDwxCr z6Z_l$Q_(c;D*Sd&SEf5gE>zUh-~P`ZfBWzFCnxo{|3Q-q%Xm_%?0>0xvZvtNbWdy@ zbDWSrDGo4@ed`XC?JU2vM|r<``P*=j^YUzfYtkJKm(kfXfFI*r(9PQA>=V(w8VQ;(+A$kd~# zw|g~2z(WQvgg_?1l#LCY4Ng)9tl!vLB)qWj_b9~6)+OG|6=_8xghKd|&)VTrWD8GZ zohK@=U|2JQBzPD${l!AYkB^gq#*=@U#EC}!lhJ0Hoq})Ewt#sb-N@*XyGOinjYX+e zVmv7!$W=hv0l0G7FXI)va04YmT8r72+D1NEv|fTJORk)P7%IAGA%K{gXHRn!;o#yS>QvFfZqI=qiaFD&*FTX9a=+N<~uR;ystwqn%6>wx)q7b_q=_I1D~LHih6M-Ysl6bur;U`hor zAgCAd?~{QNOL_rN5F_Nt2vTe)3AM>il3Hamo*{;0`xA&!Z0T9ZrV7$?C|3koIF#|& zQ5lcOg(Cy~;dDomtm7~z64-=zM*a$ijRF(Gu(Y^k@i=>8MS2-&JCSZA5;hX);qxYIw>Oh=!y{ zM37?=BO$;X6kiF_LaBRBmL_Hq;KbS0{G%pOWMuJ3Xbg*Vf2aebqtqfY!WlytoIWrb zLuP~3F{gl3g9v;85XlWM#lb__t_s_{GD9TGZ}Yef19c>arqKR_lbtc^h{iLf-pn8Z zLu#hKpg2Hr3%87_h_w^cQ&BrUq%;o7iPc||cy_7zN z9o;CIEVe-y6^7eAu_8+G3P21wlX@|#P7%E9iqnT=2UZY zy19fFZ_eA-vF0@7XzbQQ&2yAz5jHob8XMD%B_85^7t_C??Qcpff!|{st|T4ov=$pL z+s)CKbz*?!7<$zJKne6u=>T#-!9G3;Kg%LeM>Quxfm8ty;3cd<hh6Lii;hdc__ zeAS5pM|GG|dHO`&FTL~2bYJP`%A}=}Fe;N)lggpR+32tc0#PZ8l?52DlcF;{VNNbV zYRSj()Ld*XsV+1vOw-*%3Uq~zx{E)Ps~Z4@B_n%@bsy`jCQlDNPk*s4s9Qc^4!}57HpAOT)E#d=g)|l-986 z@Rw{!)~-6=I(76Y+?OW{fDeYCR~bX2^RS$*F3*X?n#prW!y(S;sx$W5)c8DP`1bp- z!pI@q4R-$HC{Sl{_h~ogG8*9}r3FP_gDioEbN`Wl7R(!mLN5DO$HzNaU6&)f+w*_s z(*V(YBP)#00Z`6B^l%m71+toAC3%?u94pBw2QobGwVFkuEwHhTz!zg;8%mK9kYbH8 zI08V2j*)M@EzbkI*2t5HRVjw%8$vPN>1P}9XOy`gYquFb!xaS!Q~Pz5fB>=mn7n?L zA#I)>t&q4JiDU#up%|6~EjHxFUnD~5DB0fhp{B=( zPHILXGq?_fPcem&Q)xfYmeaJbLaQkuh3_OoSwgALj}}kfr4&WMD*KtGk(wKBanifA znZ}Zc1crJ*$x~9ugas6^A(t;Kh$1K0F~$OKBS)P?S6G1(?-b&>f`JMo?ZOHaa11NZ z@dc7c;nPn1DZ>9$fdo%jp@$KXlnM*FQ!T#41zr!0L&+3Pdi6&H6Po{Z5=jAnx+sE< zrxT~hXvH#^xnbhbLI}M}p1?^fJ~W3AfjU1C2_Z7LW2Y2iU$L*k{_-kv%dn|PXBgyD z-dT*Ea!Mv;s^CnTP9zKj7rr4|Tprkv9V)SfGZJ!MDYBi^CL6D`sNgk6m}NCZdpd>* zn5|$sNY(7pS&En9^Q!ck%~y2h;uU+P>N9)CmMmxY)R3RqO~&UhQe-{bDGHv3x-wzz zEQOqoda7!Yl}8?r%;tQMxD=q^MSeGNIi>_U#7|P-9+Dxl<}+{wL2yu%&PB`=%2O0_ 
zj2@-fb6zv*V(|ravkHJbSTR=@2W{r-QU?F8Dj!$Er6iRQWDX#IQ>+u4UC25rL?-)+ zhiyFY94ptKke_0QI2Oem=gwEn9m-jhNFN9V$vH5;kUt*?UF2xUuRoSH&zm1JDI1}C ze+R#@@6Hk2ndEp*jF;RaV_}&u%yyqylFSsbG@CVH-y3q4gbvBa4N-&@*5*lgZ& zMM=7Q>MTBQ>LIN=ya#CdA|~=gb*LKcRZ5^}GQ zY_(`>(0A!ZHoANWdxRql4M)&PClJiA19C)pWV1CP-3L^YT0nhS9Ua%w$o3dLo%nvm z_af;1S6qs&Y9rglabg@~#}ugDIs-w9`pCjvjkrUB?qul!F6{Y^J;G4JT4>(jGxBge z)p?S+G172oS@9x47ChZNfwQrFanz`u0w6A20Tja0TvME!?b$f|&MbuC!vP{_`$O{38M=2!zlTsY4(hIB8?Uh4Z89?{ui42&XW~<5@ zM$dKocQ^dJa$@(dXo2j|kpmLcvDUcl$N)Mm4;7Fk7TiF%y>d3{!cMsx?8>g|Ua7~# zSF5D9!+GFy08UemJIBlVGQc<|01FM=A$Ofy~*QW1jEbl>;9~F&_ZL zQyyu08WTgt&j@F!uh=}0bIwNOOZ8pdTwRG*puID`8CMD5x%;2b56k7_hwjdOCWY zj(@U(OG$BST;Fcv>Q8K-XidF~!@QGR(m}X8iHkHPZ~d8ZE!+NmFmTuUua|eL^}XjA z!vpKTn9XFP_0RXe7qYp+a{WIK_V0&s7yT=zjb`cixLi+oJw00}Zhs$-$5#HzaVJN~ zv)fwjV|TM-eQ#`h*d2X5dOs@FHojY@Xvi@oby&Aqt&mKh9d<+NKqN>{Irw!EX=kLED5e|?h3;C}qDhxJyA?n!F3 zQo~6A%sf8-KDgehnm3)zchVP@olFn*PY){Hi{8P0Chy;vE~XG8^g7(7Xy_5Z{Kj7S{qXDN;Z8$Owwjf} zd9rY{YmMJq^{YYQ^kjH+{^=msYmE2L66pxL5?VC15ZiapY_>@>y~nqa+hq5kk2;9} zN7KA1-F`~fJ-1VEbGN&L;l<|HQ9E5dsC+d0C&Po|n_jU!={EgJt(+)sWupbccvIa* z;I49wZ)x{?W~cw@aCfIu{^r#4>Bb=2%v>LiJ~`4CA<)$YhIKF*ahL!So}D{&o9&r+a-qJUqN{vb{6C zb29c_`-6Vr7>(oc#^H6gb9GQm+BX-8%tm2j7UIkRKlbJEWbts4PT&WB(lH^;07JT^zj2m##ydn#jPyXETn zn$z|Yxy(jBCS4hz!gU1jqx1FVtk+3?JJ{4GH+wgOjCHZqZ;w7!zaR9R>|pCd@@%J- z*>fsnMC6LG8RN44zW0plB7pZKfj!^I`exwtKW-HA?qR9tj3P zy}#J4f4Z*PrQ5TCbyB!8T07ORUhDjPubSWKT)30kw{{s4b)yiYb@=NN&q$s)%qNj( zaYF?85Q8_}dx#V@3@b<2alYm^w>svBwo@1tj)s-8IXT}=j(lTaUQDW8Gk=jQA2-L7 z=Jjo2GhHnH(=udmzoYgSJ-y3M3%#0W=$HKer~Uq0F_TX(fB)?nQ2!u^2kEc8+6MvM zQ{jR5Pc~ghNAVy2{kM%wei{G02cysT2tPX}j%5UcuHYO_IR6iK%BGEPdfZ3io%+=}YR(h)GyuguBl15mPTW?+ z<}%(bp4JdIq}U$rtfU{xl=%}#gr!1`0dX?vI<_D^xjKkSXFPJyrA+)au|?`~kMzrUgaE1@otuOeF7Fw~FI3T?N~E*VRw&Iy z(iQiZV5p34CIAmYQdK=dxKG~eB@u>xfnF4a)4}Y9lzvs{5jJdLiCbPWKX` z{IOOqHyWZ^*D5Eq<4PH2mDAnhvpxKhl)w*4iPI{Ww*nRh&}LOohYD1sjCT9tq+F*T zz-pFuD#w-PMMB%JG*1cUezh)2qE@OmE4ydMrMjq{)oay88Qt#z>}ln6zm8tYC*{*- 
z3caFClt18$XdEIeN?mECGmO4Y;}g5p+C{x`aM%=w)#JS~3U|sFS!w6EEW5&_c8^Oi z(vfX;QaYf~)kPIh>Kav(qZ5cvQW5o6!oS^S1)IH?#%>iqp@pvr%(C7Lo1Rx1<%B5J zD-A-(e!Y5<&M|3*%eDP5nwB+|7+7LpiGd{s zmKa!KV2Obx29_9DVql4ZB?gulSYlv_fh7i(7+7LpiGd{smKa!KV2Obx29_9DVql4Z aB?gulSYlv_fh7i(7+7LpiGiOE1OFd*c=?O~ literal 0 HcmV?d00001