added 3rd party packages, elog, bigtree

This commit is contained in:
2024-02-27 15:40:00 +01:00
parent 277c22f800
commit 6b59fe16ce
69 changed files with 17449 additions and 0 deletions

View File

@@ -0,0 +1,914 @@
import re
from collections import OrderedDict
from typing import List, Tuple, Type
import numpy as np
import pandas as pd
from bigtree.node.node import Node
from bigtree.tree.export import tree_to_dataframe
from bigtree.tree.search import find_children, find_name
from bigtree.utils.exceptions import DuplicatedNodeError, TreeError
# Public API of this module: the `add_*` helpers extend an existing tree,
# the `*_to_tree*` helpers construct a new tree from another data structure.
__all__ = [
"add_path_to_tree",
"add_dict_to_tree_by_path",
"add_dict_to_tree_by_name",
"add_dataframe_to_tree_by_path",
"add_dataframe_to_tree_by_name",
"str_to_tree",
"list_to_tree",
"list_to_tree_by_relation",
"dict_to_tree",
"nested_dict_to_tree",
"dataframe_to_tree",
"dataframe_to_tree_by_relation",
]
def add_path_to_tree(
    tree: Node,
    path: str,
    sep: str = "/",
    duplicate_name_allowed: bool = True,
    node_attrs: dict = {},
) -> Node:
    """Add nodes and attributes to existing tree *in-place*, return node of added path.

    Adds to existing tree from a single path string.

    Path should contain `Node` name, separated by `sep`.
        - For example: Path string "a/b" refers to Node("b") with parent Node("a").
        - Path separator `sep` is for the input `path` and can be different from that of existing tree.

    Path can start from root node `name`, or start with `sep`.
        - For example: Path string can be "/a/b" or "a/b", if sep is "/".

    All paths should start from the same root node.
        - For example: Path strings should be "a/b", "a/c", "a/b/d" etc. and should not start with another root node.

    >>> from bigtree import Node, add_path_to_tree, print_tree
    >>> root = Node("a")
    >>> add_path_to_tree(root, "a/b/c")
    Node(/a/b/c, )
    >>> print_tree(root)
    a
    └── b
        └── c

    Args:
        tree (Node): existing tree
        path (str): path to be added to tree
        sep (str): path separator for input `path`
        duplicate_name_allowed (bool): indicator if nodes with duplicated `Node` name is allowed, defaults to True
        node_attrs (dict): attributes to add to node, key: attribute name, value: attribute value, optional.
            The default dictionary is only passed on to `set_attrs`, never modified here.

    Returns:
        (Node)

    Raises:
        ValueError: if `path` is empty
        TreeError: if the first path component is not the name of the tree root
        DuplicatedNodeError: if `duplicate_name_allowed` is False and a node with the same
            name already exists at a different location in the tree
    """
    if not len(path):
        raise ValueError("Path is empty, check `path`")

    tree_root = tree.root
    tree_sep = tree_root.sep
    node_type = tree_root.__class__
    branch = path.lstrip(sep).rstrip(sep).split(sep)
    if branch[0] != tree_root.node_name:
        raise TreeError(
            f"Error: Path does not have same root node, expected {tree_root.node_name}, received {branch[0]}\n"
            f"Check your input paths or verify that path separator `sep` is set correctly"
        )

    # Grow tree, walking the path one component at a time below the root
    node = tree_root
    parent_node = tree_root
    for idx, node_name in enumerate(branch[1:], start=1):
        # Path of the current component, expressed with the separator of the tree
        node_path = tree_sep.join(branch[: idx + 1])
        if not duplicate_name_allowed:
            node = find_name(tree_root, node_name)
            # Anchor the comparison on a separator boundary: a bare
            # `endswith(node_path)` would wrongly accept "/a/ba/b" for "a/b".
            # NOTE(review): relies on `path_name` starting with the tree
            # separator, as the `Node(/a/b/c, )` repr above suggests.
            if node and not node.path_name.endswith(tree_sep + node_path):
                raise DuplicatedNodeError(
                    f"Node {node_name} already exists, try setting `duplicate_name_allowed` to True "
                    f"to allow `Node` with same node name"
                )
        else:
            node = find_children(parent_node, node_name)
        if not node:
            # Component does not exist yet, create it below the current parent
            node = node_type(node_name)
            node.parent = parent_node
        parent_node = node

    # Attributes are applied to the last node of the path only
    node.set_attrs(node_attrs)
    return node
def add_dict_to_tree_by_path(
    tree: Node,
    path_attrs: dict,
    sep: str = "/",
    duplicate_name_allowed: bool = True,
) -> Node:
    """Add nodes and attributes to tree *in-place*, return root of tree.

    Every key of `path_attrs` is a node path and every value is a dictionary
    of attribute name to attribute value for the node at the end of that path.

    Path should contain `Node` name, separated by `sep`.
        - For example: Path string "a/b" refers to Node("b") with parent Node("a").
        - Path separator `sep` is for the input `path` and can be different from that of existing tree.

    Path can start from root node `name`, or start with `sep`.
        - For example: Path string can be "/a/b" or "a/b", if sep is "/".

    All paths should start from the same root node.
        - For example: Path strings should be "a/b", "a/c", "a/b/d" etc. and should not start with another root node.

    >>> from bigtree import Node, add_dict_to_tree_by_path, print_tree
    >>> root = Node("a")
    >>> path_dict = {
    ...     "a": {"age": 90},
    ...     "a/b": {"age": 65},
    ...     "a/c": {"age": 60},
    ...     "a/b/d": {"age": 40},
    ...     "a/b/e": {"age": 35},
    ...     "a/c/f": {"age": 38},
    ...     "a/b/e/g": {"age": 10},
    ...     "a/b/e/h": {"age": 6},
    ... }
    >>> root = add_dict_to_tree_by_path(root, path_dict)
    >>> print_tree(root)
    a
    ├── b
    │   ├── d
    │   └── e
    │       ├── g
    │       └── h
    └── c
        └── f

    Args:
        tree (Node): existing tree
        path_attrs (dict): dictionary containing node path and attribute information,
            key: node path, value: dict of node attribute name and attribute value
        sep (str): path separator for input `path_attrs`
        duplicate_name_allowed (bool): indicator if nodes with duplicated `Node` name is allowed, defaults to True

    Returns:
        (Node)

    Raises:
        ValueError: if `path_attrs` is empty
    """
    if len(path_attrs) == 0:
        raise ValueError("Dictionary does not contain any data, check `path_attrs`")

    root = tree.root
    # Each entry is delegated to the single-path helper, always starting from the root
    for node_path, attrs in path_attrs.items():
        add_path_to_tree(
            root,
            node_path,
            sep=sep,
            duplicate_name_allowed=duplicate_name_allowed,
            node_attrs=attrs,
        )
    return root
def add_dict_to_tree_by_name(
    tree: Node, path_attrs: dict, join_type: str = "left"
) -> Node:
    """Add attributes to tree, return *new* root of tree.

    Every key of `path_attrs` is a node name and every value is a dictionary
    of attribute name to attribute value.

    Function can return all existing tree nodes or only tree nodes that are in the input dictionary keys.
    Input dictionary keys that are not existing node names will be ignored.
    Note that if multiple nodes have the same name, attributes will be added to all nodes sharing same name.

    >>> from bigtree import Node, add_dict_to_tree_by_name, print_tree
    >>> root = Node("a")
    >>> b = Node("b", parent=root)
    >>> name_dict = {
    ...     "a": {"age": 90},
    ...     "b": {"age": 65},
    ... }
    >>> root = add_dict_to_tree_by_name(root, name_dict)
    >>> print_tree(root, attr_list=["age"])
    a [age=90]
    └── b [age=65]

    Args:
        tree (Node): existing tree
        path_attrs (dict): dictionary containing node name and attribute information,
            key: node name, value: dict of node attribute name and attribute value
        join_type (str): join type with attribute, default of 'left' takes existing tree nodes,
            if join_type is set to 'inner' it will only take tree nodes that are in `path_attrs` key and drop others

    Returns:
        (Node)

    Raises:
        ValueError: if `join_type` is not 'inner' or 'left', or if `path_attrs` is empty
    """
    if join_type not in ("inner", "left"):
        raise ValueError("`join_type` must be one of 'inner' or 'left'")
    if len(path_attrs) == 0:
        raise ValueError("Dictionary does not contain any data, check `path_attrs`")

    # One row per node name: transpose so the dictionary keys become the index,
    # then move that index into a regular "NAME" column
    attr_frame = pd.DataFrame(path_attrs).T
    attr_frame = attr_frame.rename_axis("NAME").reset_index()
    return add_dataframe_to_tree_by_name(tree, data=attr_frame, join_type=join_type)
def add_dataframe_to_tree_by_path(
    tree: Node,
    data: pd.DataFrame,
    path_col: str = "",
    attribute_cols: list = [],
    sep: str = "/",
    duplicate_name_allowed: bool = True,
) -> Node:
    """Add nodes and attributes to tree *in-place*, return root of tree.

    `path_col` and `attribute_cols` specify columns for node path and attributes to add to existing tree.
    If columns are not specified, `path_col` takes first column and all other columns are `attribute_cols`.
    The input `data` itself is left unmodified.

    Path in path column should contain `Node` name, separated by `sep`.
        - For example: Path string "a/b" refers to Node("b") with parent Node("a").
        - Path separator `sep` is for the input `path_col` and can be different from that of existing tree.

    Path in path column can start from root node `name`, or start with `sep`.
        - For example: Path string can be "/a/b" or "a/b", if sep is "/".

    All paths should start from the same root node.
        - For example: Path strings should be "a/b", "a/c", "a/b/d" etc. and should not start with another root node.

    >>> import pandas as pd
    >>> from bigtree import Node, add_dataframe_to_tree_by_path, print_tree
    >>> root = Node("a")
    >>> path_data = pd.DataFrame([
    ...     ["a", 90],
    ...     ["a/b", 65],
    ...     ["a/c", 60],
    ...     ["a/b/d", 40],
    ...     ["a/b/e", 35],
    ...     ["a/c/f", 38],
    ...     ["a/b/e/g", 10],
    ...     ["a/b/e/h", 6],
    ... ],
    ...     columns=["PATH", "age"]
    ... )
    >>> root = add_dataframe_to_tree_by_path(root, path_data)
    >>> print_tree(root, attr_list=["age"])
    a [age=90]
    ├── b [age=65]
    │   ├── d [age=40]
    │   └── e [age=35]
    │       ├── g [age=10]
    │       └── h [age=6]
    └── c [age=60]
        └── f [age=38]

    Args:
        tree (Node): existing tree
        data (pandas.DataFrame): data containing node path and attribute information
        path_col (str): column of data containing `path_name` information,
            if not set, it will take the first column of data
        attribute_cols (list): columns of data containing node attribute information,
            if not set, it will take all columns of data except `path_col`
        sep (str): path separator for input `path_col`
        duplicate_name_allowed (bool): indicator if nodes with duplicated `Node` name is allowed, defaults to True

    Returns:
        (Node)

    Raises:
        ValueError: if `data` has no columns or no rows, or if the same path
            appears with different attribute values
    """
    if not len(data.columns):
        raise ValueError("Data does not contain any columns, check `data`")
    if not len(data):
        raise ValueError("Data does not contain any rows, check `data`")

    if not path_col:
        path_col = data.columns[0]
    if not len(attribute_cols):
        attribute_cols = list(data.columns)
        attribute_cols.remove(path_col)

    tree_root = tree.root

    # Work on a private copy limited to the selected columns: the caller's
    # DataFrame must not be modified, and columns outside `attribute_cols`
    # must not end up as node attributes.
    data = data[[path_col] + list(attribute_cols)].copy()
    data[path_col] = data[path_col].str.lstrip(sep).str.rstrip(sep)

    # Identical rows are fine; the same path with differing attributes is not.
    # Values are compared as strings so unhashable cell values do not break the check.
    data2 = data.astype(str).drop_duplicates()
    _duplicate_check = (
        data2[path_col]
        .value_counts()
        .to_frame("counts")
        .rename_axis(path_col)
        .reset_index()
    )
    _duplicate_check = _duplicate_check[_duplicate_check["counts"] > 1]
    if len(_duplicate_check):
        raise ValueError(
            f"There exists duplicate path with different attributes\nCheck {_duplicate_check}"
        )

    # `records` orientation does not require a unique index (unlike `index`)
    for row in data.to_dict(orient="records"):
        node_path = row.pop(path_col)
        # Null attribute values are skipped rather than stored on the node
        node_attrs = {k: v for k, v in row.items() if not np.all(pd.isnull(v))}
        add_path_to_tree(
            tree_root,
            node_path,
            sep=sep,
            duplicate_name_allowed=duplicate_name_allowed,
            node_attrs=node_attrs,
        )
    return tree_root
def add_dataframe_to_tree_by_name(
    tree: Node,
    data: pd.DataFrame,
    name_col: str = "",
    attribute_cols: list = [],
    join_type: str = "left",
) -> Node:
    """Add attributes to tree, return *new* root of tree.

    `name_col` and `attribute_cols` specify columns for node name and attributes to add to existing tree.
    If columns are not specified, the first column will be taken as name column and all other columns as attributes.

    Function can return all existing tree nodes or only tree nodes that are in the input data node names.
    Input data node names that are not existing node names will be ignored.
    Note that if multiple nodes have the same name, attributes will be added to all nodes sharing same name.

    >>> import pandas as pd
    >>> from bigtree import Node, add_dataframe_to_tree_by_name, print_tree
    >>> root = Node("a")
    >>> b = Node("b", parent=root)
    >>> name_data = pd.DataFrame([
    ...     ["a", 90],
    ...     ["b", 65],
    ... ],
    ...     columns=["NAME", "age"]
    ... )
    >>> root = add_dataframe_to_tree_by_name(root, name_data)
    >>> print_tree(root, attr_list=["age"])
    a [age=90]
    └── b [age=65]

    Args:
        tree (Node): existing tree
        data (pandas.DataFrame): data containing node name and attribute information
        name_col (str): column of data containing `name` information,
            if not set, it will take the first column of data
        attribute_cols (list): column(s) of data containing node attribute information,
            if not set, it will take all columns of data except `name_col`
        join_type (str): join type with attribute, default of 'left' takes existing tree nodes,
            if join_type is set to 'inner' it will only take tree nodes with attributes and drop the other nodes

    Returns:
        (Node)

    Raises:
        ValueError: if `join_type` is not 'inner' or 'left', if `data` has no
            columns or no rows, or if the same name appears with different attribute values
    """
    if join_type not in ["inner", "left"]:
        raise ValueError("`join_type` must be one of 'inner' or 'left'")

    if not len(data.columns):
        raise ValueError("Data does not contain any columns, check `data`")
    if not len(data):
        raise ValueError("Data does not contain any rows, check `data`")

    if not name_col:
        name_col = data.columns[0]
    if not len(attribute_cols):
        attribute_cols = list(data.columns)
        attribute_cols.remove(name_col)

    # Attribute data, limited to the selected columns so that unrelated
    # columns of `data` are neither merged in nor turned into attributes
    path_col = "PATH"
    data = data[[name_col] + list(attribute_cols)]
    data2 = data.astype(str).drop_duplicates()
    _duplicate_check = (
        data2[name_col]
        .value_counts()
        .to_frame("counts")
        .rename_axis(name_col)
        .reset_index()
    )
    _duplicate_check = _duplicate_check[_duplicate_check["counts"] > 1]
    if len(_duplicate_check):
        raise ValueError(
            f"There exists duplicate name with different attributes\nCheck {_duplicate_check}"
        )

    # Tree data: flatten the existing tree into one row per node
    tree_root = tree.root
    sep = tree_root.sep
    node_type = tree_root.__class__
    data_tree = tree_to_dataframe(
        tree_root, name_col=name_col, path_col=path_col, all_attrs=True
    )
    # Attributes supplied by `data` replace same-named attributes already on the tree
    common_cols = list(set(data_tree.columns).intersection(attribute_cols))
    data_tree = data_tree.drop(columns=common_cols)

    # Join tree rows with the attribute rows on node name, then rebuild the
    # tree from the path column; the name column is no longer needed
    data_tree_attrs = pd.merge(data_tree, data, on=name_col, how=join_type)
    data_tree_attrs = data_tree_attrs.drop(columns=name_col)

    return dataframe_to_tree(
        data_tree_attrs, path_col=path_col, sep=sep, node_type=node_type
    )
def str_to_tree(
    tree_string: str,
    tree_prefix_list: List[str] = [],
    node_type: Type[Node] = Node,
) -> Node:
    r"""Construct tree from tree string

    The first line is the root. For every following line, the depth of the
    node is derived from the length of the prefix in front of the node name,
    measured in multiples of the prefix length of the first child line.

    >>> from bigtree import str_to_tree, print_tree
    >>> tree_str = 'a\n├── b\n│   ├── d\n│   └── e\n│       ├── g\n│       └── h\n└── c\n    └── f'
    >>> root = str_to_tree(tree_str, tree_prefix_list=["├──", "└──"])
    >>> print_tree(root)
    a
    ├── b
    │   ├── d
    │   └── e
    │       ├── g
    │       └── h
    └── c
        └── f

    Args:
        tree_string (str): String to construct tree
        tree_prefix_list (list): List of prefix to mark the end of tree branch/stem and start of node name, optional.
            Prefixes are matched literally. If not specified, it will infer unicode characters and whitespace as prefix.
        node_type (Type[Node]): node type of tree to be created, defaults to Node

    Returns:
        (Node)

    Raises:
        ValueError: if `tree_string` is empty, if no prefix can be determined,
            or if a line has a prefix length that is not a multiple of the first one
    """
    tree_string = tree_string.strip("\n")
    if not len(tree_string):
        raise ValueError("Tree string does not contain any data, check `tree_string`")
    tree_list = tree_string.split("\n")
    tree_root = node_type(tree_list[0])

    # Prefixes are literal text, so they must be escaped before being joined
    # into an alternation; otherwise prefixes such as "|--" or "+--" are
    # interpreted as regular expression syntax.
    prefix_pattern = None
    if len(tree_prefix_list):
        prefix_pattern = re.compile(
            "|".join(re.escape(prefix) for prefix in tree_prefix_list)
        )

    # Infer prefix length
    prefix_length = None
    cur_parent = tree_root
    for node_str in tree_list[1:]:
        if prefix_pattern is not None:
            # Node name is whatever follows the last prefix occurrence
            node_name = prefix_pattern.split(node_str)[-1].lstrip()
        else:
            # Drop all non-ASCII (box drawing) characters, then the leading whitespace
            node_name = node_str.encode("ascii", "ignore").decode("ascii").lstrip()

        # Find node parent
        if not prefix_length:
            prefix_length = node_str.index(node_name)
            if not prefix_length:
                raise ValueError(
                    f"Invalid prefix, prefix should be unicode character or whitespace, "
                    f"otherwise specify one or more prefixes in `tree_prefix_list`, check: {node_str}"
                )
        node_prefix_length = node_str.index(node_name)
        if node_prefix_length % prefix_length:
            raise ValueError(
                f"Tree string have different prefix length, check branch: {node_str}"
            )
        # Climb back up until the current parent sits exactly one level above the new node
        while cur_parent.depth > node_prefix_length / prefix_length:
            cur_parent = cur_parent.parent

        # Link node
        child_node = node_type(node_name)
        child_node.parent = cur_parent
        cur_parent = child_node
    return tree_root
def list_to_tree(
    paths: list,
    sep: str = "/",
    duplicate_name_allowed: bool = True,
    node_type: Type[Node] = Node,
) -> Node:
    """Construct tree from list of path strings.

    Path should contain `Node` name, separated by `sep`.
        - For example: Path string "a/b" refers to Node("b") with parent Node("a").

    Path can start from root node `name`, or start with `sep`.
        - For example: Path string can be "/a/b" or "a/b", if sep is "/".

    All paths should start from the same root node.
        - For example: Path strings should be "a/b", "a/c", "a/b/d" etc. and should not start with another root node.

    >>> from bigtree import list_to_tree, print_tree
    >>> path_list = ["a/b", "a/c", "a/b/d", "a/b/e", "a/c/f", "a/b/e/g", "a/b/e/h"]
    >>> root = list_to_tree(path_list)
    >>> print_tree(root)
    a
    ├── b
    │   ├── d
    │   └── e
    │       ├── g
    │       └── h
    └── c
        └── f

    Args:
        paths (list): list containing path strings
        sep (str): path separator for input `paths` and created tree, defaults to `/`
        duplicate_name_allowed (bool): indicator if nodes with duplicated `Node` name is allowed, defaults to True
        node_type (Type[Node]): node type of tree to be created, defaults to Node

    Returns:
        (Node)

    Raises:
        ValueError: if `paths` is empty
    """
    if len(paths) == 0:
        raise ValueError("Path list does not contain any data, check `paths`")

    # Drop repeated paths while keeping the order of first appearance
    unique_paths = list(OrderedDict.fromkeys(paths))

    # The root name is the first component of the first path
    first_path = unique_paths[0]
    root_name = first_path.lstrip(sep).split(sep)[0]
    root_node = node_type(root_name)
    root_node.sep = sep

    for single_path in unique_paths:
        add_path_to_tree(
            root_node,
            single_path,
            sep=sep,
            duplicate_name_allowed=duplicate_name_allowed,
        )

    # Separator is assigned again once the tree is complete, as before
    root_node.sep = sep
    return root_node
def list_to_tree_by_relation(
    relations: List[Tuple[str, str]],
    node_type: Type[Node] = Node,
) -> Node:
    """Construct tree from list of tuple containing parent-child names.

    Note that node names must be unique since tree is created from parent-child names,
    except for leaf nodes - names of leaf nodes may be repeated as there is no confusion.

    >>> from bigtree import list_to_tree_by_relation, print_tree
    >>> relations_list = [("a", "b"), ("a", "c"), ("b", "d"), ("b", "e"), ("c", "f"), ("e", "g"), ("e", "h")]
    >>> root = list_to_tree_by_relation(relations_list)
    >>> print_tree(root)
    a
    ├── b
    │   ├── d
    │   └── e
    │       ├── g
    │       └── h
    └── c
        └── f

    Args:
        relations (list): list containing tuple containing parent-child names
        node_type (Type[Node]): node type of tree to be created, defaults to Node

    Returns:
        (Node)

    Raises:
        ValueError: if `relations` is empty
    """
    if len(relations) == 0:
        raise ValueError("Path list does not contain any data, check `relations`")

    # Each tuple is (parent, child); the DataFrame based builder does the actual work
    relation_frame = pd.DataFrame(relations, columns=["parent", "child"])
    return dataframe_to_tree_by_relation(
        relation_frame,
        child_col="child",
        parent_col="parent",
        node_type=node_type,
    )
def dict_to_tree(
    path_attrs: dict,
    sep: str = "/",
    duplicate_name_allowed: bool = True,
    node_type: Type[Node] = Node,
) -> Node:
    """Construct tree from nested dictionary using path.

    Every key of `path_attrs` is a node path and every value is a dictionary
    of attribute name to attribute value.

    Path should contain `Node` name, separated by `sep`.
        - For example: Path string "a/b" refers to Node("b") with parent Node("a").

    Path can start from root node `name`, or start with `sep`.
        - For example: Path string can be "/a/b" or "a/b", if sep is "/".

    All paths should start from the same root node.
        - For example: Path strings should be "a/b", "a/c", "a/b/d" etc. and should not start with another root node.

    >>> from bigtree import dict_to_tree, print_tree
    >>> path_dict = {
    ...     "a": {"age": 90},
    ...     "a/b": {"age": 65},
    ...     "a/c": {"age": 60},
    ...     "a/b/d": {"age": 40},
    ...     "a/b/e": {"age": 35},
    ...     "a/c/f": {"age": 38},
    ...     "a/b/e/g": {"age": 10},
    ...     "a/b/e/h": {"age": 6},
    ... }
    >>> root = dict_to_tree(path_dict)
    >>> print_tree(root, attr_list=["age"])
    a [age=90]
    ├── b [age=65]
    │   ├── d [age=40]
    │   └── e [age=35]
    │       ├── g [age=10]
    │       └── h [age=6]
    └── c [age=60]
        └── f [age=38]

    Args:
        path_attrs (dict): dictionary containing path and node attribute information,
            key: path, value: dict of tree attribute and attribute value
        sep (str): path separator of input `path_attrs` and created tree, defaults to `/`
        duplicate_name_allowed (bool): indicator if nodes with duplicated `Node` name is allowed, defaults to True
        node_type (Type[Node]): node type of tree to be created, defaults to Node

    Returns:
        (Node)

    Raises:
        ValueError: if `path_attrs` is empty
    """
    if len(path_attrs) == 0:
        raise ValueError("Dictionary does not contain any data, check `path_attrs`")

    # One row per path: transpose so the dictionary keys become the index,
    # then move that index into a leading "PATH" column
    path_frame = pd.DataFrame(path_attrs).T
    path_frame = path_frame.rename_axis("PATH").reset_index()
    return dataframe_to_tree(
        path_frame,
        sep=sep,
        duplicate_name_allowed=duplicate_name_allowed,
        node_type=node_type,
    )
def nested_dict_to_tree(
    node_attrs: dict,
    name_key: str = "name",
    child_key: str = "children",
    node_type: Type[Node] = Node,
) -> Node:
    """Construct tree from nested recursive dictionary.

    - ``key``: `name_key`, `child_key`, or any attributes key.
    - ``value`` of `name_key` (str): node name.
    - ``value`` of `child_key` (list): list of dict containing `name_key` and `child_key` (recursive).

    >>> from bigtree import nested_dict_to_tree, print_tree
    >>> path_dict = {
    ...     "name": "a",
    ...     "age": 90,
    ...     "children": [
    ...         {"name": "b",
    ...          "age": 65,
    ...          "children": [
    ...              {"name": "d", "age": 40},
    ...              {"name": "e", "age": 35, "children": [
    ...                  {"name": "g", "age": 10},
    ...              ]},
    ...          ]},
    ...     ],
    ... }
    >>> root = nested_dict_to_tree(path_dict)
    >>> print_tree(root, attr_list=["age"])
    a [age=90]
    └── b [age=65]
        ├── d [age=40]
        └── e [age=35]
            └── g [age=10]

    Args:
        node_attrs (dict): dictionary containing node, children, and node attribute information,
            key: `name_key` and `child_key`
            value of `name_key` (str): node name
            value of `child_key` (list): list of dict containing `name_key` and `child_key` (recursive)
        name_key (str): key of node name, value is type str
        child_key (str): key of child list, value is type list
        node_type (Type[Node]): node type of tree to be created, defaults to Node

    Returns:
        (Node)
    """

    def _build_subtree(spec, parent=None):
        # The name is mandatory (KeyError if absent), children are optional
        name = spec[name_key]
        children = spec.get(child_key, [])
        # Everything that is neither the name nor the child list is a node attribute
        extra_attrs = {
            key: value
            for key, value in spec.items()
            if key != name_key and key != child_key
        }
        created = node_type(name, parent=parent, **extra_attrs)
        for child_spec in children:
            _build_subtree(child_spec, parent=created)
        return created

    return _build_subtree(node_attrs)
def dataframe_to_tree(
    data: pd.DataFrame,
    path_col: str = "",
    attribute_cols: list = [],
    sep: str = "/",
    duplicate_name_allowed: bool = True,
    node_type: Type[Node] = Node,
) -> Node:
    """Construct tree from pandas DataFrame using path, return root of tree.

    `path_col` and `attribute_cols` specify columns for node path and attributes to construct tree.
    If columns are not specified, `path_col` takes first column and all other columns are `attribute_cols`.
    The input `data` itself is left unmodified.

    Path in path column can start from root node `name`, or start with `sep`.
        - For example: Path string can be "/a/b" or "a/b", if sep is "/".

    Path in path column should contain `Node` name, separated by `sep`.
        - For example: Path string "a/b" refers to Node("b") with parent Node("a").

    All paths should start from the same root node.
        - For example: Path strings should be "a/b", "a/c", "a/b/d" etc. and should not start with another root node.

    >>> import pandas as pd
    >>> from bigtree import dataframe_to_tree, print_tree
    >>> path_data = pd.DataFrame([
    ...     ["a", 90],
    ...     ["a/b", 65],
    ...     ["a/c", 60],
    ...     ["a/b/d", 40],
    ...     ["a/b/e", 35],
    ...     ["a/c/f", 38],
    ...     ["a/b/e/g", 10],
    ...     ["a/b/e/h", 6],
    ... ],
    ...     columns=["PATH", "age"]
    ... )
    >>> root = dataframe_to_tree(path_data)
    >>> print_tree(root, attr_list=["age"])
    a [age=90]
    ├── b [age=65]
    │   ├── d [age=40]
    │   └── e [age=35]
    │       ├── g [age=10]
    │       └── h [age=6]
    └── c [age=60]
        └── f [age=38]

    Args:
        data (pandas.DataFrame): data containing path and node attribute information
        path_col (str): column of data containing `path_name` information,
            if not set, it will take the first column of data
        attribute_cols (list): columns of data containing node attribute information,
            if not set, it will take all columns of data except `path_col`
        sep (str): path separator of input `path_col` and created tree, defaults to `/`
        duplicate_name_allowed (bool): indicator if nodes with duplicated `Node` name is allowed, defaults to True
        node_type (Type[Node]): node type of tree to be created, defaults to Node

    Returns:
        (Node)

    Raises:
        ValueError: if `data` has no columns or no rows, or if the same path
            appears with different attribute values
    """
    if not len(data.columns):
        raise ValueError("Data does not contain any columns, check `data`")
    if not len(data):
        raise ValueError("Data does not contain any rows, check `data`")

    if not path_col:
        path_col = data.columns[0]
    if not len(attribute_cols):
        attribute_cols = list(data.columns)
        attribute_cols.remove(path_col)

    # Private copy limited to the selected columns, with the path column first:
    # the caller's DataFrame stays untouched and unselected columns cannot
    # become node attributes.
    data = data[[path_col] + list(attribute_cols)].copy()
    data[path_col] = data[path_col].str.lstrip(sep).str.rstrip(sep)

    # Fail before any node is created if a path carries conflicting attributes
    data2 = data.astype(str).drop_duplicates()
    _duplicate_check = (
        data2[path_col]
        .value_counts()
        .to_frame("counts")
        .rename_axis(path_col)
        .reset_index()
    )
    _duplicate_check = _duplicate_check[_duplicate_check["counts"] > 1]
    if len(_duplicate_check):
        raise ValueError(
            f"There exists duplicate path with different attributes\nCheck {_duplicate_check}"
        )

    # The root name is the first component of the first path
    root_name = data[path_col].values[0].split(sep)[0]
    root_node = node_type(root_name)
    # Column selection is forwarded explicitly so that a `path_col` which is
    # not the first column of the input is honoured
    add_dataframe_to_tree_by_path(
        root_node,
        data,
        path_col=path_col,
        attribute_cols=list(attribute_cols),
        sep=sep,
        duplicate_name_allowed=duplicate_name_allowed,
    )
    root_node.sep = sep
    return root_node
def dataframe_to_tree_by_relation(
    data: pd.DataFrame,
    child_col: str = "",
    parent_col: str = "",
    attribute_cols: list = [],
    node_type: Type[Node] = Node,
) -> Node:
    """Construct tree from pandas DataFrame using parent and child names, return root of tree.

    Note that node names must be unique since tree is created from parent-child names,
    except for leaf nodes - names of leaf nodes may be repeated as there is no confusion.

    `child_col` and `parent_col` specify columns for child name and parent name to construct tree.
    `attribute_cols` specify columns for node attribute for child name.
    If columns are not specified, `child_col` takes first column, `parent_col` takes second column, and all other
    columns are `attribute_cols`.

    The root is the child of the row whose parent is null; if there is no such
    row, it is the single parent name that never appears as a child.

    >>> import pandas as pd
    >>> from bigtree import dataframe_to_tree_by_relation, print_tree
    >>> relation_data = pd.DataFrame([
    ...     ["a", None, 90],
    ...     ["b", "a", 65],
    ...     ["c", "a", 60],
    ...     ["d", "b", 40],
    ...     ["e", "b", 35],
    ...     ["f", "c", 38],
    ...     ["g", "e", 10],
    ...     ["h", "e", 6],
    ... ],
    ...     columns=["child", "parent", "age"]
    ... )
    >>> root = dataframe_to_tree_by_relation(relation_data)
    >>> print_tree(root, attr_list=["age"])
    a [age=90]
    ├── b [age=65]
    │   ├── d [age=40]
    │   └── e [age=35]
    │       ├── g [age=10]
    │       └── h [age=6]
    └── c [age=60]
        └── f [age=38]

    Args:
        data (pandas.DataFrame): data containing path and node attribute information
        child_col (str): column of data containing child name information,
            if not set, it will take the first column of data
        parent_col (str): column of data containing parent name information,
            if not set, it will take the second column of data
        attribute_cols (list): columns of data containing node attribute information,
            if not set, it will take all columns of data except `child_col` and `parent_col`
        node_type (Type[Node]): node type of tree to be created, defaults to Node

    Returns:
        (Node)

    Raises:
        ValueError: if `data` has no columns or no rows, if a non-leaf child
            name appears under more than one parent, or if the root cannot be determined
    """
    if not len(data.columns):
        raise ValueError("Data does not contain any columns, check `data`")
    if not len(data):
        raise ValueError("Data does not contain any rows, check `data`")

    if not child_col:
        child_col = data.columns[0]
    if not parent_col:
        parent_col = data.columns[1]
    if not len(attribute_cols):
        attribute_cols = list(data.columns)
        attribute_cols.remove(child_col)
        attribute_cols.remove(parent_col)

    # Only the selected columns take part, so columns outside
    # `attribute_cols` are not turned into node attributes
    data = data[[child_col, parent_col] + list(attribute_cols)]

    data_check = data[[child_col, parent_col]].drop_duplicates()
    # Filter for child nodes that are parent of other nodes
    data_check = data_check[data_check[child_col].isin(data_check[parent_col])]
    _duplicate_check = (
        data_check[child_col]
        .value_counts()
        .to_frame("counts")
        .rename_axis(child_col)
        .reset_index()
    )
    _duplicate_check = _duplicate_check[_duplicate_check["counts"] > 1]
    if len(_duplicate_check):
        raise ValueError(
            f"There exists duplicate child with different parent where the child is also a parent node.\n"
            f"Duplicated node names should not happen, but can only exist in leaf nodes to avoid confusion.\n"
            f"Check {_duplicate_check}"
        )

    # If parent-child contains None -> root
    root_row = data[data[parent_col].isnull()]
    root_names = list(root_row[child_col])
    if not len(root_names):
        # Otherwise the root is the parent that is nobody's child
        root_names = list(set(data[parent_col]) - set(data[child_col]))
    if len(root_names) != 1:
        raise ValueError(f"Unable to determine root node\nCheck {root_names}")
    root_name = root_names[0]
    root_node = node_type(root_name)

    def retrieve_attr(row):
        """Turn one data row into node keyword arguments (name plus non-null attributes)."""
        node_attrs = {
            k: v
            for k, v in row.items()
            if k not in (child_col, parent_col) and not np.all(pd.isnull(v))
        }
        # Set the name last so it survives even when `child_col` is itself called "name"
        node_attrs["name"] = row[child_col]
        return node_attrs

    def recursive_create_child(parent_node):
        """Create all children of `parent_node` from `data`, depth first."""
        child_rows = data[data[parent_col] == parent_node.node_name]
        # `records` orientation does not require a unique index (unlike `index`)
        for row in child_rows.to_dict(orient="records"):
            child_node = node_type(**retrieve_attr(row))
            child_node.parent = parent_node
            recursive_create_child(child_node)

    # Create root node attributes
    if len(root_row):
        row = root_row.to_dict(orient="records")[0]
        root_node.set_attrs(retrieve_attr(row))
    recursive_create_child(root_node)
    return root_node

View File

@@ -0,0 +1,831 @@
import collections
from typing import Any, Dict, List, Tuple, Union
import pandas as pd
from bigtree.node.node import Node
from bigtree.tree.search import find_path
from bigtree.utils.iterators import preorder_iter
# Public API of this module: helpers that print a tree or export it to
# another representation.
__all__ = [
"print_tree",
"yield_tree",
"tree_to_dict",
"tree_to_nested_dict",
"tree_to_dataframe",
"tree_to_dot",
"tree_to_pillow",
]
# Built-in print styles, each a tuple of (stem, branch, final stem) symbols.
# Within one style all three symbols have the same width (4 characters for the
# built-in styles) so that nested levels line up; the stem is therefore padded
# with spaces to the width of the branch symbols. "custom" is a placeholder
# for user supplied symbols.
available_styles = {
    "ansi": ("|   ", "|-- ", "`-- "),
    "ascii": ("|   ", "|-- ", "+-- "),
    "const": ("\u2502   ", "\u251c\u2500\u2500 ", "\u2514\u2500\u2500 "),
    "const_bold": ("\u2503   ", "\u2523\u2501\u2501 ", "\u2517\u2501\u2501 "),
    "rounded": ("\u2502   ", "\u251c\u2500\u2500 ", "\u2570\u2500\u2500 "),
    "double": ("\u2551   ", "\u2560\u2550\u2550 ", "\u255a\u2550\u2550 "),
    "custom": ("", "", ""),
}
def print_tree(
    tree: Node,
    node_name_or_path: str = "",
    max_depth: int = None,
    attr_list: List[str] = None,
    all_attrs: bool = False,
    attr_omit_null: bool = True,
    attr_bracket: List[str] = ["[", "]"],
    style: str = "const",
    custom_style: List[str] = [],
) -> None:
    """Print tree to console, starting from `tree`.

    - Able to select which node to print from, resulting in a subtree, using `node_name_or_path`
    - Able to customize for maximum depth to print, using `max_depth`
    - Able to choose which attributes to show or show all attributes, using `attr_list` and `all_attrs`
    - Able to omit showing of attributes if it is null, using `attr_omit_null`
    - Able to customize open and close brackets if attributes are shown, using `attr_bracket`
    - Able to customize style, to choose from `ansi`, `ascii`, `const`, `const_bold`, `rounded`, `double`,
      and `custom` style
        - Default style is `const` style
        - If style is set to custom, user can choose their own style for stem, branch and final stem icons
        - Stem, branch, and final stem symbol should have the same number of characters

    **Printing tree**

    >>> from bigtree import Node, print_tree
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("d", age=40, parent=b)
    >>> e = Node("e", age=35, parent=b)
    >>> print_tree(root)
    a
    ├── b
    │   ├── d
    │   └── e
    └── c

    **Printing Sub-tree**

    >>> print_tree(root, node_name_or_path="b")
    b
    ├── d
    └── e

    >>> print_tree(root, max_depth=2)
    a
    ├── b
    └── c

    **Printing Attributes**

    >>> print_tree(root, attr_list=["age"])
    a [age=90]
    ├── b [age=65]
    │   ├── d [age=40]
    │   └── e [age=35]
    └── c [age=60]

    >>> print_tree(root, attr_list=["age"], attr_bracket=["*(", ")"])
    a *(age=90)
    ├── b *(age=65)
    │   ├── d *(age=40)
    │   └── e *(age=35)
    └── c *(age=60)

    **Available Styles**

    >>> print_tree(root, style="ansi")
    a
    |-- b
    |   |-- d
    |   `-- e
    `-- c

    >>> print_tree(root, style="ascii")
    a
    |-- b
    |   |-- d
    |   +-- e
    +-- c

    >>> print_tree(root, style="const")
    a
    ├── b
    │   ├── d
    │   └── e
    └── c

    >>> print_tree(root, style="const_bold")
    a
    ┣━━ b
    ┃   ┣━━ d
    ┃   ┗━━ e
    ┗━━ c

    >>> print_tree(root, style="rounded")
    a
    ├── b
    │   ├── d
    │   ╰── e
    ╰── c

    >>> print_tree(root, style="double")
    a
    ╠══ b
    ║   ╠══ d
    ║   ╚══ e
    ╚══ c

    Args:
        tree (Node): tree to print
        node_name_or_path (str): node to print from, becomes the root node of printing
        max_depth (int): maximum depth of tree to print, based on `depth` attribute, optional
        attr_list (list): list of node attributes to print, optional
        all_attrs (bool): indicator to show all attributes, overrides `attr_list`
        attr_omit_null (bool): indicator whether to omit showing of null attributes, defaults to True.
            Only applies to `attr_list`; falsy but non-null values such as 0 or False are still shown
        attr_bracket (List[str]): open and close bracket for `all_attrs` or `attr_list`
        style (str): style of print, defaults to `const` style
        custom_style (List[str]): style of stem, branch and final stem, used when `style` is set to 'custom'

    Raises:
        ValueError: if attributes are to be shown and `attr_bracket` does not
            contain exactly an open and a close bracket
    """
    show_attrs = bool(all_attrs or attr_list)
    attr_bracket_open = attr_bracket_close = ""
    if show_attrs:
        # Validated once up front, the brackets do not depend on the node
        if len(attr_bracket) != 2:
            raise ValueError(
                f"Expect open and close brackets in `attr_bracket`, received {attr_bracket}"
            )
        attr_bracket_open, attr_bracket_close = attr_bracket

    for pre_str, fill_str, _node in yield_tree(
        tree=tree,
        node_name_or_path=node_name_or_path,
        max_depth=max_depth,
        style=style,
        custom_style=custom_style,
    ):
        # Get node_str (node name and attributes)
        attr_str = ""
        if show_attrs:
            if all_attrs:
                attrs = _node.describe(exclude_attributes=["name"], exclude_prefix="_")
            else:
                attrs = [
                    (attr_name, _node.get_attr(attr_name)) for attr_name in attr_list
                ]
                if attr_omit_null:
                    # Compare against None instead of testing truthiness, so that
                    # legitimate values such as 0, False or "" are not dropped.
                    # NOTE(review): presumes `get_attr` returns None for a
                    # missing attribute - confirm against `Node.get_attr`.
                    attrs = [(k, v) for k, v in attrs if v is not None]
            attr_str = ", ".join(f"{k}={v}" for k, v in attrs)
            if attr_str:
                attr_str = f" {attr_bracket_open}{attr_str}{attr_bracket_close}"
        node_str = f"{_node.node_name}{attr_str}"
        print(f"{pre_str}{fill_str}{node_str}")
def yield_tree(
    tree: Node,
    node_name_or_path: str = "",
    max_depth: int = None,
    style: str = "const",
    custom_style: List[str] = [],
):
    """Generator method for customizing printing of tree, starting from `tree`.

    Yields one `(pre_str, fill_str, node)` tuple per node, in pre-order, where `pre_str` is the
    indentation made of stems/gaps for the ancestors and `fill_str` is the branch symbol that
    directly precedes the node name. Both strings are empty for the root of the printed tree.
    The traversal runs on a *copy* of `tree`, so the yielded nodes belong to that copy.

    - Able to select which node to print from, resulting in a subtree, using `node_name_or_path`
    - Able to customize for maximum depth to print, using `max_depth`
    - Able to customize style, to choose from `ansi`, `ascii`, `const`, `const_bold`, `rounded`,
      `double`, and `custom` style
        - Default style is `const` style
        - If style is set to custom, user can choose their own style for stem, branch and final stem icons
        - Stem, branch, and final stem symbol should have the same number of characters

    **Printing tree**

    >>> from bigtree import Node, yield_tree
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("d", age=40, parent=b)
    >>> e = Node("e", age=35, parent=b)
    >>> for branch, stem, node in yield_tree(root):
    ...     print(f"{branch}{stem}{node.node_name}")
    a
    ├── b
    │   ├── d
    │   └── e
    └── c

    **Printing Sub-tree**

    >>> for branch, stem, node in yield_tree(root, node_name_or_path="b"):
    ...     print(f"{branch}{stem}{node.node_name}")
    b
    ├── d
    └── e

    >>> for branch, stem, node in yield_tree(root, max_depth=2):
    ...     print(f"{branch}{stem}{node.node_name}")
    a
    ├── b
    └── c

    **Available Styles**

    >>> for branch, stem, node in yield_tree(root, style="ansi"):
    ...     print(f"{branch}{stem}{node.node_name}")
    a
    |-- b
    |   |-- d
    |   `-- e
    `-- c

    >>> for branch, stem, node in yield_tree(root, style="ascii"):
    ...     print(f"{branch}{stem}{node.node_name}")
    a
    |-- b
    |   |-- d
    |   +-- e
    +-- c

    >>> for branch, stem, node in yield_tree(root, style="const"):
    ...     print(f"{branch}{stem}{node.node_name}")
    a
    ├── b
    │   ├── d
    │   └── e
    └── c

    >>> for branch, stem, node in yield_tree(root, style="const_bold"):
    ...     print(f"{branch}{stem}{node.node_name}")
    a
    ┣━━ b
    ┃   ┣━━ d
    ┃   ┗━━ e
    ┗━━ c

    >>> for branch, stem, node in yield_tree(root, style="rounded"):
    ...     print(f"{branch}{stem}{node.node_name}")
    a
    ├── b
    │   ├── d
    │   ╰── e
    ╰── c

    >>> for branch, stem, node in yield_tree(root, style="double"):
    ...     print(f"{branch}{stem}{node.node_name}")
    a
    ╠══ b
    ║   ╠══ d
    ║   ╚══ e
    ╚══ c

    **Printing Attributes**

    >>> for branch, stem, node in yield_tree(root, style="const"):
    ...     print(f"{branch}{stem}{node.node_name} [age={node.age}]")
    a [age=90]
    ├── b [age=65]
    │   ├── d [age=40]
    │   └── e [age=35]
    └── c [age=60]

    Args:
        tree (Node): tree to print
        node_name_or_path (str): node to print from, becomes the root node of printing, optional
        max_depth (int): maximum depth of tree to print, based on `depth` attribute, optional
        style (str): style of print, defaults to `const` style
        custom_style (List[str]): style of stem, branch and final stem, used when `style` is set to 'custom'

    Raises:
        ValueError: if `style` is not a known style, if `node_name_or_path` does not match any
            node, if `style` is 'custom' and `custom_style` does not hold exactly three symbols,
            or if the three symbols differ in length
    """
    if style not in available_styles:
        raise ValueError(
            f"Choose one of {available_styles.keys()} style, use `custom` to define own style"
        )

    # Work on a copy so that detaching the subtree below never alters the caller's tree
    tree = tree.copy()
    if node_name_or_path:
        tree = find_path(tree, node_name_or_path)
        # The search gives back a falsy result when nothing matches; fail here with a clear
        # message rather than with an AttributeError on the `is_root` access below
        if not tree:
            raise ValueError(
                f"Cannot find any node matching name or path {node_name_or_path}"
            )
    if not tree.is_root:
        # Detach so that the selected node becomes the root of the printed tree
        tree.parent = None

    # Set style
    if style == "custom":
        if len(custom_style) != 3:
            raise ValueError(
                "Custom style selected, please specify the style of stem, branch, and final stem in `custom_style`"
            )
        style_stem, style_branch, style_stem_final = custom_style
    else:
        style_stem, style_branch, style_stem_final = available_styles[style]

    if not len(style_stem) == len(style_branch) == len(style_stem_final):
        raise ValueError(
            "`style_stem`, `style_branch`, and `style_stem_final` are of different length"
        )

    gap_str = " " * len(style_stem)
    # Depths (relative to the printed root) that still have siblings left to print;
    # those depths need a stem drawn in the prefix of every deeper node
    unclosed_depth = set()
    initial_depth = tree.depth
    for _node in preorder_iter(tree, max_depth=max_depth):
        pre_str = ""
        fill_str = ""
        if not _node.is_root:
            node_depth = _node.depth - initial_depth

            # Get fill_str (style_branch or style_stem_final)
            if _node.right_sibling:
                unclosed_depth.add(node_depth)
                fill_str = style_branch
            else:
                unclosed_depth.discard(node_depth)
                fill_str = style_stem_final

            # Get pre_str (one style_stem or gap per ancestor level)
            pre_str = "".join(
                style_stem if _depth in unclosed_depth else gap_str
                for _depth in range(1, node_depth)
            )

        yield pre_str, fill_str, _node
def tree_to_dict(
    tree: Node,
    name_key: str = "name",
    parent_key: str = "",
    attr_dict: dict = {},
    all_attrs: bool = False,
    max_depth: int = None,
    skip_depth: int = None,
    leaf_only: bool = False,
) -> Dict[str, Any]:
    """Export tree to dictionary.

    Every descendant of `tree` is visited; `tree` may be the root node or any child node.
    The result maps each exported node's path to a dictionary of that node's details.

    >>> from bigtree import Node, tree_to_dict
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("d", age=40, parent=b)
    >>> e = Node("e", age=35, parent=b)
    >>> tree_to_dict(root, name_key="name", parent_key="parent", attr_dict={"age": "person age"})
    {'/a': {'name': 'a', 'parent': None, 'person age': 90}, '/a/b': {'name': 'b', 'parent': 'a', 'person age': 65}, '/a/b/d': {'name': 'd', 'parent': 'b', 'person age': 40}, '/a/b/e': {'name': 'e', 'parent': 'b', 'person age': 35}, '/a/c': {'name': 'c', 'parent': 'a', 'person age': 60}}

    For a subset of a tree

    >>> tree_to_dict(c, name_key="name", parent_key="parent", attr_dict={"age": "person age"})
    {'/a/c': {'name': 'c', 'parent': 'a', 'person age': 60}}

    Args:
        tree (Node): tree to be exported
        name_key (str): dictionary key for `node.node_name`, defaults to 'name'
        parent_key (str): dictionary key for `node.parent.node_name`, optional
        attr_dict (dict): dictionary mapping node attributes to dictionary key,
            key: node attributes, value: corresponding dictionary key, optional
        all_attrs (bool): indicator whether to retrieve all `Node` attributes, takes precedence over `attr_dict`
        max_depth (int): maximum depth to export tree, optional
        skip_depth (int): number of initial depth to skip, optional
        leaf_only (bool): indicator to retrieve only information from leaf nodes

    Returns:
        (dict)
    """
    start_node = tree.copy()
    exported = {}

    def _is_selected(node):
        # A falsy `max_depth` / `skip_depth` (None or 0) means "no restriction"
        if max_depth and node.depth > max_depth:
            return False
        if skip_depth and node.depth <= skip_depth:
            return False
        return not leaf_only or node.is_leaf

    def _collect(node):
        if not node:
            return
        if _is_selected(node):
            record = {}
            if name_key:
                record[name_key] = node.node_name
            if parent_key:
                record[parent_key] = node.parent.node_name if node.parent else None
            if all_attrs:
                record.update(
                    dict(
                        node.describe(exclude_attributes=["name"], exclude_prefix="_")
                    )
                )
            else:
                for attr_name, output_key in attr_dict.items():
                    record[output_key] = node.get_attr(attr_name)
            exported[node.path_name] = record
        # Children are always visited, even when the node itself was filtered out
        for child in node.children:
            _collect(child)

    _collect(start_node)
    return exported
def tree_to_nested_dict(
    tree: Node,
    name_key: str = "name",
    child_key: str = "children",
    attr_dict: dict = {},
    all_attrs: bool = False,
    max_depth: int = None,
) -> Dict[str, Any]:
    """Export tree to nested dictionary.

    Every descendant of `tree` is visited; `tree` may be the root node or any child node.
    Each node becomes a dictionary of its name and attributes, and its children are stored
    as a list of such dictionaries under `child_key`.

    >>> from bigtree import Node, tree_to_nested_dict
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("d", age=40, parent=b)
    >>> e = Node("e", age=35, parent=b)
    >>> tree_to_nested_dict(root, all_attrs=True)
    {'name': 'a', 'age': 90, 'children': [{'name': 'b', 'age': 65, 'children': [{'name': 'd', 'age': 40}, {'name': 'e', 'age': 35}]}, {'name': 'c', 'age': 60}]}

    Args:
        tree (Node): tree to be exported
        name_key (str): dictionary key for `node.node_name`, defaults to 'name'
        child_key (str): dictionary key for list of children, defaults to 'children'
        attr_dict (dict): dictionary mapping node attributes to dictionary key,
            key: node attributes, value: corresponding dictionary key, optional
        all_attrs (bool): indicator whether to retrieve all `Node` attributes, takes precedence over `attr_dict`
        max_depth (int): maximum depth to export tree, optional

    Returns:
        (dict)

    Raises:
        KeyError: if the starting node itself lies deeper than `max_depth`, since nothing is exported
    """
    start_node = tree.copy()
    # Artificial parent container; the exported start node ends up as its only child
    holder = {}

    def _attach(node, parent_entry):
        if not node:
            return
        # A falsy `max_depth` (None or 0) means "no restriction"
        if max_depth and node.depth > max_depth:
            return

        entry = {name_key: node.node_name}
        if all_attrs:
            entry.update(
                dict(node.describe(exclude_attributes=["name"], exclude_prefix="_"))
            )
        else:
            for attr_name, output_key in attr_dict.items():
                entry[output_key] = node.get_attr(attr_name)

        # The children list is created lazily, so leaf entries carry no `child_key`
        parent_entry.setdefault(child_key, []).append(entry)

        for child in node.children:
            _attach(child, entry)

    _attach(start_node, holder)
    return holder[child_key][0]
def tree_to_dataframe(
    tree: Node,
    path_col: str = "path",
    name_col: str = "name",
    parent_col: str = "",
    attr_dict: dict = {},
    all_attrs: bool = False,
    max_depth: int = None,
    skip_depth: int = None,
    leaf_only: bool = False,
) -> pd.DataFrame:
    """Export tree to pandas DataFrame.

    Every descendant of `tree` is visited; `tree` may be the root node or any child node.
    Each exported node becomes one row of the resulting DataFrame.

    >>> from bigtree import Node, tree_to_dataframe
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("d", age=40, parent=b)
    >>> e = Node("e", age=35, parent=b)
    >>> tree_to_dataframe(root, name_col="name", parent_col="parent", path_col="path", attr_dict={"age": "person age"})
         path name parent  person age
    0      /a    a   None          90
    1    /a/b    b      a          65
    2  /a/b/d    d      b          40
    3  /a/b/e    e      b          35
    4    /a/c    c      a          60

    For a subset of a tree.

    >>> tree_to_dataframe(b, name_col="name", parent_col="parent", path_col="path", attr_dict={"age": "person age"})
         path name parent  person age
    0    /a/b    b      a          65
    1  /a/b/d    d      b          40
    2  /a/b/e    e      b          35

    Args:
        tree (Node): tree to be exported
        path_col (str): column name for `node.path_name`, defaults to 'path'
        name_col (str): column name for `node.node_name`, defaults to 'name'
        parent_col (str): column name for `node.parent.node_name`, optional
        attr_dict (dict): dictionary mapping node attributes to column name,
            key: node attributes, value: corresponding column in dataframe, optional
        all_attrs (bool): indicator whether to retrieve all `Node` attributes, takes precedence over `attr_dict`
        max_depth (int): maximum depth to export tree, optional
        skip_depth (int): number of initial depth to skip, optional
        leaf_only (bool): indicator to retrieve only information from leaf nodes

    Returns:
        (pd.DataFrame)
    """
    start_node = tree.copy()
    rows = []

    def _is_selected(node):
        # A falsy `max_depth` / `skip_depth` (None or 0) means "no restriction"
        if max_depth and node.depth > max_depth:
            return False
        if skip_depth and node.depth <= skip_depth:
            return False
        return not leaf_only or node.is_leaf

    def _collect(node):
        if not node:
            return
        if _is_selected(node):
            row = {}
            if path_col:
                row[path_col] = node.path_name
            if name_col:
                row[name_col] = node.node_name
            if parent_col:
                row[parent_col] = node.parent.node_name if node.parent else None
            if all_attrs:
                # `describe` output is fed to `dict.update` as key/value pairs
                row.update(
                    node.describe(exclude_attributes=["name"], exclude_prefix="_")
                )
            else:
                for attr_name, column in attr_dict.items():
                    row[column] = node.get_attr(attr_name)
            rows.append(row)
        # Children are always visited, even when the node itself was filtered out
        for child in node.children:
            _collect(child)

    _collect(start_node)
    return pd.DataFrame(rows)
def tree_to_dot(
    tree: Union[Node, List[Node]],
    directed: bool = True,
    rankdir: str = "TB",
    bg_colour: str = None,
    node_colour: str = None,
    node_shape: str = None,
    edge_colour: str = None,
    node_attr: str = None,
    edge_attr: str = None,
):
    r"""Export tree or list of trees to image.

    Builds a `pydot.Dot` graph with one graph node per tree node and one edge per parent-child
    relation. Export always starts from the root of each given tree.
    Possible node attributes include style, fillcolor, shape.

    >>> from bigtree import Node, tree_to_dot
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("d", age=40, parent=b)
    >>> e = Node("e", age=35, parent=b)
    >>> graph = tree_to_dot(root)

    Export to image, dot file, etc.

    >>> graph.write_png("tree.png")
    >>> graph.write_dot("tree.dot")

    Export to string

    >>> graph.to_string()
    'strict digraph G {\nrankdir=TB;\na0 [label=a];\nb0 [label=b];\na0 -> b0;\nd0 [label=d];\nb0 -> d0;\ne0 [label=e];\nb0 -> e0;\nc0 [label=c];\na0 -> c0;\n}\n'

    Defining node and edge attributes

    >>> class CustomNode(Node):
    ...     def __init__(self, name, node_shape="", edge_label="", **kwargs):
    ...         super().__init__(name, **kwargs)
    ...         self.node_shape = node_shape
    ...         self.edge_label = edge_label
    ...
    ...     @property
    ...     def edge_attr(self):
    ...         if self.edge_label:
    ...             return {"label": self.edge_label}
    ...         return {}
    ...
    ...     @property
    ...     def node_attr(self):
    ...         if self.node_shape:
    ...             return {"shape": self.node_shape}
    ...         return {}
    >>>
    >>>
    >>> root = CustomNode("a", node_shape="circle")
    >>> b = CustomNode("b", edge_label="child", parent=root)
    >>> c = CustomNode("c", edge_label="child", parent=root)
    >>> d = CustomNode("d", node_shape="square", edge_label="child", parent=b)
    >>> e = CustomNode("e", node_shape="square", edge_label="child", parent=b)
    >>> graph = tree_to_dot(root, node_colour="gold", node_shape="diamond", node_attr="node_attr", edge_attr="edge_attr")
    >>> graph.write_png("assets/custom_tree.png")

    .. image:: https://github.com/kayjan/bigtree/raw/master/assets/custom_tree.png

    Args:
        tree (Node/List[Node]): tree or list of trees to be exported
        directed (bool): indicator whether graph should be directed or undirected, defaults to True
        rankdir (str): set direction of graph layout, defaults to 'TB' (top to bottom), can be 'BT' (bottom to top),
            'LR' (left to right), 'RL' (right to left)
        bg_colour (str): background color of image, defaults to None
        node_colour (str): fill colour of nodes, defaults to None
        node_shape (str): shape of nodes, defaults to None
            Possible node_shape include "circle", "square", "diamond", "triangle"
        edge_colour (str): colour of edges, defaults to None
        node_attr (str): `Node` attribute for node style, overrides `node_colour` and `node_shape`, defaults to None.
            Possible node style (attribute value) include {"style": "filled", "fillcolor": "gold", "shape": "diamond"}
        edge_attr (str): `Node` attribute for edge style, overrides `edge_colour`, defaults to None.
            Possible edge style (attribute value) include {"style": "bold", "label": "edge label", "color": "black"}

    Returns:
        (pydot.Dot)

    Raises:
        ImportError: if pydot is not installed
        ValueError: if any tree to export is not a `Node` instance
    """
    try:
        import pydot
    except ImportError:  # pragma: no cover
        raise ImportError(
            "pydot not available. Please perform a\n\npip install 'bigtree[image]'\n\nto install required dependencies"
        )

    # Get style
    graph_style = dict(bgcolor=bg_colour) if bg_colour else dict()
    node_style = dict(style="filled", fillcolor=node_colour) if node_colour else dict()
    if node_shape:
        node_style["shape"] = node_shape
    edge_style = dict(color=edge_colour) if edge_colour else dict()

    tree = tree.copy()

    _graph = pydot.Dot(
        graph_type="digraph" if directed else "graph",
        strict=True,
        rankdir=rankdir,
        **graph_style,
    )

    if not isinstance(tree, list):
        tree = [tree]

    for _tree in tree:
        if not isinstance(_tree, Node):
            raise ValueError("Tree should be of type `Node`, or inherit from `Node`")

        # Maps a node label to the paths seen with that label, so that nodes sharing a name
        # but living at different paths get distinct graph identifiers (label + index)
        name_dict = collections.defaultdict(list)

        def recursive_create_node_and_edges(parent_name, child_node):
            _node_style = node_style.copy()
            _edge_style = edge_style.copy()

            child_label = child_node.node_name
            if child_node.path_name not in name_dict[child_label]:  # pragma: no cover
                name_dict[child_label].append(child_node.path_name)
            child_name = child_label + str(
                name_dict[child_label].index(child_node.path_name)
            )
            # Per-node styles are applied only when the node actually carries a non-empty
            # value; the edge branch is guarded exactly like the node branch so that a node
            # lacking the attribute never hands a null value to `dict.update`
            if node_attr and child_node.get_attr(node_attr):
                _node_style.update(child_node.get_attr(node_attr))
            if edge_attr and child_node.get_attr(edge_attr):
                _edge_style.update(child_node.get_attr(edge_attr))
            node = pydot.Node(name=child_name, label=child_label, **_node_style)
            _graph.add_node(node)
            if parent_name is not None:
                edge = pydot.Edge(parent_name, child_name, **_edge_style)
                _graph.add_edge(edge)
            for child in child_node.children:
                if child:
                    recursive_create_node_and_edges(child_name, child)

        recursive_create_node_and_edges(None, _tree.root)
    return _graph
def tree_to_pillow(
    tree: Node,
    width: int = 0,
    height: int = 0,
    start_pos: Tuple[float, float] = (10, 10),
    font_family: str = "assets/DejaVuSans.ttf",
    font_size: int = 12,
    font_colour: Union[Tuple[float, float, float], str] = "black",
    bg_colour: Union[Tuple[float, float, float], str] = "white",
    **kwargs,
):
    """Export tree to image (JPG, PNG).

    The image shows the tree in the same layout as `print_tree`; any additional keyword
    arguments are forwarded to `yield_tree`.

    >>> from bigtree import Node, tree_to_pillow
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("d", age=40, parent=b)
    >>> e = Node("e", age=35, parent=b)
    >>> pillow_image = tree_to_pillow(root)

    Export to image (PNG, JPG) file, etc.

    >>> pillow_image.save("tree_pillow.png")
    >>> pillow_image.save("tree_pillow.jpg")

    Args:
        tree (Node): tree to be exported
        width (int): minimum width of image, optional as width of image is calculated automatically
        height (int): minimum height of image, optional as height of image is calculated automatically
        start_pos (Tuple[float, float]): start position of text, (x-offset, y-offset), defaults to (10, 10)
        font_family (str): file path of font family, requires .ttf file, defaults to DejaVuSans
        font_size (int): font size, defaults to 12
        font_colour (Union[Tuple[float, float, float], str]): font colour, accepts tuple of RGB values or string,
            defaults to black
        bg_colour (Union[Tuple[float, float, float], str]): background of image, accepts tuple of RGB values or
            string, defaults to white

    Returns:
        (PIL.Image.Image)
    """
    try:
        from PIL import Image, ImageDraw, ImageFont
    except ImportError:  # pragma: no cover
        raise ImportError(
            "Pillow not available. Please perform a\n\npip install 'bigtree[image]'\n\nto install required dependencies"
        )

    # Initialize font
    font = ImageFont.truetype(font_family, font_size)

    # One text line per node, laid out exactly like the printed tree
    text_lines = [
        f"{branch}{stem}{node.node_name}\n"
        for branch, stem, node in yield_tree(tree, **kwargs)
    ]

    # Measure each line on a throwaway canvas; every box is (left, top, right, bottom)
    scratch_draw = ImageDraw.Draw(Image.new("RGB", (0, 0)))
    boxes = [
        scratch_draw.textbbox((0, 0), text_line, font=font) for text_line in text_lines
    ]
    text_height = sum(bottom + top for _left, top, _right, bottom in boxes)
    text_width = max(right + left for left, _top, right, _bottom in boxes)

    # The requested size only acts as a lower bound; the text plus a margin of
    # `start_pos` on both sides always fits
    x_offset, y_offset = start_pos[0], start_pos[1]
    width = max(width, text_width + 2 * x_offset)
    height = max(height, text_height + 2 * y_offset)

    # Initialize and draw image
    image = Image.new("RGB", (width, height), bg_colour)
    ImageDraw.Draw(image).text(
        start_pos, "".join(text_lines), font=font, fill=font_colour
    )
    return image

View File

@@ -0,0 +1,201 @@
from typing import Optional, Type
import numpy as np
from bigtree.node.basenode import BaseNode
from bigtree.node.binarynode import BinaryNode
from bigtree.node.node import Node
from bigtree.tree.construct import dataframe_to_tree
from bigtree.tree.export import tree_to_dataframe
from bigtree.tree.search import find_path
from bigtree.utils.exceptions import NotFoundError
__all__ = ["clone_tree", "prune_tree", "get_tree_diff"]
def clone_tree(tree: BaseNode, node_type: Type[BaseNode]) -> BaseNode:
    """Clone tree to another `Node` type.

    Cloning always starts from the root of `tree`; each node is rebuilt as `node_type` from
    its described attributes (those not prefixed with an underscore).
    If the same type is needed, simply do a tree.copy().

    >>> from bigtree import BaseNode, Node, clone_tree
    >>> root = BaseNode(name="a")
    >>> b = BaseNode(name="b", parent=root)
    >>> clone_tree(root, Node)
    Node(/a, )

    Args:
        tree (BaseNode): tree to be cloned, must inherit from BaseNode
        node_type (Type[BaseNode]): type of cloned tree

    Returns:
        (BaseNode)

    Raises:
        ValueError: if `tree` is not a `BaseNode` instance
    """
    if not isinstance(tree, BaseNode):
        raise ValueError(
            "Tree should be of type `BaseNode`, or inherit from `BaseNode`"
        )

    def _rebuild(source_node):
        # Re-create a single node as `node_type` from its public attributes
        return node_type(**dict(source_node.describe(exclude_prefix="_")))

    def _clone_children(cloned_parent, source_parent):
        for source_child in source_parent.children:
            # Falsy entries in the children collection are skipped
            if not source_child:
                continue
            cloned_child = _rebuild(source_child)
            cloned_child.parent = cloned_parent
            _clone_children(cloned_child, source_child)

    # Start from root
    cloned_root = _rebuild(tree.root)
    _clone_children(cloned_root, tree.root)
    return cloned_root
def prune_tree(tree: Node, prune_path: str, sep: str = "/") -> Node:
    """Prune tree to leave only the prune path, returns a *copy* of the original tree.

    All siblings along the prune path will be removed.
    Prune path name should be unique, can be full path or partial path (trailing part of path) or node name.

    Path should contain `Node` name, separated by `sep`.
      - For example: Path string "a/b" refers to Node("b") with parent Node("a").

    >>> from bigtree import Node, prune_tree, print_tree
    >>> root = Node("a")
    >>> b = Node("b", parent=root)
    >>> c = Node("c", parent=root)
    >>> print_tree(root)
    a
    ├── b
    └── c

    >>> root_pruned = prune_tree(root, "a/b")
    >>> print_tree(root_pruned)
    a
    └── b

    Args:
        tree (Node): existing tree
        prune_path (str): prune path, all siblings along the prune path will be removed
        sep (str): path separator

    Returns:
        (Node)

    Raises:
        NotFoundError: if no node matches `prune_path`
    """
    # Translate the caller's separator into the separator used by the tree itself
    target_path = prune_path.replace(sep, tree.sep)
    pruned_tree = tree.copy()

    current = find_path(pruned_tree, target_path)
    if not current:
        raise NotFoundError(
            f"Cannot find any node matching path_name ending with {target_path}"
        )

    # Decided once, from the parent of the matched node: binary parents get their children
    # assigned as a two-element list, with the second position left empty
    pad_for_binary = isinstance(current.parent, BinaryNode)

    # Walk up to the root, keeping only the node on the prune path at every level
    while current.parent:
        current.parent.children = [current, None] if pad_for_binary else [current]
        current = current.parent
    return pruned_tree
def get_tree_diff(
    tree: Node, other_tree: Node, only_diff: bool = True
) -> Optional[Node]:
    """Get difference of `tree` to `other_tree`, changes are relative to `tree`.

    (+) and (-) will be added relative to `tree`.
      - For example: (+) refers to nodes that are in `other_tree` but not `tree`.
      - For example: (-) refers to nodes that are in `tree` but not `other_tree`.

    Note that only leaf nodes are compared and have (+) or (-) indicator. Intermediate parent nodes are not compared.

    Function can return all original tree nodes and differences, or only the differences.

    >>> from bigtree import Node, get_tree_diff, print_tree
    >>> root = Node("a")
    >>> b = Node("b", parent=root)
    >>> c = Node("c", parent=root)
    >>> d = Node("d", parent=b)
    >>> e = Node("e", parent=root)
    >>> print_tree(root)
    a
    ├── b
    │   └── d
    ├── c
    └── e

    >>> root_other = Node("a")
    >>> b_other = Node("b", parent=root_other)
    >>> c_other = Node("c", parent=b_other)
    >>> d_other = Node("d", parent=root_other)
    >>> e_other = Node("e", parent=root_other)
    >>> print_tree(root_other)
    a
    ├── b
    │   └── c
    ├── d
    └── e

    >>> tree_diff = get_tree_diff(root, root_other)
    >>> print_tree(tree_diff)
    a
    ├── b
    │   ├── c (+)
    │   └── d (-)
    ├── c (-)
    └── d (+)

    >>> tree_diff = get_tree_diff(root, root_other, only_diff=False)
    >>> print_tree(tree_diff)
    a
    ├── b
    │   ├── c (+)
    │   └── d (-)
    ├── c (-)
    ├── d (+)
    └── e

    Args:
        tree (Node): tree to be compared against
        other_tree (Node): tree to be compared with
        only_diff (bool): indicator to show all nodes or only nodes that are different (+/-), defaults to True

    Returns:
        (Node): tree of the compared leaf paths, or None when there are no rows left to build a tree from
    """
    tree = tree.copy()
    other_tree = other_tree.copy()
    name_col = "name"
    path_col = "PATH"
    indicator_col = "Exists"
    key_cols = [path_col, name_col]

    # Leaf paths of both trees, one row per leaf
    leaves = tree_to_dataframe(
        tree, name_col=name_col, path_col=path_col, leaf_only=True
    )
    leaves_other = tree_to_dataframe(
        other_tree, name_col=name_col, path_col=path_col, leaf_only=True
    )

    # Outer merge tags every leaf as 'left_only', 'right_only' or 'both'
    data_both = leaves[key_cols].merge(
        leaves_other[key_cols], how="outer", indicator=indicator_col
    )

    names = data_both[name_col]
    removed = data_both[indicator_col] == "left_only"
    added = data_both[indicator_col] == "right_only"
    data_both[name_col] = np.where(
        removed,
        names + " (-)",
        np.where(added, names + " (+)", names),
    )

    if only_diff:
        data_both = data_both.query(f"{indicator_col} != 'both'")
    data_both = data_both.drop(columns=indicator_col).sort_values(path_col)

    if not len(data_both):
        return None
    return dataframe_to_tree(
        data_both,
        node_type=tree.__class__,
    )

View File

@@ -0,0 +1,856 @@
import logging
from typing import List, Optional
from bigtree.node.node import Node
from bigtree.tree.search import find_path
from bigtree.utils.exceptions import NotFoundError, TreeError
logging.getLogger(__name__).addHandler(logging.NullHandler())
__all__ = [
"shift_nodes",
"copy_nodes",
"copy_nodes_from_tree_to_tree",
"copy_or_shift_logic",
]
def shift_nodes(
    tree: Node,
    from_paths: List[str],
    to_paths: List[str],
    sep: str = "/",
    skippable: bool = False,
    overriding: bool = False,
    merge_children: bool = False,
    merge_leaves: bool = False,
    delete_children: bool = False,
):
    """Shift nodes from `from_paths` to `to_paths` *in-place*.

    Thin wrapper that delegates to `copy_or_shift_logic` with ``copy=False`` and ``to_tree=None``.

    - Creates intermediate nodes if to path is not present
    - Able to skip nodes if from path is not found, defaults to False (from-nodes must be found; not skippable).
    - Able to override existing node if it exists, defaults to False (to-nodes must not exist; not overridden).
    - Able to merge children and remove intermediate parent node, defaults to False (nodes are shifted; not merged).
    - Able to merge only leaf nodes and remove all intermediate nodes, defaults to False (nodes are shifted; not merged)
    - Able to shift node only and delete children, defaults to False (nodes are shifted together with children).

    For paths in `from_paths` and `to_paths`,
      - Path name can be with or without leading tree path separator symbol.
      - Path name can be partial path (trailing part of path) or node name.
      - Path name must be unique to one node.

    For paths in `to_paths`,
      - Can set to empty string or None to delete the path in `from_paths`; deletion requires
        ``copy=False``, which is what this function always passes on.

    If ``merge_children=True``,
      - If `to_path` is not present, it shifts children of `from_path`.
      - If `to_path` is present, and ``overriding=False``, original and new children are merged.
      - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new children are retained.

    If ``merge_leaves=True``,
      - If `to_path` is not present, it shifts leaves of `from_path`.
      - If `to_path` is present, and ``overriding=False``, original children and leaves are merged.
      - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new leaves are retained,
        original node in `from_path` is retained.

    >>> from bigtree import Node, shift_nodes, print_tree
    >>> root = Node("a")
    >>> b = Node("b", parent=root)
    >>> c = Node("c", parent=root)
    >>> d = Node("d", parent=root)
    >>> print_tree(root)
    a
    ├── b
    ├── c
    └── d

    >>> shift_nodes(root, ["a/c", "a/d"], ["a/b/c", "a/dummy/d"])
    >>> print_tree(root)
    a
    ├── b
    │   └── c
    └── dummy
        └── d

    To delete node,

    >>> root = Node("a")
    >>> b = Node("b", parent=root)
    >>> c = Node("c", parent=root)
    >>> print_tree(root)
    a
    ├── b
    └── c

    >>> shift_nodes(root, ["a/b"], [None])
    >>> print_tree(root)
    a
    └── c

    In overriding case,

    >>> root = Node("a")
    >>> b = Node("b", parent=root)
    >>> c = Node("c", parent=root)
    >>> d = Node("d", parent=c)
    >>> c2 = Node("c", parent=b)
    >>> e = Node("e", parent=c2)
    >>> print_tree(root)
    a
    ├── b
    │   └── c
    │       └── e
    └── c
        └── d

    >>> shift_nodes(root, ["a/b/c"], ["a/c"], overriding=True)
    >>> print_tree(root)
    a
    ├── b
    └── c
        └── e

    In ``merge_children`` case, child nodes are shifted instead of the parent node.
      - If the path already exists, child nodes are merged with existing children.
      - If same node is shifted, the child nodes of the node are merged with the node's parent.

    >>> root = Node("a")
    >>> b = Node("b", parent=root)
    >>> c = Node("c", parent=root)
    >>> d = Node("d", parent=c)
    >>> c2 = Node("c", parent=b)
    >>> e = Node("e", parent=c2)
    >>> z = Node("z", parent=b)
    >>> y = Node("y", parent=z)
    >>> f = Node("f", parent=root)
    >>> g = Node("g", parent=f)
    >>> h = Node("h", parent=g)
    >>> print_tree(root)
    a
    ├── b
    │   ├── c
    │   │   └── e
    │   └── z
    │       └── y
    ├── c
    │   └── d
    └── f
        └── g
            └── h

    >>> shift_nodes(root, ["a/b/c", "z", "a/f"], ["a/c", "a/z", "a/f"], merge_children=True)
    >>> print_tree(root)
    a
    ├── b
    ├── c
    │   ├── d
    │   └── e
    ├── y
    └── g
        └── h

    In ``merge_leaves`` case, leaf nodes are shifted instead of the parent node.
      - If the path already exists, leaf nodes are merged with existing children.
      - If same node is shifted, the leaf nodes of the node are merged with the node's parent.

    >>> root = Node("a")
    >>> b = Node("b", parent=root)
    >>> c = Node("c", parent=root)
    >>> d = Node("d", parent=c)
    >>> c2 = Node("c", parent=b)
    >>> e = Node("e", parent=c2)
    >>> z = Node("z", parent=b)
    >>> y = Node("y", parent=z)
    >>> f = Node("f", parent=root)
    >>> g = Node("g", parent=f)
    >>> h = Node("h", parent=g)
    >>> print_tree(root)
    a
    ├── b
    │   ├── c
    │   │   └── e
    │   └── z
    │       └── y
    ├── c
    │   └── d
    └── f
        └── g
            └── h

    >>> shift_nodes(root, ["a/b/c", "z", "a/f"], ["a/c", "a/z", "a/f"], merge_leaves=True)
    >>> print_tree(root)
    a
    ├── b
    │   ├── c
    │   └── z
    ├── c
    │   ├── d
    │   └── e
    ├── f
    │   └── g
    ├── y
    └── h

    In ``delete_children`` case, only the node is shifted without its accompanying children/descendants.

    >>> root = Node("a")
    >>> b = Node("b", parent=root)
    >>> c = Node("c", parent=root)
    >>> d = Node("d", parent=c)
    >>> c2 = Node("c", parent=b)
    >>> e = Node("e", parent=c2)
    >>> z = Node("z", parent=b)
    >>> y = Node("y", parent=z)
    >>> print_tree(root)
    a
    ├── b
    │   ├── c
    │   │   └── e
    │   └── z
    │       └── y
    └── c
        └── d

    >>> shift_nodes(root, ["a/b/z"], ["a/z"], delete_children=True)
    >>> print_tree(root)
    a
    ├── b
    │   └── c
    │       └── e
    ├── c
    │   └── d
    └── z

    Args:
        tree (Node): tree to modify
        from_paths (list): original paths to shift nodes from
        to_paths (list): new paths to shift nodes to
        sep (str): path separator for input paths, applies to `from_path` and `to_path`
        skippable (bool): indicator to skip if from path is not found, defaults to False
        overriding (bool): indicator to override existing to path if there is clashes, defaults to False
        merge_children (bool): indicator to merge children and remove intermediate parent node, defaults to False
        merge_leaves (bool): indicator to merge leaf nodes and remove intermediate parent node(s), defaults to False
        delete_children (bool): indicator to shift node only without children, defaults to False
    """
    # All work happens in the shared copy/shift routine; `copy=False` selects shifting and
    # `to_tree=None` means no separate destination tree is supplied
    return copy_or_shift_logic(
        tree=tree,
        from_paths=from_paths,
        to_paths=to_paths,
        sep=sep,
        copy=False,
        skippable=skippable,
        overriding=overriding,
        merge_children=merge_children,
        merge_leaves=merge_leaves,
        delete_children=delete_children,
        to_tree=None,
    )  # pragma: no cover
def copy_nodes(
    tree: Node,
    from_paths: List[str],
    to_paths: List[str],
    sep: str = "/",
    skippable: bool = False,
    overriding: bool = False,
    merge_children: bool = False,
    merge_leaves: bool = False,
    delete_children: bool = False,
):
    """Copy nodes from `from_paths` to `to_paths` within the same tree, *in-place*.

    This is a thin wrapper that delegates to `copy_or_shift_logic` with ``copy=True``
    and no separate destination tree.

    - Creates intermediate nodes if to path is not present.
    - Able to skip nodes if from path is not found, defaults to False (from-nodes must be found; not skippable).
    - Able to override existing node if it exists, defaults to False (to-nodes must not exist; not overridden).
    - Able to merge children and remove intermediate parent node, defaults to False (nodes are copied; not merged).
    - Able to merge only leaf nodes and remove all intermediate nodes, defaults to False (nodes are copied; not merged).
    - Able to copy node only and delete children, defaults to False (nodes are copied together with children).

    For paths in `from_paths` and `to_paths`,

    - Path name can be with or without leading tree path separator symbol.
    - Path name can be partial path (trailing part of path) or node name.
    - Path name must be unique to one node.

    If ``merge_children=True``,

    - If `to_path` is not present, it copies children of `from_path`.
    - If `to_path` is present, and ``overriding=False``, original and new children are merged.
    - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new children are retained.

    If ``merge_leaves=True``,

    - If `to_path` is not present, it copies leaves of `from_path`.
    - If `to_path` is present, and ``overriding=False``, original children and leaves are merged.
    - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new leaves are retained.

    >>> from bigtree import Node, copy_nodes, print_tree
    >>> root = Node("a")
    >>> b = Node("b", parent=root)
    >>> c = Node("c", parent=root)
    >>> d = Node("d", parent=root)
    >>> print_tree(root)
    a
    ├── b
    ├── c
    └── d

    >>> copy_nodes(root, ["a/c", "a/d"], ["a/b/c", "a/dummy/d"])
    >>> print_tree(root)
    a
    ├── b
    │   └── c
    ├── c
    ├── d
    └── dummy
        └── d

    In overriding case,

    >>> root = Node("a")
    >>> b = Node("b", parent=root)
    >>> c = Node("c", parent=root)
    >>> d = Node("d", parent=c)
    >>> c2 = Node("c", parent=b)
    >>> e = Node("e", parent=c2)
    >>> print_tree(root)
    a
    ├── b
    │   └── c
    │       └── e
    └── c
        └── d

    >>> copy_nodes(root, ["a/b/c"], ["a/c"], overriding=True)
    >>> print_tree(root)
    a
    ├── b
    │   └── c
    │       └── e
    └── c
        └── e

    In ``merge_children`` case, child nodes are copied instead of the parent node.

    - If the path already exists, child nodes are merged with existing children.
    - If same node is copied, the child nodes of the node are merged with the node's parent.

    >>> root = Node("a")
    >>> b = Node("b", parent=root)
    >>> c = Node("c", parent=root)
    >>> d = Node("d", parent=c)
    >>> c2 = Node("c", parent=b)
    >>> e = Node("e", parent=c2)
    >>> z = Node("z", parent=b)
    >>> y = Node("y", parent=z)
    >>> f = Node("f", parent=root)
    >>> g = Node("g", parent=f)
    >>> h = Node("h", parent=g)
    >>> print_tree(root)
    a
    ├── b
    │   ├── c
    │   │   └── e
    │   └── z
    │       └── y
    ├── c
    │   └── d
    └── f
        └── g
            └── h

    >>> copy_nodes(root, ["a/b/c", "z", "a/f"], ["a/c", "a/z", "a/f"], merge_children=True)
    >>> print_tree(root)
    a
    ├── b
    │   ├── c
    │   │   └── e
    │   └── z
    │       └── y
    ├── c
    │   ├── d
    │   └── e
    ├── y
    └── g
        └── h

    In ``merge_leaves`` case, leaf nodes are copied instead of the parent node.

    - If the path already exists, leaf nodes are merged with existing children.
    - If same node is copied, the leaf nodes of the node are merged with the node's parent.

    >>> root = Node("a")
    >>> b = Node("b", parent=root)
    >>> c = Node("c", parent=root)
    >>> d = Node("d", parent=c)
    >>> c2 = Node("c", parent=b)
    >>> e = Node("e", parent=c2)
    >>> z = Node("z", parent=b)
    >>> y = Node("y", parent=z)
    >>> f = Node("f", parent=root)
    >>> g = Node("g", parent=f)
    >>> h = Node("h", parent=g)
    >>> print_tree(root)
    a
    ├── b
    │   ├── c
    │   │   └── e
    │   └── z
    │       └── y
    ├── c
    │   └── d
    └── f
        └── g
            └── h

    >>> copy_nodes(root, ["a/b/c", "z", "a/f"], ["a/c", "a/z", "a/f"], merge_leaves=True)
    >>> print_tree(root)
    a
    ├── b
    │   ├── c
    │   │   └── e
    │   └── z
    │       └── y
    ├── c
    │   ├── d
    │   └── e
    ├── f
    │   └── g
    │       └── h
    ├── y
    └── h

    In ``delete_children`` case, only the node is copied without its accompanying children/descendants.

    >>> root = Node("a")
    >>> b = Node("b", parent=root)
    >>> c = Node("c", parent=root)
    >>> d = Node("d", parent=c)
    >>> c2 = Node("c", parent=b)
    >>> e = Node("e", parent=c2)
    >>> z = Node("z", parent=b)
    >>> y = Node("y", parent=z)
    >>> print_tree(root)
    a
    ├── b
    │   ├── c
    │   │   └── e
    │   └── z
    │       └── y
    └── c
        └── d

    >>> copy_nodes(root, ["a/b/z"], ["a/z"], delete_children=True)
    >>> print_tree(root)
    a
    ├── b
    │   ├── c
    │   │   └── e
    │   └── z
    │       └── y
    ├── c
    │   └── d
    └── z

    Args:
        tree (Node): tree to modify
        from_paths (list): original paths to copy nodes from
        to_paths (list): new paths to copy nodes to
        sep (str): path separator for input paths, applies to `from_path` and `to_path`
        skippable (bool): indicator to skip if from path is not found, defaults to False
        overriding (bool): indicator to override existing to path if there is clashes, defaults to False
        merge_children (bool): indicator to merge children and remove intermediate parent node, defaults to False
        merge_leaves (bool): indicator to merge leaf nodes and remove intermediate parent node(s), defaults to False
        delete_children (bool): indicator to copy node only without children, defaults to False
    """
    # Everything except the tree and the two path lists is forwarded unchanged;
    # `copy=True` selects copying and `to_tree=None` keeps the work inside `tree`.
    options = {
        "sep": sep,
        "copy": True,
        "skippable": skippable,
        "overriding": overriding,
        "merge_children": merge_children,
        "merge_leaves": merge_leaves,
        "delete_children": delete_children,
        "to_tree": None,
    }
    return copy_or_shift_logic(
        tree, from_paths, to_paths, **options
    )  # pragma: no cover
def copy_nodes_from_tree_to_tree(
    from_tree: Node,
    to_tree: Node,
    from_paths: List[str],
    to_paths: List[str],
    sep: str = "/",
    skippable: bool = False,
    overriding: bool = False,
    merge_children: bool = False,
    merge_leaves: bool = False,
    delete_children: bool = False,
):
    """Copy nodes at `from_paths` in `from_tree` to `to_paths` in `to_tree`, modifying `to_tree` *in-place*.

    This is a thin wrapper that delegates to `copy_or_shift_logic` with ``copy=True``,
    using `from_tree` as the source tree and `to_tree` as the destination tree.

    - Creates intermediate nodes if to path is not present.
    - Able to skip nodes if from path is not found, defaults to False (from-nodes must be found; not skippable).
    - Able to override existing node if it exists, defaults to False (to-nodes must not exist; not overridden).
    - Able to merge children and remove intermediate parent node, defaults to False (nodes are copied; not merged).
    - Able to merge only leaf nodes and remove all intermediate nodes, defaults to False (nodes are copied; not merged).
    - Able to copy node only and delete children, defaults to False (nodes are copied together with children).

    For paths in `from_paths` and `to_paths`,

    - Path name can be with or without leading tree path separator symbol.
    - Path name can be partial path (trailing part of path) or node name.
    - Path name must be unique to one node.

    If ``merge_children=True``,

    - If `to_path` is not present, it copies children of `from_path`.
    - If `to_path` is present, and ``overriding=False``, original and new children are merged.
    - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new children are retained.

    If ``merge_leaves=True``,

    - If `to_path` is not present, it copies leaves of `from_path`.
    - If `to_path` is present, and ``overriding=False``, original children and leaves are merged.
    - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new leaves are retained.

    >>> from bigtree import Node, copy_nodes_from_tree_to_tree, print_tree
    >>> root = Node("a")
    >>> b = Node("b", parent=root)
    >>> c = Node("c", parent=root)
    >>> d = Node("d", parent=c)
    >>> e = Node("e", parent=root)
    >>> f = Node("f", parent=e)
    >>> g = Node("g", parent=f)
    >>> print_tree(root)
    a
    ├── b
    ├── c
    │   └── d
    └── e
        └── f
            └── g

    >>> root_other = Node("aa")
    >>> copy_nodes_from_tree_to_tree(root, root_other, ["a/b", "a/c", "a/e"], ["aa/b", "aa/b/c", "aa/dummy/e"])
    >>> print_tree(root_other)
    aa
    ├── b
    │   └── c
    │       └── d
    └── dummy
        └── e
            └── f
                └── g

    In overriding case,

    >>> root_other = Node("aa")
    >>> c = Node("c", parent=root_other)
    >>> e = Node("e", parent=c)
    >>> print_tree(root_other)
    aa
    └── c
        └── e

    >>> copy_nodes_from_tree_to_tree(root, root_other, ["a/b", "a/c"], ["aa/b", "aa/c"], overriding=True)
    >>> print_tree(root_other)
    aa
    ├── b
    └── c
        └── d

    In ``merge_children`` case, child nodes are copied instead of the parent node.

    - If the path already exists, child nodes are merged with existing children.

    >>> root_other = Node("aa")
    >>> c = Node("c", parent=root_other)
    >>> e = Node("e", parent=c)
    >>> print_tree(root_other)
    aa
    └── c
        └── e

    >>> copy_nodes_from_tree_to_tree(root, root_other, ["a/c", "e"], ["a/c", "a/e"], merge_children=True)
    >>> print_tree(root_other)
    aa
    ├── c
    │   ├── e
    │   └── d
    └── f
        └── g

    In ``merge_leaves`` case, leaf nodes are copied instead of the parent node.

    - If the path already exists, leaf nodes are merged with existing children.

    >>> root_other = Node("aa")
    >>> c = Node("c", parent=root_other)
    >>> e = Node("e", parent=c)
    >>> print_tree(root_other)
    aa
    └── c
        └── e

    >>> copy_nodes_from_tree_to_tree(root, root_other, ["a/c", "e"], ["a/c", "a/e"], merge_leaves=True)
    >>> print_tree(root_other)
    aa
    ├── c
    │   ├── e
    │   └── d
    └── g

    In ``delete_children`` case, only the node is copied without its accompanying children/descendants.

    >>> root_other = Node("aa")
    >>> print_tree(root_other)
    aa

    >>> copy_nodes_from_tree_to_tree(root, root_other, ["a/c", "e"], ["a/c", "a/e"], delete_children=True)
    >>> print_tree(root_other)
    aa
    ├── c
    └── e

    Args:
        from_tree (Node): tree to copy nodes from
        to_tree (Node): tree to copy nodes to
        from_paths (list): original paths to copy nodes from
        to_paths (list): new paths to copy nodes to
        sep (str): path separator for input paths, applies to `from_path` and `to_path`
        skippable (bool): indicator to skip if from path is not found, defaults to False
        overriding (bool): indicator to override existing to path if there is clashes, defaults to False
        merge_children (bool): indicator to merge children and remove intermediate parent node, defaults to False
        merge_leaves (bool): indicator to merge leaf nodes and remove intermediate parent node(s), defaults to False
        delete_children (bool): indicator to copy node only without children, defaults to False
    """
    # The source tree is passed as `tree`; the destination goes in as `to_tree`.
    options = {
        "sep": sep,
        "copy": True,
        "skippable": skippable,
        "overriding": overriding,
        "merge_children": merge_children,
        "merge_leaves": merge_leaves,
        "delete_children": delete_children,
        "to_tree": to_tree,
    }
    return copy_or_shift_logic(
        from_tree, from_paths, to_paths, **options
    )  # pragma: no cover
def copy_or_shift_logic(
    tree: Node,
    from_paths: List[str],
    to_paths: List[str],
    sep: str = "/",
    copy: bool = False,
    skippable: bool = False,
    overriding: bool = False,
    merge_children: bool = False,
    merge_leaves: bool = False,
    delete_children: bool = False,
    to_tree: Optional[Node] = None,
):
    """Shift or copy nodes from `from_paths` to `to_paths` *in-place*.

    - Creates intermediate nodes if to path is not present
    - Able to copy node, defaults to False (nodes are shifted; not copied).
    - Able to skip nodes if from path is not found, defaults to False (from-nodes must be found; not skippable)
    - Able to override existing node if it exists, defaults to False (to-nodes must not exist; not overridden)
    - Able to merge children and remove intermediate parent node, defaults to False (nodes are shifted; not merged)
    - Able to merge only leaf nodes and remove all intermediate nodes, defaults to False (nodes are shifted; not merged)
    - Able to shift/copy node only and delete children, defaults to False (nodes are shifted/copied together with children).
    - Able to shift/copy nodes from one tree to another tree, defaults to None (shifting/copying happens within same tree)

    For paths in `from_paths` and `to_paths`,

    - Path name can be with or without leading tree path separator symbol.
    - Path name can be partial path (trailing part of path) or node name.
    - Path name must be unique to one node.

    For paths in `to_paths`,

    - Can set to empty string or None to delete the path in `from_paths`, note that ``copy`` must be set to False.

    If ``merge_children=True``,

    - If `to_path` is not present, it shifts/copies children of `from_path`.
    - If `to_path` is present, and ``overriding=False``, original and new children are merged.
    - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new children are retained.

    If ``merge_leaves=True``,

    - If `to_path` is not present, it shifts/copies leaves of `from_path`.
    - If `to_path` is present, and ``overriding=False``, original children and leaves are merged.
    - If `to_path` is present and ``overriding=True``, it behaves like overriding and only new leaves are retained,
      original non-leaf nodes in `from_path` are retained.

    Args:
        tree (Node): tree to modify
        from_paths (list): original paths to shift nodes from
        to_paths (list): new paths to shift nodes to
        sep (str): path separator for input paths, applies to `from_path` and `to_path`
        copy (bool): indicator to copy node, defaults to False
        skippable (bool): indicator to skip if from path is not found, defaults to False
        overriding (bool): indicator to override existing to path if there is clashes, defaults to False
        merge_children (bool): indicator to merge children and remove intermediate parent node, defaults to False
        merge_leaves (bool): indicator to merge leaf nodes and remove intermediate parent node(s), defaults to False
        delete_children (bool): indicator to shift/copy node only without children, defaults to False
        to_tree (Node): tree to copy to, defaults to None

    Raises:
        ValueError: if both merge flags are set, if the path arguments are not lists of equal length,
            or if a from/to pair does not end in the same node name
        NotFoundError: if a from path is not found (and ``skippable=False``), or no existing
            ancestor of a to path can be found
        TreeError: if a node is shifted onto itself without merging, or the to path already
            exists and neither overriding nor merging was requested
    """
    if merge_children and merge_leaves:
        raise ValueError(
            "Invalid shifting, can only specify one type of merging, check `merge_children` and `merge_leaves`"
        )
    if not (isinstance(from_paths, list) and isinstance(to_paths, list)):
        raise ValueError(
            "Invalid type, `from_paths` and `to_paths` should be list type"
        )
    if len(from_paths) != len(to_paths):
        raise ValueError(
            f"Paths are different length, input `from_paths` have {len(from_paths)} entries, "
            f"while output `to_paths` have {len(to_paths)} entries"
        )
    # A shift/copy never renames: the last path component must be the same node name
    for from_path, to_path in zip(from_paths, to_paths):
        if to_path:
            if from_path.split(sep)[-1] != to_path.split(sep)[-1]:
                raise ValueError(
                    f"Unable to assign from_path {from_path} to to_path {to_path}\n"
                    f"Verify that `sep` is defined correctly for path\n"
                    f"Alternatively, check that `from_path` and `to_path` is reassigning the same node"
                )

    # Destination lookups (and new intermediate nodes) use `to_tree` when it is
    # given, otherwise the source tree itself
    destination_tree = to_tree if to_tree else tree
    node_type = destination_tree.__class__
    tree_sep = destination_tree.sep

    for from_path, to_path in zip(from_paths, to_paths):
        # Per-pair copy of the flag: the overriding branch below switches merging
        # off for the current pair only, and must not affect later pairs
        merge_children_for_pair = merge_children

        from_path = from_path.replace(sep, tree.sep)
        from_node = find_path(tree, from_path)

        # From node not found
        if not from_node:
            if not skippable:
                raise NotFoundError(
                    f"Unable to find from_path {from_path}\n"
                    f"Set `skippable` to True to skip shifting for nodes not found"
                )
            logging.info(f"Unable to find from_path {from_path}")
            continue

        # From node found
        if not to_path:
            # Node to be deleted: reassigning its parent to None detaches it
            to_node = None
        else:
            # Node to be copied/shifted
            to_path = to_path.replace(sep, tree_sep)
            to_node = find_path(destination_tree, to_path)

            # To node found
            if to_node:
                if from_node == to_node:
                    if merge_children_for_pair:
                        # Drop the node itself; its children move up to its parent
                        parent = to_node.parent
                        to_node.parent = None
                        to_node = parent
                    elif merge_leaves:
                        to_node = to_node.parent
                    else:
                        raise TreeError(
                            f"Attempting to shift the same node {from_node.node_name} back to the same position\n"
                            f"Check from path {from_path} and to path {to_path}\n"
                            f"Alternatively, set `merge_children` or `merge_leaves` to True if intermediate node is to be removed"
                        )
                elif merge_children_for_pair:
                    # Specify override to remove existing node, else children are merged
                    if not overriding:
                        logging.info(
                            f"Path {to_path} already exists and children are merged"
                        )
                    else:
                        logging.info(
                            f"Path {to_path} already exists and its children be overridden by the merge"
                        )
                        # Existing node is removed and the whole from-node takes
                        # its place, so no child merging is done for this pair
                        parent = to_node.parent
                        to_node.parent = None
                        to_node = parent
                        merge_children_for_pair = False
                elif merge_leaves:
                    # Specify override to remove existing node, else leaves are merged
                    if not overriding:
                        logging.info(
                            f"Path {to_path} already exists and leaves are merged"
                        )
                    else:
                        logging.info(
                            f"Path {to_path} already exists and its leaves be overridden by the merge"
                        )
                        del to_node.children
                else:
                    if not overriding:
                        raise TreeError(
                            f"Path {to_path} already exists and unable to override\n"
                            f"Set `overriding` to True to perform overrides\n"
                            f"Alternatively, set `merge_children` to True if nodes are to be merged"
                        )
                    logging.info(
                        f"Path {to_path} already exists and will be overridden"
                    )
                    parent = to_node.parent
                    to_node.parent = None
                    to_node = parent

            # To node not found
            else:
                # Find the nearest existing ancestor by dropping trailing path
                # components one at a time
                to_path_list = to_path.split(tree_sep)
                idx = 1
                to_path_parent = tree_sep.join(to_path_list[:-idx])
                to_node = find_path(destination_tree, to_path_parent)
                while not to_node and idx + 1 < len(to_path_list):
                    idx += 1
                    # Join with the same separator the path was split on
                    to_path_parent = tree_sep.join(to_path_list[:-idx])
                    to_node = find_path(destination_tree, to_path_parent)
                if not to_node:
                    raise NotFoundError(
                        f"Unable to find to_path {to_path}\n"
                        f"Please specify valid path to shift node to"
                    )

                # Create the missing intermediate nodes between that ancestor and
                # the final path component
                for depth in range(len(to_path_list) - idx, len(to_path_list) - 1):
                    intermediate_child_node = node_type(to_path_list[depth])
                    intermediate_child_node.parent = to_node
                    to_node = intermediate_child_node

        # Reassign from_node to new parent
        if copy:
            logging.debug(f"Copying {from_node.node_name}")
            from_node = from_node.copy()
        if merge_children_for_pair:
            logging.debug(
                f"Reassigning children from {from_node.node_name} to {to_node.node_name}"
            )
            for children in from_node.children:
                if delete_children:
                    del children.children
                children.parent = to_node
            from_node.parent = None
        elif merge_leaves:
            logging.debug(
                f"Reassigning leaf nodes from {from_node.node_name} to {to_node.node_name}"
            )
            for children in from_node.leaves:
                children.parent = to_node
        else:
            if delete_children:
                del from_node.children
            from_node.parent = to_node

View File

@@ -0,0 +1,316 @@
from typing import Any, Callable, Iterable
from bigtree.node.basenode import BaseNode
from bigtree.node.node import Node
from bigtree.utils.exceptions import CorruptedTreeError, SearchError
from bigtree.utils.iterators import preorder_iter
# Names exported by `from <this module> import *`: the search helpers defined below.
__all__ = [
"findall",
"find",
"find_name",
"find_names",
"find_full_path",
"find_path",
"find_paths",
"find_attr",
"find_attrs",
"find_children",
]
def findall(
    tree: BaseNode,
    condition: Callable,
    max_depth: int = None,
    min_count: int = None,
    max_count: int = None,
) -> tuple:
    """
    Collect every node of the tree that satisfies a condition (callable function).

    Nodes are gathered in pre-order. Optional count bounds turn an unexpected
    number of matches into an error.

    >>> from bigtree import Node, findall
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("d", age=40, parent=c)
    >>> findall(root, lambda node: node.age > 62)
    (Node(/a, age=90), Node(/a/b, age=65))

    Args:
        tree (BaseNode): tree to search
        condition (Callable): function that takes in node as argument, node is kept if it evaluates to `True`
        max_depth (int): maximum depth to search for, based on `depth` attribute, defaults to None
        min_count (int): checks for minimum number of occurrence,
            raise SearchError if number of results is below min_count, defaults to None
        max_count (int): checks for maximum number of occurrence,
            raise SearchError if number of results exceeds max_count, defaults to None

    Returns:
        (tuple)
    """
    matches = tuple(
        preorder_iter(tree, filter_condition=condition, max_depth=max_depth)
    )
    n_found = len(matches)
    # A bound of None (or 0) is treated as "no bound"
    if min_count and n_found < min_count:
        raise SearchError(
            f"Expected more than {min_count} element(s), found {n_found} elements\n{matches}"
        )
    if max_count and n_found > max_count:
        raise SearchError(
            f"Expected less than {max_count} element(s), found {n_found} elements\n{matches}"
        )
    return matches
def find(tree: BaseNode, condition: Callable, max_depth: int = None) -> BaseNode:
    """
    Return the *single node* of the tree that satisfies a condition (callable function).

    Gives back None when nothing matches; more than one match is an error.

    >>> from bigtree import Node, find
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("d", age=40, parent=c)
    >>> find(root, lambda node: node.age == 65)
    Node(/a/b, age=65)
    >>> find(root, lambda node: node.age > 5)
    Traceback (most recent call last):
        ...
    bigtree.utils.exceptions.SearchError: Expected less than 1 element(s), found 4 elements
    (Node(/a, age=90), Node(/a/b, age=65), Node(/a/c, age=60), Node(/a/c/d, age=40))

    Args:
        tree (BaseNode): tree to search
        condition (Callable): function that takes in node as argument, node is kept if it evaluates to `True`
        max_depth (int): maximum depth to search for, based on `depth` attribute, defaults to None

    Returns:
        (BaseNode)
    """
    # `max_count=1` makes `findall` raise when the condition is ambiguous
    matches = findall(tree, condition, max_depth, max_count=1)
    return matches[0] if matches else None
def find_name(tree: Node, name: str, max_depth: int = None) -> Node:
    """
    Return the single node whose name equals `name`.

    >>> from bigtree import Node, find_name
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("d", age=40, parent=c)
    >>> find_name(root, "c")
    Node(/a/c, age=60)

    Args:
        tree (Node): tree to search
        name (str): value to match for name attribute
        max_depth (int): maximum depth to search for, based on `depth` attribute, defaults to None

    Returns:
        (Node)
    """

    def _has_name(node):
        return node.node_name == name

    return find(tree, _has_name, max_depth)
def find_names(tree: Node, name: str, max_depth: int = None) -> Iterable[Node]:
    """
    Return all node(s) whose name equals `name`.

    >>> from bigtree import Node, find_names
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("b", age=40, parent=c)
    >>> find_names(root, "c")
    (Node(/a/c, age=60),)
    >>> find_names(root, "b")
    (Node(/a/b, age=65), Node(/a/c/b, age=40))

    Args:
        tree (Node): tree to search
        name (str): value to match for name attribute
        max_depth (int): maximum depth to search for, based on `depth` attribute, defaults to None

    Returns:
        (Iterable[Node])
    """

    def _has_name(node):
        return node.node_name == name

    return findall(tree, _has_name, max_depth)
def find_full_path(tree: Node, path_name: str) -> Node:
    """
    Return the single node located at a full path, walking down from the root.

    - Path name can be with or without leading tree path separator symbol.
    - Path name must be full path, works similar to `find_path` but faster.

    >>> from bigtree import Node, find_full_path
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("d", age=40, parent=c)
    >>> find_full_path(root, "/a/c/d")
    Node(/a/c/d, age=40)

    Args:
        tree (Node): tree to search
        path_name (str): value to match (full path) of path_name attribute

    Returns:
        (Node)
    """
    separator = tree.sep
    # Leading/trailing separators carry no information, drop them before splitting
    path_name = path_name.strip(separator)
    root_name, *descendant_names = path_name.split(separator)
    if root_name != tree.root.node_name:
        raise ValueError(
            f"Path {path_name} does not match the root node name {tree.root.node_name}"
        )

    # Descend one level per path component; a missing child ends the walk with None
    current = tree.root
    for child_name in descendant_names:
        current = find_children(current, child_name)
        if not current:
            break
    return current
def find_path(tree: Node, path_name: str) -> Node:
    """
    Return the single node whose path ends with `path_name`.

    - Path name can be with or without leading tree path separator symbol.
    - Path name can be full path or partial path (trailing part of path) or node name.

    >>> from bigtree import Node, find_path
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("d", age=40, parent=c)
    >>> find_path(root, "c")
    Node(/a/c, age=60)
    >>> find_path(root, "/c")
    Node(/a/c, age=60)

    Args:
        tree (Node): tree to search
        path_name (str): value to match (full path) or trailing part (partial path) of path_name attribute

    Returns:
        (Node)
    """
    # Trailing separators are ignored so that "a/c/" behaves like "a/c"
    suffix = path_name.rstrip(tree.sep)

    def _path_ends_with_suffix(node):
        return node.path_name.endswith(suffix)

    return find(tree, _path_ends_with_suffix)
def find_paths(tree: Node, path_name: str) -> tuple:
    """
    Return all nodes whose path ends with `path_name`.

    - Path name can be with or without leading tree path separator symbol.
    - Path name can be partial path (trailing part of path) or node name.

    >>> from bigtree import Node, find_paths
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("c", age=40, parent=c)
    >>> find_paths(root, "/a/c")
    (Node(/a/c, age=60),)
    >>> find_paths(root, "/c")
    (Node(/a/c, age=60), Node(/a/c/c, age=40))

    Args:
        tree (Node): tree to search
        path_name (str): value to match (full path) or trailing part (partial path) of path_name attribute

    Returns:
        (tuple)
    """
    # Trailing separators are ignored so that "a/c/" behaves like "a/c"
    suffix = path_name.rstrip(tree.sep)

    def _path_ends_with_suffix(node):
        return node.path_name.endswith(suffix)

    return findall(tree, _path_ends_with_suffix)
def find_attr(
    tree: BaseNode, attr_name: str, attr_value: Any, max_depth: int = None
) -> BaseNode:
    """
    Search tree for single node matching custom attribute.

    Nodes that do not have the attribute at all are treated as non-matching
    rather than aborting the search with an AttributeError.

    >>> from bigtree import Node, find_attr
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("d", age=40, parent=c)
    >>> find_attr(root, "age", 65)
    Node(/a/b, age=65)

    Args:
        tree (BaseNode): tree to search
        attr_name (str): attribute name to perform matching
        attr_value (Any): value to match for attr_name attribute
        max_depth (int): maximum depth to search for, based on `depth` attribute, defaults to None

    Returns:
        (BaseNode)
    """
    # Unique sentinel so a missing attribute can be told apart from any real
    # value, including None
    missing = object()

    def _attr_matches(node):
        value = getattr(node, attr_name, missing)
        return value is not missing and value == attr_value

    return find(tree, _attr_matches, max_depth)
def find_attrs(
    tree: BaseNode, attr_name: str, attr_value: Any, max_depth: int = None
) -> tuple:
    """
    Search tree for node(s) matching custom attribute.

    Nodes that do not have the attribute at all are treated as non-matching
    rather than aborting the search with an AttributeError.

    >>> from bigtree import Node, find_attrs
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=65, parent=root)
    >>> d = Node("d", age=40, parent=c)
    >>> find_attrs(root, "age", 65)
    (Node(/a/b, age=65), Node(/a/c, age=65))

    Args:
        tree (BaseNode): tree to search
        attr_name (str): attribute name to perform matching
        attr_value (Any): value to match for attr_name attribute
        max_depth (int): maximum depth to search for, based on `depth` attribute, defaults to None

    Returns:
        (tuple)
    """
    # Unique sentinel so a missing attribute can be told apart from any real
    # value, including None
    missing = object()

    def _attr_matches(node):
        value = getattr(node, attr_name, missing)
        return value is not missing and value == attr_value

    return findall(tree, _attr_matches, max_depth)
def find_children(tree: Node, name: str) -> Node:
    """
    Return the direct child of a node whose name equals `name`.

    Only the immediate children of `tree` are inspected. Gives back None when
    no child carries that name.

    >>> from bigtree import Node, find_children
    >>> root = Node("a", age=90)
    >>> b = Node("b", age=65, parent=root)
    >>> c = Node("c", age=60, parent=root)
    >>> d = Node("d", age=40, parent=c)
    >>> find_children(root, "c")
    Node(/a/c, age=60)
    >>> find_children(c, "d")
    Node(/a/c/d, age=40)

    Args:
        tree (Node): tree to search, parent node
        name (str): value to match for name attribute, child node

    Returns:
        (Node)
    """
    matches = []
    for candidate in tree.children:
        if candidate and candidate.node_name == name:
            matches.append(candidate)

    # Two children with the same name means the same path exists twice
    if len(matches) > 1:  # pragma: no cover
        raise CorruptedTreeError(
            f"There are more than one path for {matches[0].path_name}, check {matches}"
        )
    return matches[0] if matches else None