202 lines
6.1 KiB
Python
202 lines
6.1 KiB
Python
from typing import Optional, Type
|
|
|
|
import numpy as np
|
|
|
|
from bigtree.node.basenode import BaseNode
|
|
from bigtree.node.binarynode import BinaryNode
|
|
from bigtree.node.node import Node
|
|
from bigtree.tree.construct import dataframe_to_tree
|
|
from bigtree.tree.export import tree_to_dataframe
|
|
from bigtree.tree.search import find_path
|
|
from bigtree.utils.exceptions import NotFoundError
|
|
|
|
__all__ = ["clone_tree", "prune_tree", "get_tree_diff"]
|
|
|
|
|
|
def clone_tree(tree: BaseNode, node_type: Type[BaseNode]) -> BaseNode:
|
|
"""Clone tree to another `Node` type.
|
|
If the same type is needed, simply do a tree.copy().
|
|
|
|
>>> from bigtree import BaseNode, Node, clone_tree
|
|
>>> root = BaseNode(name="a")
|
|
>>> b = BaseNode(name="b", parent=root)
|
|
>>> clone_tree(root, Node)
|
|
Node(/a, )
|
|
|
|
Args:
|
|
tree (BaseNode): tree to be cloned, must inherit from BaseNode
|
|
node_type (Type[BaseNode]): type of cloned tree
|
|
|
|
Returns:
|
|
(BaseNode)
|
|
"""
|
|
if not isinstance(tree, BaseNode):
|
|
raise ValueError(
|
|
"Tree should be of type `BaseNode`, or inherit from `BaseNode`"
|
|
)
|
|
|
|
# Start from root
|
|
root_info = dict(tree.root.describe(exclude_prefix="_"))
|
|
root_node = node_type(**root_info)
|
|
|
|
def recursive_add_child(_new_parent_node, _parent_node):
|
|
for _child in _parent_node.children:
|
|
if _child:
|
|
child_info = dict(_child.describe(exclude_prefix="_"))
|
|
child_node = node_type(**child_info)
|
|
child_node.parent = _new_parent_node
|
|
recursive_add_child(child_node, _child)
|
|
|
|
recursive_add_child(root_node, tree.root)
|
|
return root_node
|
|
|
|
|
|
def prune_tree(tree: Node, prune_path: str, sep: str = "/") -> Node:
|
|
"""Prune tree to leave only the prune path, returns the root of a *copy* of the original tree.
|
|
|
|
All siblings along the prune path will be removed.
|
|
Prune path name should be unique, can be full path or partial path (trailing part of path) or node name.
|
|
|
|
Path should contain `Node` name, separated by `sep`.
|
|
- For example: Path string "a/b" refers to Node("b") with parent Node("a").
|
|
|
|
>>> from bigtree import Node, prune_tree, print_tree
|
|
>>> root = Node("a")
|
|
>>> b = Node("b", parent=root)
|
|
>>> c = Node("c", parent=root)
|
|
>>> print_tree(root)
|
|
a
|
|
├── b
|
|
└── c
|
|
|
|
>>> root_pruned = prune_tree(root, "a/b")
|
|
>>> print_tree(root_pruned)
|
|
a
|
|
└── b
|
|
|
|
Args:
|
|
tree (Node): existing tree
|
|
prune_path (str): prune path, all siblings along the prune path will be removed
|
|
sep (str): path separator
|
|
|
|
Returns:
|
|
(Node)
|
|
"""
|
|
prune_path = prune_path.replace(sep, tree.sep)
|
|
tree_copy = tree.copy()
|
|
child = find_path(tree_copy, prune_path)
|
|
if not child:
|
|
raise NotFoundError(
|
|
f"Cannot find any node matching path_name ending with {prune_path}"
|
|
)
|
|
|
|
if isinstance(child.parent, BinaryNode):
|
|
while child.parent:
|
|
child.parent.children = [child, None]
|
|
child = child.parent
|
|
return tree_copy
|
|
|
|
while child.parent:
|
|
child.parent.children = [child]
|
|
child = child.parent
|
|
return tree_copy
|
|
|
|
|
|
def get_tree_diff(
|
|
tree: Node, other_tree: Node, only_diff: bool = True
|
|
) -> Optional[Node]:
|
|
"""Get difference of `tree` to `other_tree`, changes are relative to `tree`.
|
|
|
|
(+) and (-) will be added relative to `tree`.
|
|
- For example: (+) refers to nodes that are in `other_tree` but not `tree`.
|
|
- For example: (-) refers to nodes that are in `tree` but not `other_tree`.
|
|
|
|
Note that only leaf nodes are compared and have (+) or (-) indicator. Intermediate parent nodes are not compared.
|
|
|
|
Function can return all original tree nodes and differences, or only the differences.
|
|
|
|
>>> from bigtree import Node, get_tree_diff, print_tree
|
|
>>> root = Node("a")
|
|
>>> b = Node("b", parent=root)
|
|
>>> c = Node("c", parent=root)
|
|
>>> d = Node("d", parent=b)
|
|
>>> e = Node("e", parent=root)
|
|
>>> print_tree(root)
|
|
a
|
|
├── b
|
|
│ └── d
|
|
├── c
|
|
└── e
|
|
|
|
>>> root_other = Node("a")
|
|
>>> b_other = Node("b", parent=root_other)
|
|
>>> c_other = Node("c", parent=b_other)
|
|
>>> d_other = Node("d", parent=root_other)
|
|
>>> e_other = Node("e", parent=root_other)
|
|
>>> print_tree(root_other)
|
|
a
|
|
├── b
|
|
│ └── c
|
|
├── d
|
|
└── e
|
|
|
|
>>> tree_diff = get_tree_diff(root, root_other)
|
|
>>> print_tree(tree_diff)
|
|
a
|
|
├── b
|
|
│ ├── c (+)
|
|
│ └── d (-)
|
|
├── c (-)
|
|
└── d (+)
|
|
|
|
>>> tree_diff = get_tree_diff(root, root_other, only_diff=False)
|
|
>>> print_tree(tree_diff)
|
|
a
|
|
├── b
|
|
│ ├── c (+)
|
|
│ └── d (-)
|
|
├── c (-)
|
|
├── d (+)
|
|
└── e
|
|
|
|
Args:
|
|
tree (Node): tree to be compared against
|
|
other_tree (Node): tree to be compared with
|
|
only_diff (bool): indicator to show all nodes or only nodes that are different (+/-), defaults to True
|
|
|
|
Returns:
|
|
(Node)
|
|
"""
|
|
tree = tree.copy()
|
|
other_tree = other_tree.copy()
|
|
name_col = "name"
|
|
path_col = "PATH"
|
|
indicator_col = "Exists"
|
|
|
|
data = tree_to_dataframe(tree, name_col=name_col, path_col=path_col, leaf_only=True)
|
|
data_other = tree_to_dataframe(
|
|
other_tree, name_col=name_col, path_col=path_col, leaf_only=True
|
|
)
|
|
data_both = data[[path_col, name_col]].merge(
|
|
data_other[[path_col, name_col]], how="outer", indicator=indicator_col
|
|
)
|
|
|
|
data_both[name_col] = np.where(
|
|
data_both[indicator_col] == "left_only",
|
|
data_both[name_col] + " (-)",
|
|
np.where(
|
|
data_both[indicator_col] == "right_only",
|
|
data_both[name_col] + " (+)",
|
|
data_both[name_col],
|
|
),
|
|
)
|
|
|
|
if only_diff:
|
|
data_both = data_both.query(f"{indicator_col} != 'both'")
|
|
data_both = data_both.drop(columns=indicator_col).sort_values(path_col)
|
|
if len(data_both):
|
|
return dataframe_to_tree(
|
|
data_both,
|
|
node_type=tree.__class__,
|
|
)
|