Files
common/python310/packages/bigtree/tree/helper.py

416 lines
14 KiB
Python

from collections import deque
from typing import Any, Deque, Dict, List, Set, Type, TypeVar, Union
from bigtree.node.basenode import BaseNode
from bigtree.node.binarynode import BinaryNode
from bigtree.node.node import Node
from bigtree.tree.construct import add_dict_to_tree_by_path, dataframe_to_tree
from bigtree.tree.export import tree_to_dataframe
from bigtree.tree.search import find_path
from bigtree.utils.exceptions import NotFoundError
from bigtree.utils.iterators import levelordergroup_iter
# Names exported by this module when it is star-imported.
__all__ = ["clone_tree", "get_subtree", "prune_tree", "get_tree_diff"]
# Type variables bound to the node classes, used so that the helpers below can be
# annotated as returning the same node type they were given.
BaseNodeT = TypeVar("BaseNodeT", bound=BaseNode)
BinaryNodeT = TypeVar("BinaryNodeT", bound=BinaryNode)
NodeT = TypeVar("NodeT", bound=Node)
def clone_tree(tree: BaseNode, node_type: Type[BaseNodeT]) -> BaseNodeT:
    """Clone tree to another ``Node`` type.
    If the same type is needed, simply do a tree.copy().

    Cloning always starts from ``tree.root``, so the whole tree that `tree` belongs to is cloned,
    even if `tree` itself is not the root. Every node is re-created by passing the result of
    ``describe(exclude_prefix="_")`` as keyword arguments to `node_type`.

    The traversal uses an explicit stack instead of recursion, so the depth of the tree is not
    limited by the interpreter's recursion limit. Nodes are created in the same (pre-order)
    sequence as a recursive traversal would create them.

    Examples:
        >>> from bigtree import BaseNode, Node, clone_tree
        >>> root = BaseNode(name="a")
        >>> b = BaseNode(name="b", parent=root)
        >>> clone_tree(root, Node)
        Node(/a, )

    Args:
        tree (BaseNode): tree to be cloned, must inherit from BaseNode
        node_type (Type[BaseNode]): type of cloned tree

    Raises:
        TypeError: if `tree` is not an instance of `BaseNode`

    Returns:
        (BaseNode)
    """
    if not isinstance(tree, BaseNode):
        raise TypeError("Tree should be of type `BaseNode`, or inherit from `BaseNode`")

    # Start from root
    source_root = tree.root
    root_node = node_type(**dict(source_root.describe(exclude_prefix="_")))

    # Each stack entry pairs an already-cloned node with an iterator over the children of its
    # source node that still have to be cloned.
    pending = [(root_node, iter(source_root.children))]
    while pending:
        new_parent, remaining_children = pending[-1]
        for child in remaining_children:
            # Falsy children are skipped (presumably empty child slots, e.g. in binary trees)
            if not child:
                continue
            child_node = node_type(**dict(child.describe(exclude_prefix="_")))
            child_node.parent = new_parent
            # Descend into this child before continuing with its siblings
            pending.append((child_node, iter(child.children)))
            break
        else:
            # All children of this node have been cloned
            pending.pop()
    return root_node
def get_subtree(
    tree: NodeT,
    node_name_or_path: str = "",
    max_depth: int = 0,
) -> NodeT:
    """Extract a subtree, selected by node name/path and optionally limited in depth.

    The function works on the result of ``tree.copy()``. The selected node is detached from its
    parent (if it has one) and returned as the root of the subtree.

    Examples:
        >>> from bigtree import Node, get_subtree
        >>> root = Node("a")
        >>> b = Node("b", parent=root)
        >>> c = Node("c", parent=b)
        >>> d = Node("d", parent=b)
        >>> e = Node("e", parent=root)
        >>> root.show()
        a
        ├── b
        │   ├── c
        │   └── d
        └── e

        Get subtree

        >>> root_subtree = get_subtree(root, "b")
        >>> root_subtree.show()
        b
        ├── c
        └── d

    Args:
        tree (Node): existing tree
        node_name_or_path (str): node name or path of the subtree root, defaults to empty string
            (the copy of `tree` itself is used)
        max_depth (int): maximum depth of subtree, based on `depth` attribute, defaults to 0
            (no depth limit)

    Raises:
        ValueError: if `node_name_or_path` is given but no matching node is found

    Returns:
        (Node)
    """
    subtree = tree.copy()

    if node_name_or_path:
        subtree = find_path(subtree, node_name_or_path)
        if not subtree:
            raise ValueError(f"Node name or path {node_name_or_path} not found")

    # Cut the selected node loose so that it becomes a root of its own
    if not subtree.is_root:
        subtree.parent = None

    return prune_tree(subtree, max_depth=max_depth) if max_depth else subtree
def prune_tree(
    tree: Union[BinaryNodeT, NodeT],
    prune_path: Union[List[str], str] = "",
    exact: bool = False,
    sep: str = "/",
    max_depth: int = 0,
) -> Union[BinaryNodeT, NodeT]:
    """Prune tree by path and/or depth; the pruning is applied to ``tree.copy()`` and that copy is returned.

    For pruning by `prune_path`,

    - All siblings along the prune path will be removed.
    - If ``exact=True``, all descendants of prune path will be removed.
    - Prune path can be string (only one path) or a list of strings (multiple paths).
    - Prune path name should be unique, can be full path, partial path (trailing part of path), or node name.

    For pruning by `max_depth`,

    - All nodes that are beyond `max_depth` will be removed.

    Path should contain ``Node`` name, separated by `sep`.

    - For example: Path string "a/b" refers to Node("b") with parent Node("a").

    Examples:
        >>> from bigtree import Node, prune_tree
        >>> root = Node("a")
        >>> b = Node("b", parent=root)
        >>> c = Node("c", parent=b)
        >>> d = Node("d", parent=b)
        >>> e = Node("e", parent=root)
        >>> root.show()
        a
        ├── b
        │   ├── c
        │   └── d
        └── e

        Prune (default is keep descendants)

        >>> root_pruned = prune_tree(root, "a/b")
        >>> root_pruned.show()
        a
        └── b
            ├── c
            └── d

        Prune exact path

        >>> root_pruned = prune_tree(root, "a/b", exact=True)
        >>> root_pruned.show()
        a
        └── b

        Prune multiple paths

        >>> root_pruned = prune_tree(root, ["a/b/d", "a/e"])
        >>> root_pruned.show()
        a
        ├── b
        │   └── d
        └── e

        Prune by depth

        >>> root_pruned = prune_tree(root, max_depth=2)
        >>> root_pruned.show()
        a
        ├── b
        └── e

    Args:
        tree (Union[BinaryNode, Node]): existing tree
        prune_path (List[str] | str): prune path(s), all siblings along the prune path(s) will be removed,
            defaults to empty string (no pruning by path)
        exact (bool): prune path(s) to be exactly the path, defaults to False (descendants of the path are retained)
        sep (str): path separator of `prune_path`, defaults to "/"
        max_depth (int): maximum depth of pruned tree, based on `depth` attribute, defaults to 0
            (no pruning by depth)

    Raises:
        ValueError: if neither `prune_path` nor `max_depth` is specified
        NotFoundError: if a prune path does not match any node

    Returns:
        (Union[BinaryNode, Node])
    """
    # Normalise a single path string into a (possibly empty) list of paths
    if isinstance(prune_path, str):
        prune_path = [prune_path] if prune_path else []

    has_paths = len(prune_path) > 0
    if not (has_paths or max_depth):
        raise ValueError("Please specify either `prune_path` or `max_depth` or both.")

    pruned_root = tree.copy()

    # Prune by path (prune bottom-up)
    if has_paths:
        # Nodes whose children get filtered: the ancestors of every target (and, when
        # `exact`, the targets themselves)
        filtered_parents: Set[Union[BinaryNodeT, NodeT]] = set()
        # Nodes matched by the prune paths
        targets: Set[Union[BinaryNodeT, NodeT]] = set()

        for raw_path in prune_path:
            # Translate the caller's separator into the separator used by the tree
            tree_path = raw_path.replace(sep, tree.sep)
            matched = find_path(pruned_root, tree_path)
            if not matched:
                raise NotFoundError(
                    f"Cannot find any node matching path_name ending with {tree_path}"
                )
            targets.add(matched)
            filtered_parents.update(matched.ancestors)

        if exact:
            filtered_parents.update(targets)

        # A child survives only if it lies on a prune path or is a target itself
        retained = filtered_parents | targets
        for parent_node in filtered_parents:
            for candidate in parent_node.children:
                if candidate and candidate not in retained:
                    candidate.parent = None

    # Prune by depth (prune top-down)
    if max_depth:
        for level, nodes_at_level in enumerate(
            levelordergroup_iter(pruned_root), start=1
        ):
            if level != max_depth:
                continue
            # Nodes on the deepest permitted level lose all of their children
            for boundary_node in nodes_at_level:
                del boundary_node.children

    return pruned_root
def _mark_changed_path(path: str, marker_by_path: Dict[str, str], sep: str) -> str:
    """Append the change marker to every component of `path` whose own path is in `marker_by_path`."""
    marked_parts: List[str] = []
    prefix = ""
    for idx, part in enumerate(path.split(sep)):
        # `prefix` is the unmarked path of the node that this component refers to
        prefix = part if idx == 0 else f"{prefix}{sep}{part}"
        marked_parts.append(part + marker_by_path.get(prefix, ""))
    return sep.join(marked_parts)


def get_tree_diff(
    tree: Node, other_tree: Node, only_diff: bool = True, attr_list: List[str] = []
) -> Node:
    """Get difference of `tree` to `other_tree`, changes are relative to `tree`.

    Compares the difference in tree structure (default), but can also compare tree attributes using `attr_list`.
    Function can return only the differences (default), or all original tree nodes and differences.

    Comparing tree structure:

    - (+) and (-) will be added to node name relative to `tree`.
    - For example: (+) refers to nodes that are in `other_tree` but not `tree`.
    - For example: (-) refers to nodes that are in `tree` but not `other_tree`.

    Node paths are matched literally and per path component, so node names may contain
    characters that are special in regular expressions, and nodes whose names merely start
    with the name of an added/removed sibling are not affected.

    While the trees are exported for comparison, the separator of `other_tree` is temporarily
    set to the separator of `tree`; it is restored before the function returns.

    Examples:
        >>> # Create original tree
        >>> from bigtree import Node, get_tree_diff, list_to_tree
        >>> root = list_to_tree(["Downloads/Pictures/photo1.jpg", "Downloads/file1.doc", "Downloads/photo2.jpg"])
        >>> root.show()
        Downloads
        ├── Pictures
        │   └── photo1.jpg
        ├── file1.doc
        └── photo2.jpg

        >>> # Create other tree
        >>> root_other = list_to_tree(["Downloads/Pictures/photo1.jpg", "Downloads/Pictures/photo2.jpg", "Downloads/file1.doc"])
        >>> root_other.show()
        Downloads
        ├── Pictures
        │   ├── photo1.jpg
        │   └── photo2.jpg
        └── file1.doc

        >>> # Get tree differences
        >>> tree_diff = get_tree_diff(root, root_other)
        >>> tree_diff.show()
        Downloads
        ├── photo2.jpg (-)
        └── Pictures
            └── photo2.jpg (+)

        >>> tree_diff = get_tree_diff(root, root_other, only_diff=False)
        >>> tree_diff.show()
        Downloads
        ├── Pictures
        │   ├── photo1.jpg
        │   └── photo2.jpg (+)
        ├── file1.doc
        └── photo2.jpg (-)

        Comparing tree attributes

        - (~) will be added to node name if there are differences in tree attributes defined in `attr_list`.
        - The node's attributes will be a tuple of (value in `tree`, value in `other_tree`)

        >>> # Create original tree
        >>> root = Node("Downloads")
        >>> picture_folder = Node("Pictures", parent=root)
        >>> photo1 = Node("photo1.jpg", tags="photo1", parent=picture_folder)
        >>> file1 = Node("file1.doc", tags="file1", parent=root)
        >>> root.show(attr_list=["tags"])
        Downloads
        ├── Pictures
        │   └── photo1.jpg [tags=photo1]
        └── file1.doc [tags=file1]

        >>> # Create other tree
        >>> root_other = Node("Downloads")
        >>> picture_folder = Node("Pictures", parent=root_other)
        >>> photo1 = Node("photo1.jpg", tags="photo1-edited", parent=picture_folder)
        >>> photo2 = Node("photo2.jpg", tags="photo2-new", parent=picture_folder)
        >>> file1 = Node("file1.doc", tags="file1", parent=root_other)
        >>> root_other.show(attr_list=["tags"])
        Downloads
        ├── Pictures
        │   ├── photo1.jpg [tags=photo1-edited]
        │   └── photo2.jpg [tags=photo2-new]
        └── file1.doc [tags=file1]

        >>> # Get tree differences
        >>> tree_diff = get_tree_diff(root, root_other, attr_list=["tags"])
        >>> tree_diff.show(attr_list=["tags"])
        Downloads
        └── Pictures
            ├── photo1.jpg (~) [tags=('photo1', 'photo1-edited')]
            └── photo2.jpg (+)

    Args:
        tree (Node): tree to be compared against
        other_tree (Node): tree to be compared with
        only_diff (bool): indicator to show all nodes or only nodes that are different (+/-), defaults to True
        attr_list (List[str]): tree attributes to check for difference, defaults to empty list
            (the default list is only read, never mutated)

    Returns:
        (Node): tree of differences, built with the class of `tree`. ``None`` is returned when
            there is nothing to show (for example identical trees with ``only_diff=True``).
    """
    name_col = "name"
    path_col = "PATH"
    indicator_col = "Exists"
    sep = tree.sep

    # Paths of both trees are only comparable when exported with the same separator.
    # The caller's `other_tree` is restored afterwards so the comparison has no lasting side effect.
    other_tree_sep = other_tree.sep
    other_tree.sep = sep
    try:
        data, data_other = [
            tree_to_dataframe(
                _tree,
                name_col=name_col,
                path_col=path_col,
                attr_dict={k: k for k in attr_list},
            )
            for _tree in (tree, other_tree)
        ]
    finally:
        other_tree.sep = other_tree_sep

    # Check tree structure difference
    compare_cols = [path_col, name_col] + attr_list
    data_both = data[compare_cols].merge(
        data_other[compare_cols],
        how="outer",
        on=[path_col, name_col],
        indicator=indicator_col,
    )

    # Handle tree structure difference
    # Map the (unmarked) path of every removed/added node to the marker its name receives
    marker_by_path: Dict[str, str] = {}
    for indicator_value, marker in (("left_only", " (-)"), ("right_only", " (+)")):
        changed_rows = data_both[data_both[indicator_col] == indicator_value]
        for changed_path in changed_rows[path_col]:
            marker_by_path[changed_path] = marker
    if marker_by_path:
        # Marking is done per path component so that descendants of a removed/added node
        # carry the marked ancestor name in their own path
        data_both[path_col] = data_both[path_col].map(
            lambda path: _mark_changed_path(path, marker_by_path, sep)
        )

    # Check tree attribute difference
    path_changes_list_of_dict: List[Dict[str, Dict[str, Any]]] = []
    path_changes_deque: Deque[str] = deque([])
    for attr_change in attr_list:
        values_tree = data_both[f"{attr_change}_x"]
        values_other = data_both[f"{attr_change}_y"]
        # Only nodes present in both trees can have a changed attribute; pairs where both
        # values are missing are not a change
        condition_diff = (
            (~values_tree.isnull() | ~values_other.isnull())
            & (values_tree != values_other)
            & (data_both[indicator_col] == "both")
        )
        data_diff = data_both[condition_diff]
        if len(data_diff):
            value_pairs = zip(
                data_diff[f"{attr_change}_x"], data_diff[f"{attr_change}_y"]
            )
            dict_path_diff = {
                path: {attr_change: pair}
                for path, pair in zip(data_diff[path_col], value_pairs)
            }
            path_changes_list_of_dict.append(dict_path_diff)
            path_changes_deque.extend(data_diff[path_col])

    if only_diff:
        data_both = data_both[
            (data_both[indicator_col] != "both")
            | (data_both[path_col].isin(path_changes_deque))
        ]
    data_both = data_both[[path_col]]

    if not len(data_both):
        # No rows left, so no tree can be built. The `Node` return annotation is kept as-is
        # for compatibility with existing callers.
        return None  # type: ignore[return-value]

    tree_diff = dataframe_to_tree(data_both, node_type=tree.__class__)

    # Handle tree attribute difference
    if len(path_changes_deque):
        path_changes_list = sorted(path_changes_deque, reverse=True)
        # Renaming is applied last, after the attribute tuples have been attached by path
        name_changes = {
            k: {"name": f"{k.split(sep)[-1]} (~)"} for k in path_changes_list
        }
        path_changes_list_of_dict.append(name_changes)
        for attr_change_dict in path_changes_list_of_dict:
            tree_diff = add_dict_to_tree_by_path(tree_diff, attr_change_dict)
    return tree_diff