Added third-party packages: elog and bigtree
This commit is contained in:
0
python37/packages/bigtree/dag/__init__.py
Normal file
0
python37/packages/bigtree/dag/__init__.py
Normal file
186
python37/packages/bigtree/dag/construct.py
Normal file
186
python37/packages/bigtree/dag/construct.py
Normal file
@@ -0,0 +1,186 @@
|
||||
from typing import List, Tuple, Type
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from bigtree.node.dagnode import DAGNode
|
||||
|
||||
__all__ = ["list_to_dag", "dict_to_dag", "dataframe_to_dag"]
|
||||
|
||||
|
||||
def list_to_dag(
    relations: List[Tuple[str, str]],
    node_type: Type[DAGNode] = DAGNode,
) -> DAGNode:
    """Build a DAG from a list of ``(parent, child)`` name pairs.

    Note that node names must be unique.
    The pairs are loaded into a two-column DataFrame and the construction itself
    is delegated to `dataframe_to_dag`.

    >>> from bigtree import list_to_dag, dag_iterator
    >>> relations_list = [("a", "c"), ("a", "d"), ("b", "c"), ("c", "d"), ("d", "e")]
    >>> dag = list_to_dag(relations_list)
    >>> [(parent.node_name, child.node_name) for parent, child in dag_iterator(dag)]
    [('a', 'd'), ('c', 'd'), ('d', 'e'), ('a', 'c'), ('b', 'c')]

    Args:
        relations (list): list containing tuple of parent-child names
        node_type (Type[DAGNode]): node type of DAG to be created, defaults to DAGNode

    Returns:
        (DAGNode)

    Raises:
        ValueError: if `relations` is empty
    """
    if len(relations) == 0:
        raise ValueError("Input list does not contain any data, check `relations`")

    # First tuple element is the parent, second is the child
    edge_frame = pd.DataFrame(relations, columns=["parent", "child"])
    dag = dataframe_to_dag(
        edge_frame,
        child_col="child",
        parent_col="parent",
        node_type=node_type,
    )
    return dag
|
||||
|
||||
|
||||
def dict_to_dag(
    relation_attrs: dict,
    parent_key: str = "parents",
    node_type: Type[DAGNode] = DAGNode,
) -> DAGNode:
    """Construct DAG from nested dictionary, ``key``: child name, ``value``: dict of parent names, attribute name and
    attribute value.
    Note that node names must be unique.

    >>> from bigtree import dict_to_dag, dag_iterator
    >>> relation_dict = {
    ...     "a": {"step": 1},
    ...     "b": {"step": 1},
    ...     "c": {"parents": ["a", "b"], "step": 2},
    ...     "d": {"parents": ["a", "c"], "step": 2},
    ...     "e": {"parents": ["d"], "step": 3},
    ... }
    >>> dag = dict_to_dag(relation_dict, parent_key="parents")
    >>> [(parent.node_name, child.node_name) for parent, child in dag_iterator(dag)]
    [('a', 'd'), ('c', 'd'), ('d', 'e'), ('a', 'c'), ('b', 'c')]

    Args:
        relation_attrs (dict): dictionary containing node, node parents, and node attribute information,
            key: child name, value: dict of parent names, node attribute and attribute value
        parent_key (str): key of dictionary to retrieve list of parents name, defaults to "parents"
        node_type (Type[DAGNode]): node type of DAG to be created, defaults to DAGNode

    Returns:
        (DAGNode)

    Raises:
        ValueError: if `relation_attrs` is empty
        AssertionError: if `parent_key` does not appear in any of the nested dictionaries
    """
    if not len(relation_attrs):
        raise ValueError("Dictionary does not contain any data, check `relation_attrs`")

    # Convert dictionary to dataframe: after the transpose there is one row per node,
    # and the node name is moved from the index into the `_tmp_child` column
    data = pd.DataFrame(relation_attrs).T.rename_axis("_tmp_child").reset_index()
    if parent_key not in data:
        # Raised explicitly instead of via `assert` so the check is not stripped when
        # Python runs with -O; the exception type is kept for existing callers
        raise AssertionError(
            f"Parent key {parent_key} not in dictionary, check `relation_attrs` and `parent_key`"
        )

    # One row per (child, parent) pair; nodes without parents keep a single row
    data = data.explode(parent_key)
    return dataframe_to_dag(
        data,
        child_col="_tmp_child",
        parent_col=parent_key,
        node_type=node_type,
    )
|
||||
|
||||
|
||||
def dataframe_to_dag(
    data: pd.DataFrame,
    child_col: str = None,
    parent_col: str = None,
    attribute_cols: list = None,
    node_type: Type[DAGNode] = DAGNode,
) -> DAGNode:
    """Construct DAG from pandas DataFrame.
    Note that node names must be unique.

    `child_col` and `parent_col` specify columns for child name and parent name to construct DAG.
    `attribute_cols` specify columns for node attribute for child name
    If columns are not specified, `child_col` takes first column, `parent_col` takes second column, and all other
    columns are `attribute_cols`.

    >>> import pandas as pd
    >>> from bigtree import dataframe_to_dag, dag_iterator
    >>> relation_data = pd.DataFrame([
    ...        ["a", None, 1],
    ...        ["b", None, 1],
    ...        ["c", "a", 2],
    ...        ["c", "b", 2],
    ...        ["d", "a", 2],
    ...        ["d", "c", 2],
    ...        ["e", "d", 3],
    ...    ],
    ...    columns=["child", "parent", "step"]
    ... )
    >>> dag = dataframe_to_dag(relation_data)
    >>> [(parent.node_name, child.node_name) for parent, child in dag_iterator(dag)]
    [('a', 'd'), ('c', 'd'), ('d', 'e'), ('a', 'c'), ('b', 'c')]

    Args:
        data (pandas.DataFrame): data containing path and node attribute information
        child_col (str): column of data containing child name information, defaults to None
            if not set, it will take the first column of data
        parent_col (str): column of data containing parent name information, defaults to None
            if not set, it will take the second column of data
        attribute_cols (list): columns of data containing child node attribute information, defaults to None
            if not set (or empty), it will take all columns of data except `child_col` and `parent_col`
        node_type (Type[DAGNode]): node type of DAG to be created, defaults to DAGNode

    Returns:
        (DAGNode): parent node of the last row that has a parent; if no row has a parent,
            the node of the last row is returned instead

    Raises:
        ValueError: if `data` has no columns or no rows, if a child name appears with
            conflicting attribute values, or if a child name is null
    """
    if not len(data.columns):
        raise ValueError("Data does not contain any columns, check `data`")
    if not len(data):
        raise ValueError("Data does not contain any rows, check `data`")

    if not child_col:
        child_col = data.columns[0]
    if not parent_col:
        parent_col = data.columns[1]
    if attribute_cols is None or not len(attribute_cols):
        attribute_cols = list(data.columns)
        attribute_cols.remove(child_col)
        attribute_cols.remove(parent_col)
    else:
        # Work on a private list so the caller's sequence is never touched
        attribute_cols = list(attribute_cols)

    # A child may appear on several rows (one per parent), but its attributes must agree
    data_check = data[[child_col] + attribute_cols].drop_duplicates()
    _duplicate_check = (
        data_check[child_col]
        .value_counts()
        .to_frame("counts")
        .rename_axis(child_col)
        .reset_index()
    )
    _duplicate_check = _duplicate_check[_duplicate_check["counts"] > 1]
    if len(_duplicate_check):
        raise ValueError(
            f"There exists duplicate child name with different attributes\nCheck {_duplicate_check}"
        )
    if np.any(data[child_col].isnull()):
        raise ValueError(f"Child name cannot be empty, check {child_col}")

    # Only the selected attribute columns become node attributes; the name columns never do
    attr_names = [
        col for col in attribute_cols if col not in (child_col, parent_col)
    ]

    node_dict = dict()
    parent_node = None
    child_node = None

    for row in data.reset_index(drop=True).to_dict(orient="index").values():
        child_name = row[child_col]
        parent_name = row[parent_col]
        # Null attribute values are skipped rather than stored on the node
        node_attrs = {
            col: row[col] for col in attr_names if not pd.isnull(row[col])
        }

        child_node = node_dict.get(child_name)
        if child_node is None:
            child_node = node_type(child_name)
            node_dict[child_name] = child_node
        # Set attributes even on an existing node: it may have been created earlier
        # as a bare parent, before its own row was reached
        child_node.set_attrs(node_attrs)

        if not pd.isnull(parent_name):
            parent_node = node_dict.get(parent_name)
            if parent_node is None:
                parent_node = node_type(parent_name)
                node_dict[parent_name] = parent_node
            child_node.parents = [parent_node]

    # Fall back to the last node built when no row has a parent, so a node is always returned
    return parent_node if parent_node is not None else child_node
|
||||
269
python37/packages/bigtree/dag/export.py
Normal file
269
python37/packages/bigtree/dag/export.py
Normal file
@@ -0,0 +1,269 @@
|
||||
from typing import Any, Dict, List, Tuple, Union
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from bigtree.node.dagnode import DAGNode
|
||||
from bigtree.utils.iterators import dag_iterator
|
||||
|
||||
__all__ = ["dag_to_list", "dag_to_dict", "dag_to_dataframe", "dag_to_dot"]
|
||||
|
||||
|
||||
def dag_to_list(
    dag: DAGNode,
) -> List[Tuple[str, str]]:
    """Export DAG as a list of ``(parent name, child name)`` tuples.

    One tuple is produced per pair yielded by `dag_iterator`, in iteration order.

    >>> from bigtree import DAGNode, dag_to_list
    >>> a = DAGNode("a", step=1)
    >>> b = DAGNode("b", step=1)
    >>> c = DAGNode("c", step=2, parents=[a, b])
    >>> d = DAGNode("d", step=2, parents=[a, c])
    >>> e = DAGNode("e", step=3, parents=[d])
    >>> dag_to_list(a)
    [('a', 'c'), ('a', 'd'), ('b', 'c'), ('c', 'd'), ('d', 'e')]

    Args:
        dag (DAGNode): DAG to be exported

    Returns:
        (List[Tuple[str, str]])
    """
    return [
        (upstream.node_name, downstream.node_name)
        for upstream, downstream in dag_iterator(dag)
    ]
|
||||
|
||||
|
||||
def dag_to_dict(
    dag: DAGNode,
    parent_key: str = "parents",
    attr_dict: dict = None,
    all_attrs: bool = False,
) -> Dict[str, Any]:
    """Export DAG to dictionary.

    Exported dictionary will have key as child name, and parent names and node attributes as a nested dictionary.
    Root nodes have no `parent_key` entry in their nested dictionary.

    >>> from bigtree import DAGNode, dag_to_dict
    >>> a = DAGNode("a", step=1)
    >>> b = DAGNode("b", step=1)
    >>> c = DAGNode("c", step=2, parents=[a, b])
    >>> d = DAGNode("d", step=2, parents=[a, c])
    >>> e = DAGNode("e", step=3, parents=[d])
    >>> dag_to_dict(a, parent_key="parent", attr_dict={"step": "step no."})
    {'a': {'step no.': 1}, 'c': {'parent': ['a', 'b'], 'step no.': 2}, 'd': {'parent': ['a', 'c'], 'step no.': 2}, 'b': {'step no.': 1}, 'e': {'parent': ['d'], 'step no.': 3}}

    Args:
        dag (DAGNode): DAG to be exported
        parent_key (str): dictionary key for the list of parent node names, defaults to `parents`
        attr_dict (dict): dictionary mapping node attributes to dictionary key,
            key: node attributes, value: corresponding dictionary key, optional
        all_attrs (bool): indicator whether to retrieve all `Node` attributes,
            takes precedence over `attr_dict` when True

    Returns:
        (dict)
    """
    # `None` default instead of a shared mutable `{}` default
    if attr_dict is None:
        attr_dict = {}

    def _export_attrs(node) -> dict:
        """Collect the exported attributes of a single node."""
        exported = {}
        if all_attrs:
            exported.update(
                node.describe(exclude_attributes=["name"], exclude_prefix="_")
            )
        else:
            for attr_name, export_key in attr_dict.items():
                exported[export_key] = node.get_attr(attr_name)
        return exported

    # NOTE(review): the export iterates over a copy, presumably so the caller's DAG
    # is left untouched - confirm against DAGNode.copy
    dag = dag.copy()
    data_dict = {}

    for parent_node, child_node in dag_iterator(dag):
        # Roots never appear as a child, so they are recorded when seen as a parent
        if parent_node.is_root:
            data_dict[parent_node.node_name] = _export_attrs(parent_node)

        existing_child = data_dict.get(child_node.node_name)
        if existing_child:
            # Child already exported via another parent: only extend its parent list
            existing_child[parent_key].append(parent_node.node_name)
        else:
            data_child = {parent_key: [parent_node.node_name]}
            data_child.update(_export_attrs(child_node))
            data_dict[child_node.node_name] = data_child
    return data_dict
|
||||
|
||||
|
||||
def dag_to_dataframe(
    dag: DAGNode,
    name_col: str = "name",
    parent_col: str = "parent",
    attr_dict: dict = None,
    all_attrs: bool = False,
) -> pd.DataFrame:
    """Export DAG to pandas DataFrame.

    Each row is one (node, parent) relation; a node with several parents appears on several rows,
    and root nodes appear once with `parent_col` set to None.

    >>> from bigtree import DAGNode, dag_to_dataframe
    >>> a = DAGNode("a", step=1)
    >>> b = DAGNode("b", step=1)
    >>> c = DAGNode("c", step=2, parents=[a, b])
    >>> d = DAGNode("d", step=2, parents=[a, c])
    >>> e = DAGNode("e", step=3, parents=[d])
    >>> dag_to_dataframe(a, name_col="name", parent_col="parent", attr_dict={"step": "step no."})
      name parent  step no.
    0    a   None         1
    1    c      a         2
    2    d      a         2
    3    b   None         1
    4    c      b         2
    5    d      c         2
    6    e      d         3

    Args:
        dag (DAGNode): DAG to be exported
        name_col (str): column name for `node.node_name`, defaults to 'name'
        parent_col (str): column name for the parent node name, defaults to 'parent'
        attr_dict (dict): dictionary mapping node attributes to column name,
            key: node attributes, value: corresponding column in dataframe, optional
        all_attrs (bool): indicator whether to retrieve all `Node` attributes,
            takes precedence over `attr_dict` when True

    Returns:
        (pd.DataFrame)
    """
    # `None` default instead of a shared mutable `{}` default
    if attr_dict is None:
        attr_dict = {}

    def _build_row(node, parent_name) -> dict:
        """Assemble the output row for `node` under the given parent name."""
        row = {name_col: node.node_name, parent_col: parent_name}
        if all_attrs:
            row.update(node.describe(exclude_attributes=["name"], exclude_prefix="_"))
        else:
            for attr_name, column in attr_dict.items():
                row[column] = node.get_attr(attr_name)
        return row

    # NOTE(review): the export iterates over a copy, presumably so the caller's DAG
    # is left untouched - confirm against DAGNode.copy
    dag = dag.copy()
    data_list = []

    for parent_node, child_node in dag_iterator(dag):
        # Roots never appear as a child, so they get a row when seen as a parent
        if parent_node.is_root:
            data_list.append(_build_row(parent_node, None))
        data_list.append(_build_row(child_node, parent_node.node_name))

    # A root is appended once per outgoing edge; drop the repeated rows
    return pd.DataFrame(data_list).drop_duplicates().reset_index(drop=True)
|
||||
|
||||
|
||||
def dag_to_dot(
    dag: Union[DAGNode, List[DAGNode]],
    rankdir: str = "TB",
    bg_colour: str = None,
    node_colour: str = None,
    edge_colour: str = None,
    node_attr: str = None,
    edge_attr: str = None,
):
    r"""Export DAG tree or list of DAG trees to image.
    Note that node names must be unique.
    Possible node attributes include style, fillcolor, shape.

    >>> from bigtree import DAGNode, dag_to_dot
    >>> a = DAGNode("a", step=1)
    >>> b = DAGNode("b", step=1)
    >>> c = DAGNode("c", step=2, parents=[a, b])
    >>> d = DAGNode("d", step=2, parents=[a, c])
    >>> e = DAGNode("e", step=3, parents=[d])
    >>> dag_graph = dag_to_dot(a)

    Export to image, dot file, etc.

    >>> dag_graph.write_png("tree_dag.png")
    >>> dag_graph.write_dot("tree_dag.dot")

    Export to string

    >>> dag_graph.to_string()
    'strict digraph G {\nrankdir=TB;\nc [label=c];\na [label=a];\na -> c;\nd [label=d];\na [label=a];\na -> d;\nc [label=c];\nb [label=b];\nb -> c;\nd [label=d];\nc [label=c];\nc -> d;\ne [label=e];\nd [label=d];\nd -> e;\n}\n'

    Args:
        dag (Union[DAGNode, List[DAGNode]]): DAG or list of DAGs to be exported
        rankdir (str): set direction of graph layout, defaults to 'TB', can be 'BT, 'LR', 'RL'
        bg_colour (str): background color of image, defaults to None
        node_colour (str): fill colour of nodes, defaults to None
        edge_colour (str): colour of edges, defaults to None
        node_attr (str): name of the node attribute holding a style dictionary, overrides node_colour,
            defaults to None
            Possible node attributes include {"style": "filled", "fillcolor": "gold"}
        edge_attr (str): name of the node attribute holding an edge style dictionary, overrides edge_colour,
            defaults to None; it is read from the child node of each edge
            Possible edge attributes include {"style": "bold", "label": "edge label", "color": "black"}

    Returns:
        (pydot.Dot)

    Raises:
        ImportError: if pydot is not installed
        ValueError: if any item of `dag` is not a `DAGNode`
    """
    try:
        import pydot
    except ImportError:  # pragma: no cover
        raise ImportError(
            "pydot not available. Please perform a\n\npip install 'bigtree[image]'\n\nto install required dependencies"
        )

    # Get style
    graph_style = dict(bgcolor=bg_colour) if bg_colour else dict()
    node_style = dict(style="filled", fillcolor=node_colour) if node_colour else dict()
    edge_style = dict(color=edge_colour) if edge_colour else dict()

    _graph = pydot.Dot(
        graph_type="digraph", strict=True, rankdir=rankdir, **graph_style
    )

    if not isinstance(dag, list):
        dag = [dag]

    for _dag in dag:
        if not isinstance(_dag, DAGNode):
            raise ValueError(
                "Tree should be of type `DAGNode`, or inherit from `DAGNode`"
            )
        _dag = _dag.copy()

        for parent_node, child_node in dag_iterator(_dag):
            child_name = child_node.name
            child_node_style = node_style.copy()
            if node_attr and child_node.get_attr(node_attr):
                child_node_style.update(child_node.get_attr(node_attr))

            # Copy per edge so one edge's attributes never leak onto later edges,
            # and skip nodes with no edge attribute, mirroring the node_attr handling
            child_edge_style = edge_style.copy()
            if edge_attr and child_node.get_attr(edge_attr):
                child_edge_style.update(child_node.get_attr(edge_attr))

            pydot_child = pydot.Node(
                name=child_name, label=child_name, **child_node_style
            )
            _graph.add_node(pydot_child)

            parent_name = parent_node.name
            parent_node_style = node_style.copy()
            if node_attr and parent_node.get_attr(node_attr):
                parent_node_style.update(parent_node.get_attr(node_attr))
            pydot_parent = pydot.Node(
                name=parent_name, label=parent_name, **parent_node_style
            )
            _graph.add_node(pydot_parent)

            edge = pydot.Edge(parent_name, child_name, **child_edge_style)
            _graph.add_edge(edge)

    return _graph
|
||||
Reference in New Issue
Block a user