Fixed database typo and removed unnecessary class identifier.

This commit is contained in:
Batuhan Berk Başoğlu 2020-10-14 10:10:37 -04:00
parent 00ad49a143
commit 45fb349a7d
5098 changed files with 952558 additions and 85 deletions

View file

@ -0,0 +1,6 @@
from .branchings import *
from .coding import *
from .mst import *
from .recognition import *
from .operations import *
from .decomposition import *

View file

@ -0,0 +1,726 @@
"""
Algorithms for finding optimum branchings and spanning arborescences.
This implementation is based on:
J. Edmonds, Optimum branchings, J. Res. Natl. Bur. Standards 71B (1967),
233–240. URL: http://archive.org/details/jresv71Bn4p233
"""
# TODO: Implement method from Gabow, Galil, Spencer and Tarjan:
#
# @article{
# year={1986},
# issn={0209-9683},
# journal={Combinatorica},
# volume={6},
# number={2},
# doi={10.1007/BF02579168},
# title={Efficient algorithms for finding minimum spanning trees in
# undirected and directed graphs},
# url={https://doi.org/10.1007/BF02579168},
# publisher={Springer-Verlag},
# keywords={68 B 15; 68 C 05},
# author={Gabow, Harold N. and Galil, Zvi and Spencer, Thomas and Tarjan,
# Robert E.},
# pages={109-122},
# language={English}
# }
import string
from operator import itemgetter
import networkx as nx
from networkx.utils import py_random_state
from .recognition import is_arborescence, is_branching
# Names exported by ``from .branchings import *``.
__all__ = [
    "branching_weight",
    "greedy_branching",
    "maximum_branching",
    "minimum_branching",
    "maximum_spanning_arborescence",
    "minimum_spanning_arborescence",
    "Edmonds",
]

# Accepted values of the ``kind`` argument; any other value is rejected with
# a NetworkXException by `greedy_branching` and `Edmonds._init`.
KINDS = {"max", "min"}

# Maps each style spelling to a canonical style name.
# NOTE(review): not referenced by the code visible in this module -- confirm
# whether it is still needed.
STYLES = {
    "branching": "branching",
    "arborescence": "arborescence",
    "spanning arborescence": "arborescence",
}

# Positive infinity; used as the starting sentinel when searching for the
# largest / smallest edge weight.
INF = float("inf")
@py_random_state(1)
def random_string(L=15, seed=None):
    """Return a string of ``L`` randomly chosen ASCII letters.

    Parameters
    ----------
    L : int
        Number of characters in the generated string.
    seed : integer, random_state, or None (default)
        Indicator of random number generation state. The body calls
        ``seed.choice``, so the decorator is presumably what turns this
        argument into a random-number generator object.

    Returns
    -------
    str
        The generated string, drawn from ``string.ascii_letters``.
    """
    alphabet = string.ascii_letters
    return "".join(seed.choice(alphabet) for _ in range(L))
def _min_weight(weight):
return -weight
def _max_weight(weight):
return weight
def branching_weight(G, attr="weight", default=1):
    """
    Returns the total weight of a branching.

    Parameters
    ----------
    G : graph
        The branching whose edges are summed.
    attr : str
        Name of the edge attribute holding the weight.
    default : float
        Weight used for any edge that lacks the attribute `attr`.

    Returns
    -------
    total
        Sum over all edges of the `attr` value (or `default`); 0 when the
        graph has no edges.
    """
    total = 0
    for _, _, data in G.edges(data=True):
        # Plain `+` rather than `+=`, so weight objects are never mutated.
        total = total + data.get(attr, default)
    return total
@py_random_state(4)
def greedy_branching(G, attr="weight", default=1, kind="max", seed=None):
    """
    Returns a branching obtained through a greedy algorithm.

    This algorithm is wrong, and cannot give a proper optimal branching.
    However, we include it for pedagogical reasons, as it can be helpful to
    see what its outputs are.

    The output is a branching, and possibly, a spanning arborescence. However,
    it is not guaranteed to be optimal in either case.

    Parameters
    ----------
    G : DiGraph
        The directed graph to scan.
    attr : str or None
        The attribute to use as weights. If None, every edge is treated
        equally (each gets the weight `default`) and no weight attribute is
        stored on the edges of the returned branching.
    default : float
        When `attr` is not None, then if an edge does not have that attribute,
        `default` specifies what value it should take.
    kind : str
        The type of optimum to search for: 'min' or 'max' greedy branching.
    seed : integer, random_state, or None (default)
        Indicator of random number generation state.
        See :ref:`Randomness<randomness>`.

    Returns
    -------
    B : directed graph
        The greedily obtained branching.

    Raises
    ------
    NetworkXException
        If `kind` is neither 'min' nor 'max'.
    """
    if kind not in KINDS:
        raise nx.NetworkXException("Unknown value for `kind`.")

    # Ascending weights for a minimum branching, descending for a maximum.
    reverse = kind != "min"

    # Record the caller's intent *before* `attr` is replaced by a placeholder;
    # otherwise the placeholder name would leak onto the output edges.
    store_weights = attr is not None
    if attr is None:
        # Generate a random string the graph probably won't have.
        attr = random_string(seed=seed)

    edges = [(u, v, data.get(attr, default)) for (u, v, data) in G.edges(data=True)]

    # We sort by weight, but also by nodes to normalize behavior across runs.
    try:
        edges.sort(key=itemgetter(2, 0, 1), reverse=reverse)
    except TypeError:
        # Nodes of varying types cannot be compared with each other; fall
        # back to ordering by weight only.
        edges.sort(key=itemgetter(2), reverse=reverse)

    # The branching begins with a forest of no edges.
    B = nx.DiGraph()
    B.add_nodes_from(G)

    # Now we add edges greedily so long we maintain the branching.
    uf = nx.utils.UnionFind()
    for u, v, w in edges:
        if uf[u] == uf[v]:
            # u and v are already connected: the edge would close a cycle.
            continue
        if B.in_degree(v) == 1:
            # The edge would increase the in-degree to be greater than one.
            continue
        if store_weights:
            B.add_edge(u, v, **{attr: w})
        else:
            B.add_edge(u, v)
        uf.union(u, v)

    return B
class MultiDiGraph_EdgeKey(nx.MultiDiGraph):
    """
    MultiDiGraph which assigns unique keys to every edge.

    Adds a dictionary edge_index which maps edge keys to (u, v, data) tuples.

    This is not a complete implementation. For Edmonds algorithm, we only use
    add_node and add_edge, so that is all that is implemented here. A key may
    only be reused for the same (u, v) pair; reusing it for a different pair
    raises an exception.

    Why do we need this? Edmonds algorithm requires that we track edges, even
    as we change the head and tail of an edge, and even changing the weight
    of edges. We must reliably track edges across graph mutations.
    """

    def __init__(self, incoming_graph_data=None, **attr):
        parent = super()
        parent.__init__(incoming_graph_data=incoming_graph_data, **attr)
        # The super() proxy is kept so the overrides below can reach the
        # unmodified MultiDiGraph implementations.
        # NOTE(review): `_cls` and `edge_index` are only set after the parent
        # initializer returns; passing `incoming_graph_data` presumably goes
        # through the overridden `add_edge` before they exist -- confirm.
        self._cls = parent
        self.edge_index = {}

    def remove_node(self, n):
        """Remove node `n` and drop its incident edges from `edge_index`."""
        doomed_keys = set()
        for adjacency in (self.pred[n], self.succ[n]):
            for keydict in adjacency.values():
                doomed_keys.update(keydict)

        for edge_key in doomed_keys:
            del self.edge_index[edge_key]

        self._cls.remove_node(n)

    def remove_nodes_from(self, nbunch):
        """Remove every node in `nbunch`, one `remove_node` call each."""
        for node in nbunch:
            self.remove_node(node)

    def add_edge(self, u_for_edge, v_for_edge, key_for_edge, **attr):
        """
        Key is now required.

        Raises an Exception if the key is already registered for a different
        (u, v) pair.
        """
        u, v, key = u_for_edge, v_for_edge, key_for_edge
        if key in self.edge_index:
            known_u, known_v, _ = self.edge_index[key]
            if (u != known_u) or (v != known_v):
                raise Exception(f"Key {key!r} is already in use.")

        self._cls.add_edge(u, v, key, **attr)
        # Index the graph's own attribute dict for this edge (not a copy).
        self.edge_index[key] = (u, v, self.succ[u][v][key])

    def add_edges_from(self, ebunch_to_add, **attr):
        """Add each ``(u, v, key, data)`` entry; `attr` itself is ignored."""
        for tail, head, edge_key, data in ebunch_to_add:
            self.add_edge(tail, head, edge_key, **data)

    def remove_edge_with_key(self, key):
        """Remove the edge registered under `key`; KeyError if unknown."""
        try:
            u, v, _ = self.edge_index.pop(key)
        except KeyError as e:
            raise KeyError(f"Invalid edge key {key!r}") from e
        self._cls.remove_edge(u, v, key)

    def remove_edges_from(self, ebunch):
        """Not supported by this partial implementation."""
        raise NotImplementedError
def get_path(G, u, v):
    """
    Returns the edge keys of the unique path between u and v.

    This is not a generic function. G must be a branching and an instance of
    MultiDiGraph_EdgeKey.

    Returns
    -------
    nodes : list
        The nodes on the path from `u` to `v`, as given by
        ``nx.shortest_path``.
    edges : list
        For each consecutive pair of nodes on the path, the first key of the
        edge(s) joining them.
    """
    nodes = nx.shortest_path(G, u, v)

    # In a branching, consecutive path nodes are joined by exactly one edge,
    # so taking the first key is unambiguous.
    edges = []
    for tail, head in zip(nodes, nodes[1:]):
        keys = list(G[tail][head].keys())
        edges.append(keys[0])

    return nodes, edges
class Edmonds:
    """
    Edmonds algorithm for finding optimal branchings and spanning arborescences.

    The per-level working state (graphs, branchings, circuits) is kept on the
    instance and is re-created by every call to `find_optimum`.
    """

    def __init__(self, G, seed=None):
        self.G_original = G

        # Need to fix this. We need the whole tree.
        # The reconstruction phase of `find_optimum` indexes `self.graphs`,
        # which is only filled completely while `store` is True.
        self.store = True

        # The final answer.
        self.edges = []

        # Since we will be creating graphs with new nodes, we need to make
        # sure that our node names do not conflict with the real node names.
        self.template = random_string(seed=seed) + "_{0}"

    def _init(self, attr, default, kind, style, preserve_attrs, seed):
        """Validate the inputs and build the working state for one run."""
        if kind not in KINDS:
            raise nx.NetworkXException("Unknown value for `kind`.")

        # Store inputs.
        self.attr = attr
        self.default = default
        self.kind = kind
        self.style = style

        # Determine how we are going to transform the weights.
        if kind == "min":
            self.trans = trans = _min_weight
        else:
            self.trans = trans = _max_weight

        if attr is None:
            # Generate a random attr the graph probably won't have.
            attr = random_string(seed=seed)

        # This is the actual attribute used by the algorithm.
        self._attr = attr

        # This attribute is used to store whether a particular edge is still
        # a candidate. We generate a random attr to remove clashes with
        # preserved edges
        self.candidate_attr = "candidate_" + random_string(seed=seed)

        # The object we manipulate at each step is a multidigraph.
        self.G = G = MultiDiGraph_EdgeKey()
        for key, (u, v, data) in enumerate(self.G_original.edges(data=True)):
            d = {attr: trans(data.get(attr, default))}

            if preserve_attrs:
                for (d_k, d_v) in data.items():
                    if d_k != attr:
                        d[d_k] = d_v

            G.add_edge(u, v, key, **d)

        self.level = 0

        # These are the "buckets" from the paper.
        #
        # As in the paper, G^i are modified versions of the original graph.
        # D^i and E^i are nodes and edges of the maximal edges that are
        # consistent with G^i. These are dashed edges in figures A-F of the
        # paper. In this implementation, we store D^i and E^i together as a
        # graph B^i. So we will have strictly more B^i than the paper does.
        self.B = MultiDiGraph_EdgeKey()
        self.graphs = []  # G^i
        self.branchings = []  # B^i
        self.uf = nx.utils.UnionFind()

        # A list of lists of edge indexes. Each list is a circuit for graph G^i.
        # Note the edge list will not, in general, be a circuit in graph G^0.
        self.circuits = []

        # Stores the index of the minimum edge in the circuit found in G^i and B^i.
        # The ordering of the edges seems to preserve the weight ordering from G^0.
        # So even if the circuit does not form a circuit in G^0, it is still true
        # that the minimum edge of the circuit in G^i is still the minimum edge
        # in circuit G^0 (despite their weights being different).
        self.minedge_circuit = []

    def find_optimum(
        self,
        attr="weight",
        default=1,
        kind="max",
        style="branching",
        preserve_attrs=False,
        seed=None,
    ):
        """
        Returns a branching from G.

        Parameters
        ----------
        attr : str or None
            The edge attribute used in determining optimality. If None,
            every edge gets the weight `default` and the returned edges
            carry no weight attribute.
        default : float
            The value of the edge attribute used if an edge does not have
            the attribute `attr`.
        kind : {'min', 'max'}
            The type of optimum to search for, either 'min' or 'max'.
        style : {'branching', 'arborescence'}
            If 'branching', then an optimal branching is found. If `style` is
            'arborescence', then a branching is found, such that if the
            branching is also an arborescence, then the branching is an
            optimal spanning arborescences. A given graph G need not have
            an optimal spanning arborescence.
        preserve_attrs : bool
            If True, preserve the other edge attributes of the original
            graph (that are not the one passed to `attr`)
        seed : integer, random_state, or None (default)
            Indicator of random number generation state.
            See :ref:`Randomness<randomness>`.

        Returns
        -------
        H : (multi)digraph
            The branching, as a new graph of the same class as the input.

        Raises
        ------
        NetworkXException
            If `kind` is neither 'min' nor 'max'.
        """
        self._init(attr, default, kind, style, preserve_attrs, seed)
        uf = self.uf

        # This enormous while loop could use some refactoring...

        G, B = self.G, self.B
        D = set()
        nodes = iter(list(G.nodes()))
        attr = self._attr

        def desired_edge(v):
            """
            Find the edge directed toward v with maximal weight.
            """
            edge = None
            weight = -INF
            for u, _, key, data in G.in_edges(v, data=True, keys=True):
                new_weight = data[attr]
                if new_weight > weight:
                    weight = new_weight
                    edge = (u, v, key, new_weight)

            return edge, weight

        while True:
            # (I1): Choose a node v in G^i not in D^i.
            try:
                v = next(nodes)
            except StopIteration:
                # If there are no more new nodes to consider, then we *should*
                # meet the break condition (b) from the paper:
                #   (b) every node of G^i is in D^i and E^i is a branching
                # Construction guarantees that it's a branching.
                assert len(G) == len(B)
                if len(B):
                    assert is_branching(B)

                if self.store:
                    self.graphs.append(G.copy())
                    self.branchings.append(B.copy())

                    # Add these to keep the lengths equal. Element i is the
                    # circuit at level i that was merged to form branching i+1.
                    # There is no circuit for the last level.
                    self.circuits.append([])
                    self.minedge_circuit.append(None)
                break
            else:
                if v in D:
                    continue

            # Put v into bucket D^i.
            D.add(v)
            B.add_node(v)

            edge, weight = desired_edge(v)
            if edge is None:
                # If there is no edge, continue with a new node at (I1).
                continue
            else:
                # Determine if adding the edge to E^i would mean its no longer
                # a branching. Presently, v has indegree 0 in B---it is a root.
                u = edge[0]

                if uf[u] == uf[v]:
                    # Then adding the edge will create a circuit. Then B
                    # contains a unique path P from v to u. So condition (a)
                    # from the paper does hold. We need to store the circuit
                    # for future reference.
                    Q_nodes, Q_edges = get_path(B, v, u)
                    Q_edges.append(edge[2])
                else:
                    # Then B with the edge is still a branching and condition
                    # (a) from the paper does not hold.
                    Q_nodes, Q_edges = None, None

                # Conditions for adding the edge.
                # A non-positive weight cannot help in finding a maximum
                # branching, so such an edge is rejected in 'branching' style.
                if self.style == "branching" and weight <= 0:
                    acceptable = False
                else:
                    acceptable = True

                if acceptable:
                    dd = {attr: weight}
                    B.add_edge(u, v, edge[2], **dd)
                    G[u][v][edge[2]][self.candidate_attr] = True
                    uf.union(u, v)
                    if Q_edges is not None:
                        # The edge introduced a simple cycle.
                        # Move to method
                        # Previous meaning of u and v is no longer important.

                        # Apply (I2).
                        # Get the edge in the cycle with the minimum weight.
                        # Also, save the incoming weights for each node.
                        minweight = INF
                        minedge = None
                        Q_incoming_weight = {}
                        for edge_key in Q_edges:
                            u, v, data = B.edge_index[edge_key]
                            w = data[attr]
                            Q_incoming_weight[v] = w
                            if w < minweight:
                                minweight = w
                                minedge = edge_key

                        self.circuits.append(Q_edges)
                        self.minedge_circuit.append(minedge)

                        if self.store:
                            self.graphs.append(G.copy())
                        # Always need the branching with circuits.
                        self.branchings.append(B.copy())

                        # Now we mutate it.
                        new_node = self.template.format(self.level)

                        G.add_node(new_node)
                        new_edges = []
                        for u, v, key, data in G.edges(data=True, keys=True):
                            if u in Q_incoming_weight:
                                if v in Q_incoming_weight:
                                    # Circuit edge, do nothing for now.
                                    # Eventually delete it.
                                    continue
                                else:
                                    # Outgoing edge. Make it from new node
                                    dd = data.copy()
                                    new_edges.append((new_node, v, key, dd))
                            else:
                                if v in Q_incoming_weight:
                                    # Incoming edge. Change its weight
                                    w = data[attr]
                                    w += minweight - Q_incoming_weight[v]
                                    dd = data.copy()
                                    dd[attr] = w
                                    new_edges.append((u, new_node, key, dd))
                                else:
                                    # Outside edge. No modification necessary.
                                    continue

                        G.remove_nodes_from(Q_nodes)
                        B.remove_nodes_from(Q_nodes)
                        D.difference_update(set(Q_nodes))

                        for u, v, key, data in new_edges:
                            G.add_edge(u, v, key, **data)
                            # Edges that were already chosen stay in the
                            # branching, now attached to the merged node.
                            if self.candidate_attr in data:
                                del data[self.candidate_attr]
                                B.add_edge(u, v, key, **data)
                                uf.union(u, v)

                        # The node set changed; restart the scan.
                        nodes = iter(list(G.nodes()))
                        self.level += 1

        # (I3) Branch construction.
        H = self.G_original.__class__()

        def is_root(G, u, edgekeys):
            """
            Returns True if `u` is a root node in G.

            Node `u` will be a root node if its in-degree, restricted to the
            specified edges, is equal to 0. The second element of the result
            is the key of an offending incoming edge, or None for a root.
            """
            if u not in G:
                raise Exception(f"{u!r} not in G")
            for v in G.pred[u]:
                for edgekey in G.pred[u][v]:
                    if edgekey in edgekeys:
                        return False, edgekey
            return True, None

        # Start with the branching edges in the last level.
        edges = set(self.branchings[self.level].edge_index)
        while self.level > 0:
            self.level -= 1

            # The current level is i, and we start counting from 0.

            # We need the node at level i+1 that results from merging a circuit
            # at level i. randomname_0 is the first merged node and this
            # happens at level 1. That is, randomname_0 is a node at level 1
            # that results from merging a circuit at level 0.
            merged_node = self.template.format(self.level)

            # The circuit at level i that was merged as a node the graph
            # at level i+1.
            circuit = self.circuits[self.level]

            # Note, we ask if it is a root in the full graph, not the branching.
            # The branching alone doesn't have all the edges.
            isroot, edgekey = is_root(self.graphs[self.level + 1], merged_node, edges)
            edges.update(circuit)
            if isroot:
                minedge = self.minedge_circuit[self.level]
                if minedge is None:
                    raise Exception("No minimum edge recorded for merged circuit.")

                # Remove the edge in the cycle with minimum weight.
                edges.remove(minedge)
            else:
                # We have identified an edge at next higher level that
                # transitions into the merged node at the level. That edge
                # transitions to some corresponding node at the current level.
                # We want to remove an edge from the cycle that transitions
                # into the corresponding node.

                # The graph at level i
                G = self.graphs[self.level]
                target = G.edge_index[edgekey][1]
                for edgekey in circuit:
                    u, v, data = G.edge_index[edgekey]
                    if v == target:
                        break
                else:
                    raise Exception("Couldn't find edge incoming to merged node.")

                edges.remove(edgekey)

        self.edges = edges

        H.add_nodes_from(self.G_original)
        for edgekey in edges:
            u, v, d = self.graphs[0].edge_index[edgekey]

            # Weights are stored under the internal name `self._attr`, which
            # differs from `self.attr` when the caller passed attr=None. In
            # that case there is no user-visible name to write the weight to.
            dd = {}
            if self.attr is not None:
                dd[self.attr] = self.trans(d[self._attr])

            # Optionally, preserve the other edge attributes of the original
            # graph
            if preserve_attrs:
                for (key, value) in d.items():
                    if key not in [self._attr, self.candidate_attr]:
                        dd[key] = value

            # TODO: make this preserve the key.
            H.add_edge(u, v, **dd)

        return H
def maximum_branching(G, attr="weight", default=1, preserve_attrs=False):
    # The docstring is attached later in this module from the shared
    # `docstring_branching` template.
    solver = Edmonds(G)
    return solver.find_optimum(
        attr, default, kind="max", style="branching", preserve_attrs=preserve_attrs
    )
def minimum_branching(G, attr="weight", default=1, preserve_attrs=False):
    # The docstring is attached later in this module from the shared
    # `docstring_branching` template.
    solver = Edmonds(G)
    return solver.find_optimum(
        attr, default, kind="min", style="branching", preserve_attrs=preserve_attrs
    )
def maximum_spanning_arborescence(G, attr="weight", default=1, preserve_attrs=False):
    # The docstring is attached later in this module from the shared
    # `docstring_arborescence` template.
    solver = Edmonds(G)
    candidate = solver.find_optimum(
        attr, default, kind="max", style="arborescence", preserve_attrs=preserve_attrs
    )
    # The search only returns a branching; it qualifies only if it is also
    # an arborescence.
    if is_arborescence(candidate):
        return candidate
    raise nx.exception.NetworkXException("No maximum spanning arborescence in G.")
def minimum_spanning_arborescence(G, attr="weight", default=1, preserve_attrs=False):
    # The docstring is attached later in this module from the shared
    # `docstring_arborescence` template.
    solver = Edmonds(G)
    candidate = solver.find_optimum(
        attr, default, kind="min", style="arborescence", preserve_attrs=preserve_attrs
    )
    # The search only returns a branching; it qualifies only if it is also
    # an arborescence.
    if is_arborescence(candidate):
        return candidate
    raise nx.exception.NetworkXException("No minimum spanning arborescence in G.")
# Docstring template shared by the four convenience wrappers above. The
# ``{kind}`` and ``{style}`` placeholders are filled in with ``str.format``
# below.
docstring_branching = """
Returns a {kind} {style} from G.
Parameters
----------
G : (multi)digraph-like
The graph to be searched.
attr : str
The edge attribute used to in determining optimality.
default : float
The value of the edge attribute used if an edge does not have
the attribute `attr`.
preserve_attrs : bool
If True, preserve the other attributes of the original graph (that are not
passed to `attr`)
Returns
-------
B : (multi)digraph-like
A {kind} {style}.
"""

# The arborescence variants can fail, so their template appends a "Raises"
# section to the branching template.
docstring_arborescence = (
    docstring_branching
    + """
Raises
------
NetworkXException
If the graph does not contain a {kind} {style}.
"""
)

# Attach the formatted docstrings to the wrapper functions.
maximum_branching.__doc__ = docstring_branching.format(
    kind="maximum", style="branching"
)

minimum_branching.__doc__ = docstring_branching.format(
    kind="minimum", style="branching"
)

maximum_spanning_arborescence.__doc__ = docstring_arborescence.format(
    kind="maximum", style="spanning arborescence"
)

minimum_spanning_arborescence.__doc__ = docstring_arborescence.format(
    kind="minimum", style="spanning arborescence"
)

View file

@ -0,0 +1,398 @@
"""Functions for encoding and decoding trees.
Since a tree is a highly restricted form of graph, it can be represented
concisely in several ways. This module includes functions for encoding
and decoding trees in the form of nested tuples and Prüfer
sequences. The former requires a rooted tree, whereas the latter can be
applied to unrooted trees. Furthermore, there is a bijection from Prüfer
sequences to labeled trees.
"""
from collections import Counter
from itertools import chain
import networkx as nx
from networkx.utils import not_implemented_for
# Names exported by ``from .coding import *``.
__all__ = [
    "from_nested_tuple",
    "from_prufer_sequence",
    "NotATree",
    "to_nested_tuple",
    "to_prufer_sequence",
]
class NotATree(nx.NetworkXException):
    """Raised when a function expects a tree (that is, a connected
    undirected graph with no cycles) but gets a non-tree graph as input
    instead.

    In this module it is raised by the encoding functions when
    ``nx.is_tree`` rejects their input.
    """
@not_implemented_for("directed")
def to_nested_tuple(T, root, canonical_form=False):
    """Returns a nested tuple representation of the given tree.

    The nested tuple representation of a tree is defined
    recursively. The tree with one node and no edges is represented by
    the empty tuple, ``()``. A tree with ``k`` subtrees is represented
    by a tuple of length ``k`` in which each element is the nested tuple
    representation of a subtree.

    Parameters
    ----------
    T : NetworkX graph
        An undirected graph object representing a tree.

    root : node
        The node in ``T`` to interpret as the root of the tree.

    canonical_form : bool
        If ``True``, each tuple is sorted so that the function returns
        a canonical form for rooted trees. This means "lighter" subtrees
        will appear as nested tuples before "heavier" subtrees. In this
        way, each isomorphic rooted tree has the same nested tuple
        representation.

    Returns
    -------
    tuple
        A nested tuple representation of the tree.

    Raises
    ------
    NotATree
        If ``T`` is not a tree.
    NodeNotFound
        If ``root`` is not a node of ``T``.

    Notes
    -----
    This function is *not* the inverse of :func:`from_nested_tuple`; the
    only guarantee is that the rooted trees are isomorphic.

    See also
    --------
    from_nested_tuple
    to_prufer_sequence

    Examples
    --------
    The tree need not be a balanced binary tree::

        >>> T = nx.Graph()
        >>> T.add_edges_from([(0, 1), (0, 2), (0, 3)])
        >>> T.add_edges_from([(1, 4), (1, 5)])
        >>> T.add_edges_from([(3, 6), (3, 7)])
        >>> root = 0
        >>> nx.to_nested_tuple(T, root)
        (((), ()), (), ((), ()))

    Continuing the above example, if ``canonical_form`` is ``True``, the
    nested tuples will be sorted::

        >>> nx.to_nested_tuple(T, root, canonical_form=True)
        ((), ((), ()), ((), ()))

    Even the path graph can be interpreted as a tree::

        >>> T = nx.path_graph(4)
        >>> root = 0
        >>> nx.to_nested_tuple(T, root)
        ((((),),),)

    """

    def _encode(tree, node, parent):
        """Encode the subtree hanging from ``node``.

        ``parent`` is the neighbor of ``node`` on the side of the overall
        root (``None`` for the root itself); every other neighbor is a
        child. A leaf has no children and is encoded as ``()``.
        """
        child_nodes = set(tree[node]) - {parent}
        encoded = [_encode(tree, child, node) for child in child_nodes]
        if canonical_form:
            encoded.sort()
        return tuple(encoded)

    # Do some sanity checks on the input.
    if not nx.is_tree(T):
        raise nx.NotATree("provided graph is not a tree")
    if root not in T:
        raise nx.NodeNotFound(f"Graph {T} contains no node {root}")

    return _encode(T, root, None)
def from_nested_tuple(sequence, sensible_relabeling=False):
    """Returns the rooted tree corresponding to the given nested tuple.

    The nested tuple representation of a tree is defined
    recursively. The tree with one node and no edges is represented by
    the empty tuple, ``()``. A tree with ``k`` subtrees is represented
    by a tuple of length ``k`` in which each element is the nested tuple
    representation of a subtree.

    Parameters
    ----------
    sequence : tuple
        A nested tuple representing a rooted tree.

    sensible_relabeling : bool
        Whether to relabel the nodes of the tree so that nodes are
        labeled in increasing order according to their breadth-first
        search order from the root node.

    Returns
    -------
    NetworkX graph
        The tree corresponding to the given nested tuple, whose root
        node is node 0. If ``sensible_relabeling`` is ``True``, nodes will
        be labeled in breadth-first search order starting from the root
        node.

    Notes
    -----
    This function is *not* the inverse of :func:`to_nested_tuple`; the
    only guarantee is that the rooted trees are isomorphic.

    See also
    --------
    to_nested_tuple
    from_prufer_sequence

    Examples
    --------
    Sensible relabeling ensures that the nodes are labeled from the root
    starting at 0::

        >>> balanced = (((), ()), ((), ()))
        >>> T = nx.from_nested_tuple(balanced, sensible_relabeling=True)
        >>> edges = [(0, 1), (0, 2), (1, 3), (1, 4), (2, 5), (2, 6)]
        >>> all((u, v) in T.edges() or (v, u) in T.edges() for (u, v) in edges)
        True

    """

    def _build(nested):
        """Recursively build the tree described by ``nested``.

        Subtrees are built first and then joined at their roots with
        :func:`~networkx.tree.join`; node 0 is passed as the root of
        every subtree.
        """
        if len(nested) > 0:
            rooted_subtrees = [(_build(child), 0) for child in nested]
            return nx.tree.join(rooted_subtrees)
        # The empty tuple stands for the tree with a single node.
        return nx.empty_graph(1)

    T = _build(sequence)
    if not sensible_relabeling:
        return T

    # Relabel the nodes according to their breadth-first search order,
    # starting from the root node (that is, the node 0).
    bfs_order = [0]
    bfs_order.extend(child for _, child in nx.bfs_edges(T, 0))
    relabeling = {node: position for position, node in enumerate(bfs_order)}
    # We would like to use `copy=False`, but `relabel_nodes` doesn't
    # allow a relabel mapping that can't be topologically sorted.
    return nx.relabel_nodes(T, relabeling)
@not_implemented_for("directed")
def to_prufer_sequence(T):
    r"""Returns the Prüfer sequence of the given tree.

    A *Prüfer sequence* is a list of *n* - 2 numbers between 0 and
    *n* - 1, inclusive. The tree corresponding to a given Prüfer
    sequence can be recovered by repeatedly joining a node in the
    sequence with a node with the smallest potential degree according to
    the sequence.

    Parameters
    ----------
    T : NetworkX graph
        An undirected graph object representing a tree.

    Returns
    -------
    list
        The Prüfer sequence of the given tree.

    Raises
    ------
    NetworkXPointlessConcept
        If the number of nodes in `T` is less than two.

    NotATree
        If `T` is not a tree.

    KeyError
        If the set of nodes in `T` is not {0, ..., *n* - 1}.

    Notes
    -----
    There is a bijection from labeled trees to Prüfer sequences. This
    function is the inverse of the :func:`from_prufer_sequence`
    function.

    Sometimes Prüfer sequences use nodes labeled from 1 to *n* instead
    of from 0 to *n* - 1. This function requires nodes to be labeled in
    the latter form. You can use :func:`~networkx.relabel_nodes` to
    relabel the nodes of your tree to the appropriate format.

    This implementation is from [1]_ and has a running time of
    $O(n)$.

    See also
    --------
    to_nested_tuple
    from_prufer_sequence

    References
    ----------
    .. [1] Wang, Xiaodong, Lei Wang, and Yingjie Wu.
           "An optimal algorithm for Prufer codes."
           *Journal of Software Engineering and Applications* 2.02 (2009): 111.
           <https://doi.org/10.4236/jsea.2009.22016>

    Examples
    --------
    There is a bijection between Prüfer sequences and labeled trees, so
    this function is the inverse of the :func:`from_prufer_sequence`
    function:

    >>> edges = [(0, 3), (1, 3), (2, 3), (3, 4), (4, 5)]
    >>> tree = nx.Graph(edges)
    >>> sequence = nx.to_prufer_sequence(tree)
    >>> sequence
    [3, 3, 3, 4]
    >>> tree2 = nx.from_prufer_sequence(sequence)
    >>> list(tree2.edges()) == edges
    True

    """
    # Perform some sanity checks on the input.
    n = len(T)
    if n < 2:
        msg = "Prüfer sequence undefined for trees with fewer than two nodes"
        raise nx.NetworkXPointlessConcept(msg)
    if not nx.is_tree(T):
        raise nx.NotATree("provided graph is not a tree")
    if set(T) != set(range(n)):
        raise KeyError("tree must have node labels {0, ..., n - 1}")

    # Remaining degree of every node; decremented as leaves are stripped.
    degree = dict(T.degree())

    def next_leaf(start):
        """Smallest label ``>= start`` whose remaining degree is one."""
        return next(k for k in range(start, n) if degree[k] == 1)

    index = leaf = next_leaf(0)
    code = []
    for _ in range(n - 2):
        # The neighbor that has not been stripped yet is the leaf's parent.
        parent = next(w for w in T[leaf] if degree[w] > 1)
        code.append(parent)
        degree[parent] -= 1
        if parent < index and degree[parent] == 1:
            # The parent just became a leaf with a label below the scan
            # position, so it is the next one to strip.
            leaf = parent
        else:
            index = leaf = next_leaf(index + 1)
    return code
def from_prufer_sequence(sequence):
    r"""Returns the tree corresponding to the given Prüfer sequence.

    A *Prüfer sequence* is a list of *n* - 2 numbers between 0 and
    *n* - 1, inclusive. The tree corresponding to a given Prüfer
    sequence can be recovered by repeatedly joining a node in the
    sequence with a node with the smallest potential degree according to
    the sequence.

    Parameters
    ----------
    sequence : list
        A Prüfer sequence, which is a list of *n* - 2 integers between
        zero and *n* - 1, inclusive.

    Returns
    -------
    NetworkX graph
        The tree corresponding to the given Prüfer sequence.

    Notes
    -----
    There is a bijection from labeled trees to Prüfer sequences. This
    function is the inverse of the :func:`to_prufer_sequence` function.

    Sometimes Prüfer sequences use nodes labeled from 1 to *n* instead
    of from 0 to *n* - 1. This function requires nodes to be labeled in
    the latter form. You can use :func:`networkx.relabel_nodes` to
    relabel the nodes of your tree to the appropriate format.

    This implementation is from [1]_ and has a running time of
    $O(n)$.

    References
    ----------
    .. [1] Wang, Xiaodong, Lei Wang, and Yingjie Wu.
           "An optimal algorithm for Prufer codes."
           *Journal of Software Engineering and Applications* 2.02 (2009): 111.
           <https://doi.org/10.4236/jsea.2009.22016>

    See also
    --------
    from_nested_tuple
    to_prufer_sequence

    Examples
    --------
    There is a bijection between Prüfer sequences and labeled trees, so
    this function is the inverse of the :func:`to_prufer_sequence`
    function:

    >>> edges = [(0, 3), (1, 3), (2, 3), (3, 4), (4, 5)]
    >>> tree = nx.Graph(edges)
    >>> sequence = nx.to_prufer_sequence(tree)
    >>> sequence
    [3, 3, 3, 4]
    >>> tree2 = nx.from_prufer_sequence(sequence)
    >>> list(tree2.edges()) == edges
    True

    """
    n = len(sequence) + 2

    # `degree` stores the remaining degree (plus one) for each node. The
    # degree of a node in the decoded tree is one more than the number
    # of times it appears in the code.
    degree = Counter(range(n))
    degree.update(sequence)

    T = nx.empty_graph(n)

    # Nodes that have already been attached to a parent. After the loop
    # exactly two nodes should be missing from this set.
    attached = set()

    def next_leaf(start):
        """Smallest label ``>= start`` whose remaining degree is one."""
        return next(k for k in range(start, n) if degree[k] == 1)

    index = leaf = next_leaf(0)
    for parent in sequence:
        T.add_edge(leaf, parent)
        attached.add(leaf)
        degree[parent] -= 1
        if parent < index and degree[parent] == 1:
            leaf = parent
        else:
            index = leaf = next_leaf(index + 1)

    # At this point, there must be exactly two orphaned nodes; join them.
    first, second = set(T) - attached
    T.add_edge(first, second)
    return T

View file

@ -0,0 +1,86 @@
r"""Function for computing a junction tree of a graph."""
import networkx as nx
from networkx.utils import not_implemented_for
from networkx.algorithms import moral, complete_to_chordal_graph, chordal_graph_cliques
from itertools import combinations
# Names exported by ``from .decomposition import *``.
__all__ = ["junction_tree"]
@not_implemented_for("multigraph")
def junction_tree(G):
    r"""Returns a junction tree of a given graph.

    A junction tree (or clique tree) is constructed from a (un)directed graph G.
    The tree is constructed based on a moralized and triangulated version of G.
    The tree's nodes consist of maximal cliques and sepsets of the revised graph.
    The sepset of two cliques is the intersection of the nodes of these cliques,
    e.g. the sepset of (A,B,C) and (A,C,E,F) is (A,C). These nodes are often called
    "variables" in this literature. The tree is bipartite with each sepset
    connected to its two cliques.

    Junction Trees are not unique as the order of clique consideration determines
    which sepsets are included.

    The junction tree algorithm consists of five steps [1]_:

    1. Moralize the graph
    2. Triangulate the graph
    3. Find maximal cliques
    4. Build the tree from cliques, connecting cliques with shared
       nodes, set edge-weight to number of shared variables
    5. Find maximum spanning tree


    Parameters
    ----------
    G : networkx.Graph
        Directed or undirected graph.

    Returns
    -------
    junction_tree : networkx.Graph
        The corresponding junction tree of `G`. Every node is a sorted
        tuple of nodes of `G` and carries a ``type`` attribute that is
        either ``"clique"`` or ``"sepset"``.

    Raises
    ------
    NetworkXNotImplemented
        Raised if `G` is an instance of `MultiGraph` or `MultiDiGraph`.

    References
    ----------
    .. [1] Junction tree algorithm:
       https://en.wikipedia.org/wiki/Junction_tree_algorithm

    .. [2] Finn V. Jensen and Frank Jensen. 1994. Optimal
       junction trees. In Proceedings of the Tenth international
       conference on Uncertainty in artificial intelligence (UAI'94).
       Morgan Kaufmann Publishers Inc., San Francisco, CA, USA, 360–366.
    """
    # NOTE: the decorator takes graph-type terms, not class names;
    # "multigraph" alone already covers both MultiGraph and MultiDiGraph.

    clique_graph = nx.Graph()

    # Steps 1-2: moralize (directed input only), then triangulate.
    if G.is_directed():
        G = moral.moral_graph(G)
    chordal_graph, _ = complete_to_chordal_graph(G)

    # Step 3: maximal cliques, as sorted tuples so they can serve as nodes.
    cliques = [tuple(sorted(i)) for i in chordal_graph_cliques(chordal_graph)]
    clique_graph.add_nodes_from(cliques, type="clique")

    # Step 4: connect every pair of overlapping cliques, weighted by the
    # size of their intersection (the sepset).
    for edge in combinations(cliques, 2):
        set_edge_0 = set(edge[0])
        set_edge_1 = set(edge[1])
        if not set_edge_0.isdisjoint(set_edge_1):
            sepset = tuple(sorted(set_edge_0.intersection(set_edge_1)))
            clique_graph.add_edge(edge[0], edge[1], weight=len(sepset), sepset=sepset)

    # Step 5: keep the heaviest spanning tree of the clique graph.
    junction_tree = nx.maximum_spanning_tree(clique_graph)

    # Replace every clique-clique edge by a sepset node placed between the
    # two cliques. Iterate over a snapshot because the tree is mutated.
    for edge in list(junction_tree.edges(data=True)):
        junction_tree.add_node(edge[2]["sepset"], type="sepset")
        junction_tree.add_edge(edge[0], edge[2]["sepset"])
        junction_tree.add_edge(edge[1], edge[2]["sepset"])
        junction_tree.remove_edge(edge[0], edge[1])

    return junction_tree

View file

@ -0,0 +1,612 @@
"""
Algorithms for calculating min/max spanning trees/forests.
"""
from heapq import heappop, heappush
from operator import itemgetter
from itertools import count
from math import isnan
import networkx as nx
from networkx.utils import UnionFind, not_implemented_for
__all__ = [
"minimum_spanning_edges",
"maximum_spanning_edges",
"minimum_spanning_tree",
"maximum_spanning_tree",
]
@not_implemented_for("multigraph")
def boruvka_mst_edges(
    G, minimum=True, weight="weight", keys=False, data=True, ignore_nan=False
):
    """Iterate over edges of a Borůvka's algorithm min/max spanning tree.

    Parameters
    ----------
    G : NetworkX Graph
        The edges of `G` must have distinct weights,
        otherwise the edges may not form a tree.

    minimum : bool (default: True)
        Find the minimum (True) or maximum (False) spanning tree.

    weight : string (default: 'weight')
        The name of the edge attribute holding the edge weights.
        Edges without this attribute are treated as having weight 1.

    keys : bool (default: False)
        This argument is ignored since this function is not
        implemented for multigraphs; it exists only for consistency
        with the other minimum spanning tree functions.

    data : bool (default: True)
        Flag for whether to yield edge attribute dicts.
        If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
        If False, yield edges `(u, v)`.

    ignore_nan : bool (default: False)
        If a NaN is found as an edge weight normally an exception is raised.
        If `ignore_nan is True` then that edge is ignored instead.

    Yields
    ------
    tuple
        `(u, v, d)` if `data` is True, otherwise `(u, v)`, one tuple per
        edge that joins two components of the growing forest.

    Raises
    ------
    ValueError
        If an edge weight is NaN and `ignore_nan` is False.
    """
    # Initialize a forest, assuming initially that it is the discrete
    # partition of the nodes of the graph.
    forest = UnionFind(G)

    # Multiplying by -1 turns "find the maximum" into "find the minimum",
    # so a single comparison below serves both modes.
    sign = 1 if minimum else -1

    def best_edge(component):
        """Returns the optimum (minimum or maximum) edge on the edge
        boundary of the given set of nodes.

        A return value of ``None`` indicates an empty boundary.

        """
        minwt = float("inf")
        boundary = None
        for e in nx.edge_boundary(G, component, data=True):
            wt = e[-1].get(weight, 1) * sign
            if isnan(wt):
                if ignore_nan:
                    continue
                msg = f"NaN found as an edge weight. Edge {e}"
                raise ValueError(msg)
            if wt < minwt:
                minwt = wt
                boundary = e
        return boundary

    while True:
        # Determine the optimum edge in the edge boundary of each
        # component in the forest.
        #
        # This must be a sequence, not an iterator. In this list, the
        # same edge may appear twice, in different orientations (but
        # that's okay, since a union operation will be called on the
        # endpoints the first time it is seen, but not the second time).
        #
        # Any ``None`` indicates that the edge boundary for that
        # component was empty, so that part of the forest has been
        # completed.
        #
        # TODO This can be parallelized, both in the outer loop over
        # each component in the forest and in the computation of the
        # minimum.
        best_edges = [
            edge
            for edge in map(best_edge, forest.to_sets())
            if edge is not None
        ]
        # No component has a boundary edge left: every connected
        # component of the graph is spanned, so the forest is complete.
        if not best_edges:
            return
        # Join trees in the forest using the best edges, and yield that
        # edge, since it is part of the spanning tree.
        #
        # TODO This loop can be parallelized, to an extent (the union
        # operation must be atomic).
        for u, v, d in best_edges:
            if forest[u] != forest[v]:
                if data:
                    yield u, v, d
                else:
                    yield u, v
                forest.union(u, v)
def kruskal_mst_edges(
    G, minimum, weight="weight", keys=True, data=True, ignore_nan=False
):
    """Iterate over edges of a Kruskal's algorithm min/max spanning tree.

    Parameters
    ----------
    G : NetworkX Graph
        The graph holding the tree of interest.

    minimum : bool
        Find the minimum (True) or maximum (False) spanning tree.

    weight : string (default: 'weight')
        The name of the edge attribute holding the edge weights.
        Edges without this attribute are treated as having weight 1.

    keys : bool (default: True)
        If `G` is a multigraph, `keys` controls whether edge keys are
        yielded. Otherwise `keys` is ignored.

    data : bool (default: True)
        Flag for whether to yield edge attribute dicts.
        If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
        If False, yield edges `(u, v)`.

    ignore_nan : bool (default: False)
        If a NaN is found as an edge weight normally an exception is raised.
        If `ignore_nan is True` then that edge is ignored instead.

    """
    subtrees = UnionFind()
    multigraph = G.is_multigraph()
    # Multigraph edges carry a key between the endpoints and the data dict.
    if multigraph:
        all_edges = G.edges(keys=True, data=True)
    else:
        all_edges = G.edges(data=True)

    def signed_edges():
        """Yield each usable edge prefixed with its sign-adjusted weight."""
        sign = 1 if minimum else -1
        for edge in all_edges:
            # The attribute dict is always the last item of the edge tuple.
            wt = edge[-1].get(weight, 1) * sign
            if isnan(wt):
                if ignore_nan:
                    continue
                raise ValueError(f"NaN found as an edge weight. Edge {edge}")
            yield (wt,) + edge

    # Visit edges from best to worst; negated weights make "best" mean
    # "largest" when a maximum spanning tree is requested.
    for _, u, v, *rest in sorted(signed_edges(), key=itemgetter(0)):
        # An edge inside a single subtree would close a cycle.
        if subtrees[u] == subtrees[v]:
            continue
        d = rest[-1]
        if multigraph and keys:
            k = rest[0]
            if data:
                yield u, v, k, d
            else:
                yield u, v, k
        elif data:
            yield u, v, d
        else:
            yield u, v
        subtrees.union(u, v)
def prim_mst_edges(G, minimum, weight="weight", keys=True, data=True, ignore_nan=False):
    """Iterate over edges of Prim's algorithm min/max spanning tree.

    Parameters
    ----------
    G : NetworkX Graph
        The graph holding the tree of interest.

    minimum : bool
        Find the minimum (True) or maximum (False) spanning tree.

    weight : string (default: 'weight')
        The name of the edge attribute holding the edge weights.
        Edges without this attribute are treated as having weight 1.

    keys : bool (default: True)
        If `G` is a multigraph, `keys` controls whether edge keys are
        yielded. Otherwise `keys` is ignored.

    data : bool (default: True)
        Flag for whether to yield edge attribute dicts.
        If True, yield edges `(u, v, d)`, where `d` is the attribute dict.
        If False, yield edges `(u, v)`.

    ignore_nan : bool (default: False)
        If a NaN is found as an edge weight normally an exception is raised.
        If `ignore_nan is True` then that edge is ignored instead.

    Raises
    ------
    ValueError
        If a NaN edge weight is encountered while `ignore_nan` is False.

    """
    is_multigraph = G.is_multigraph()
    push = heappush
    pop = heappop

    nodes = set(G)
    # Tie-breaker placed after the weight in every heap entry, so entries
    # with equal weight never fall through to comparing nodes or dicts.
    c = count()

    # Negated weights let the min-heap serve the maximum problem too.
    sign = 1 if minimum else -1

    # Each pass of this loop grows the tree of one connected component.
    while nodes:
        u = nodes.pop()
        frontier = []
        visited = {u}
        if is_multigraph:
            for v, keydict in G.adj[u].items():
                for k, d in keydict.items():
                    wt = d.get(weight, 1) * sign
                    if isnan(wt):
                        if ignore_nan:
                            continue
                        msg = f"NaN found as an edge weight. Edge {(u, v, k, d)}"
                        raise ValueError(msg)
                    push(frontier, (wt, next(c), u, v, k, d))
        else:
            for v, d in G.adj[u].items():
                wt = d.get(weight, 1) * sign
                if isnan(wt):
                    if ignore_nan:
                        continue
                    msg = f"NaN found as an edge weight. Edge {(u, v, d)}"
                    raise ValueError(msg)
                push(frontier, (wt, next(c), u, v, d))
        while frontier:
            if is_multigraph:
                W, _, u, v, k, d = pop(frontier)
            else:
                W, _, u, v, d = pop(frontier)
            if v in visited or v not in nodes:
                continue
            # Multigraphs need to handle edge keys in addition to edge data.
            if is_multigraph and keys:
                if data:
                    yield u, v, k, d
                else:
                    yield u, v, k
            else:
                if data:
                    yield u, v, d
                else:
                    yield u, v
            # update frontier
            visited.add(v)
            nodes.discard(v)
            # Edges discovered here need the same NaN screening as the
            # edges of the start node: a NaN in the heap cannot be ordered
            # meaningfully and would bypass `ignore_nan` altogether.
            if is_multigraph:
                for w, keydict in G.adj[v].items():
                    if w in visited:
                        continue
                    for k2, d2 in keydict.items():
                        new_weight = d2.get(weight, 1) * sign
                        if isnan(new_weight):
                            if ignore_nan:
                                continue
                            msg = f"NaN found as an edge weight. Edge {(v, w, k2, d2)}"
                            raise ValueError(msg)
                        push(frontier, (new_weight, next(c), v, w, k2, d2))
            else:
                for w, d2 in G.adj[v].items():
                    if w in visited:
                        continue
                    new_weight = d2.get(weight, 1) * sign
                    if isnan(new_weight):
                        if ignore_nan:
                            continue
                        msg = f"NaN found as an edge weight. Edge {(v, w, d2)}"
                        raise ValueError(msg)
                    push(frontier, (new_weight, next(c), v, w, d2))
# Dispatch table from the user-facing `algorithm` name to the generator that
# implements it. Borůvka is reachable under both the ASCII and the accented
# spelling; minimum_spanning_edges/maximum_spanning_edges look names up here.
ALGORITHMS = {
"boruvka": boruvka_mst_edges,
"borůvka": boruvka_mst_edges,
"kruskal": kruskal_mst_edges,
"prim": prim_mst_edges,
}
@not_implemented_for("directed")
def minimum_spanning_edges(
    G, algorithm="kruskal", weight="weight", keys=True, data=True, ignore_nan=False
):
    """Generate edges in a minimum spanning forest of an undirected
    weighted graph.

    A minimum spanning tree is a subgraph of the graph (a tree)
    with the minimum sum of edge weights.  A spanning forest is a
    union of the spanning trees for each connected component of the graph.

    Parameters
    ----------
    G : undirected Graph
       An undirected graph. If `G` is connected, then the algorithm finds a
       spanning tree. Otherwise, a spanning forest is found.

    algorithm : string
       Name of the algorithm used to build the tree: 'kruskal' (the
       default), 'prim', or 'boruvka'.

    weight : string
       Edge data key to use for weight (default 'weight').

    keys : bool
       Whether to yield edge key in multigraphs in addition to the edge.
       If `G` is not a multigraph, this is ignored.

    data : bool, optional
       If True yield the edge data along with the edge.

    ignore_nan : bool (default: False)
        If a NaN is found as an edge weight normally an exception is raised.
        If `ignore_nan is True` then that edge is ignored instead.

    Returns
    -------
    edges : iterator
       An iterator over edges in a minimum spanning tree of `G`.
       Edges connecting nodes `u` and `v` are represented as tuples:
       `(u, v, k, d)` or `(u, v, k)` or `(u, v, d)` or `(u, v)`

       If `G` is a multigraph, `keys` indicates whether the edge key `k` will
       be reported in the third position in the edge tuple. `data` indicates
       whether the edge datadict `d` will appear at the end of the edge tuple.

       If `G` is not a multigraph, the tuples are `(u, v, d)` if `data` is True
       or `(u, v)` if `data` is False.

    Raises
    ------
    ValueError
        If `algorithm` is not one of the names listed above.

    Examples
    --------
    >>> from networkx.algorithms import tree

    Find minimum spanning edges by Kruskal's algorithm

    >>> G = nx.cycle_graph(4)
    >>> G.add_edge(0, 3, weight=2)
    >>> mst = tree.minimum_spanning_edges(G, algorithm="kruskal", data=False)
    >>> edgelist = list(mst)
    >>> sorted(sorted(e) for e in edgelist)
    [[0, 1], [1, 2], [2, 3]]

    Find minimum spanning edges by Prim's algorithm

    >>> G = nx.cycle_graph(4)
    >>> G.add_edge(0, 3, weight=2)
    >>> mst = tree.minimum_spanning_edges(G, algorithm="prim", data=False)
    >>> edgelist = list(mst)
    >>> sorted(sorted(e) for e in edgelist)
    [[0, 1], [1, 2], [2, 3]]

    Notes
    -----
    For Borůvka's algorithm, each edge must have a weight attribute, and
    each edge weight must be distinct.

    For the other algorithms, if the graph edges do not have a weight
    attribute a default weight of 1 will be used.

    Modified code from David Eppstein, April 2006
    http://www.ics.uci.edu/~eppstein/PADS/

    """
    # Translate an unknown name into a ValueError while keeping the
    # original KeyError attached as its cause.
    try:
        edge_generator = ALGORITHMS[algorithm]
    except KeyError as err:
        raise ValueError(
            f"{algorithm} is not a valid choice for an algorithm."
        ) from err

    return edge_generator(
        G, minimum=True, weight=weight, keys=keys, data=data, ignore_nan=ignore_nan
    )
@not_implemented_for("directed")
def maximum_spanning_edges(
    G, algorithm="kruskal", weight="weight", keys=True, data=True, ignore_nan=False
):
    """Generate edges in a maximum spanning forest of an undirected
    weighted graph.

    A maximum spanning tree is a subgraph of the graph (a tree)
    with the maximum possible sum of edge weights.  A spanning forest is a
    union of the spanning trees for each connected component of the graph.

    Parameters
    ----------
    G : undirected Graph
       An undirected graph. If `G` is connected, then the algorithm finds a
       spanning tree. Otherwise, a spanning forest is found.

    algorithm : string
       Name of the algorithm used to build the tree: 'kruskal' (the
       default), 'prim', or 'boruvka'.

    weight : string
       Edge data key to use for weight (default 'weight').

    keys : bool
       Whether to yield edge key in multigraphs in addition to the edge.
       If `G` is not a multigraph, this is ignored.

    data : bool, optional
       If True yield the edge data along with the edge.

    ignore_nan : bool (default: False)
        If a NaN is found as an edge weight normally an exception is raised.
        If `ignore_nan is True` then that edge is ignored instead.

    Returns
    -------
    edges : iterator
       An iterator over edges in a maximum spanning tree of `G`.
       Edges connecting nodes `u` and `v` are represented as tuples:
       `(u, v, k, d)` or `(u, v, k)` or `(u, v, d)` or `(u, v)`

       If `G` is a multigraph, `keys` indicates whether the edge key `k` will
       be reported in the third position in the edge tuple. `data` indicates
       whether the edge datadict `d` will appear at the end of the edge tuple.

       If `G` is not a multigraph, the tuples are `(u, v, d)` if `data` is True
       or `(u, v)` if `data` is False.

    Raises
    ------
    ValueError
        If `algorithm` is not one of the names listed above.

    Examples
    --------
    >>> from networkx.algorithms import tree

    Find maximum spanning edges by Kruskal's algorithm

    >>> G = nx.cycle_graph(4)
    >>> G.add_edge(0, 3, weight=2)
    >>> mst = tree.maximum_spanning_edges(G, algorithm="kruskal", data=False)
    >>> edgelist = list(mst)
    >>> sorted(sorted(e) for e in edgelist)
    [[0, 1], [0, 3], [1, 2]]

    Find maximum spanning edges by Prim's algorithm

    >>> G = nx.cycle_graph(4)
    >>> G.add_edge(0, 3, weight=2)  # assign weight 2 to edge 0-3
    >>> mst = tree.maximum_spanning_edges(G, algorithm="prim", data=False)
    >>> edgelist = list(mst)
    >>> sorted(sorted(e) for e in edgelist)
    [[0, 1], [0, 3], [2, 3]]

    Notes
    -----
    For Borůvka's algorithm, each edge must have a weight attribute, and
    each edge weight must be distinct.

    For the other algorithms, if the graph edges do not have a weight
    attribute a default weight of 1 will be used.

    Modified code from David Eppstein, April 2006
    http://www.ics.uci.edu/~eppstein/PADS/
    """
    # Translate an unknown name into a ValueError while keeping the
    # original KeyError attached as its cause.
    try:
        edge_generator = ALGORITHMS[algorithm]
    except KeyError as err:
        raise ValueError(
            f"{algorithm} is not a valid choice for an algorithm."
        ) from err

    # minimum=False makes the shared implementations negate the weights.
    return edge_generator(
        G, minimum=False, weight=weight, keys=keys, data=data, ignore_nan=ignore_nan
    )
def minimum_spanning_tree(G, weight="weight", algorithm="kruskal", ignore_nan=False):
    """Returns a minimum spanning tree or forest on an undirected graph `G`.

    Parameters
    ----------
    G : undirected graph
        An undirected graph. If `G` is connected, then the algorithm finds a
        spanning tree. Otherwise, a spanning forest is found.

    weight : str
       Data key to use for edge weights.

    algorithm : string
       Name of the algorithm used to build the tree: 'kruskal' (the
       default), 'prim', or 'boruvka'.

    ignore_nan : bool (default: False)
        If a NaN is found as an edge weight normally an exception is raised.
        If `ignore_nan is True` then that edge is ignored instead.

    Returns
    -------
    G : NetworkX Graph
       A minimum spanning tree or forest.

    Examples
    --------
    >>> G = nx.cycle_graph(4)
    >>> G.add_edge(0, 3, weight=2)
    >>> T = nx.minimum_spanning_tree(G)
    >>> sorted(T.edges(data=True))
    [(0, 1, {}), (1, 2, {}), (2, 3, {})]


    Notes
    -----
    For Borůvka's algorithm, each edge must have a weight attribute, and
    each edge weight must be distinct.

    For the other algorithms, if the graph edges do not have a weight
    attribute a default weight of 1 will be used.

    There may be more than one tree with the same minimum or maximum weight.
    See :mod:`networkx.tree.recognition` for more detailed definitions.

    Isolated nodes with self-loops are in the tree as edgeless isolated nodes.

    """
    # Keys and data are always requested so that the copied edges keep
    # their identity and attributes in the returned graph.
    tree_edges = minimum_spanning_edges(
        G,
        algorithm=algorithm,
        weight=weight,
        keys=True,
        data=True,
        ignore_nan=ignore_nan,
    )
    # Start from an empty graph of the same class, then copy over the graph
    # attributes, every node (with data) and finally the selected edges.
    T = G.__class__()
    T.graph.update(G.graph)
    T.add_nodes_from(G.nodes.items())
    T.add_edges_from(tree_edges)
    return T
def maximum_spanning_tree(G, weight="weight", algorithm="kruskal", ignore_nan=False):
    """Returns a maximum spanning tree or forest on an undirected graph `G`.

    Parameters
    ----------
    G : undirected graph
        An undirected graph. If `G` is connected, then the algorithm finds a
        spanning tree. Otherwise, a spanning forest is found.

    weight : str
       Data key to use for edge weights.

    algorithm : string
       Name of the algorithm used to build the tree: 'kruskal' (the
       default), 'prim', or 'boruvka'.

    ignore_nan : bool (default: False)
        If a NaN is found as an edge weight normally an exception is raised.
        If `ignore_nan is True` then that edge is ignored instead.


    Returns
    -------
    G : NetworkX Graph
       A maximum spanning tree or forest.


    Examples
    --------
    >>> G = nx.cycle_graph(4)
    >>> G.add_edge(0, 3, weight=2)
    >>> T = nx.maximum_spanning_tree(G)
    >>> sorted(T.edges(data=True))
    [(0, 1, {}), (0, 3, {'weight': 2}), (1, 2, {})]


    Notes
    -----
    For Borůvka's algorithm, each edge must have a weight attribute, and
    each edge weight must be distinct.

    For the other algorithms, if the graph edges do not have a weight
    attribute a default weight of 1 will be used.

    There may be more than one tree with the same minimum or maximum weight.
    See :mod:`networkx.tree.recognition` for more detailed definitions.

    Isolated nodes with self-loops are in the tree as edgeless isolated nodes.

    """
    # The edge generator is drained into a list before the result graph
    # is created.
    tree_edges = list(
        maximum_spanning_edges(
            G,
            algorithm=algorithm,
            weight=weight,
            keys=True,
            data=True,
            ignore_nan=ignore_nan,
        )
    )
    # Start from an empty graph of the same class, then copy over the graph
    # attributes, every node (with data) and finally the selected edges.
    T = G.__class__()
    T.graph.update(G.graph)
    T.add_nodes_from(G.nodes.items())
    T.add_edges_from(tree_edges)
    return T

View file

@ -0,0 +1,107 @@
"""Operations on trees."""
from functools import partial
from itertools import chain
import networkx as nx
from itertools import accumulate
__all__ = ["join"]
def join(rooted_trees, label_attribute=None):
    """Returns a new rooted tree with a root node joined with the roots
    of each of the given rooted trees.

    Parameters
    ----------
    rooted_trees : list
        A list of pairs in which each left element is a NetworkX graph
        object representing a tree and each right element is the root
        node of that tree. The nodes of these trees will be relabeled to
        integers.

    label_attribute : str
        If provided, the old node labels will be stored in the new tree
        under this node attribute. If not provided, the original labels
        are only tracked temporarily (under ``'_old'``) while the trees
        are merged and are not kept in the returned tree.

    Returns
    -------
    NetworkX graph
        The rooted tree whose subtrees are the given rooted trees. The
        new root node is labeled 0. When ``label_attribute`` is given,
        each non-root node carries that attribute, holding the label the
        node had in its input tree.

    Notes
    -----
    Graph, edge, and node attributes are propagated from the given
    rooted trees to the created tree. If there are any overlapping graph
    attributes, those from later trees will overwrite those from earlier
    trees in the tuple of positional arguments.

    Examples
    --------
    Join two full balanced binary trees of height *h* to get a full
    balanced binary tree of depth *h* + 1::

        >>> h = 4
        >>> left = nx.balanced_tree(2, h)
        >>> right = nx.balanced_tree(2, h)
        >>> joined_tree = nx.join([(left, 0), (right, 0)])
        >>> nx.is_isomorphic(joined_tree, nx.balanced_tree(2, h + 1))
        True

    """
    if len(rooted_trees) == 0:
        return nx.empty_graph(1)

    # Unzip the zipped list of (tree, root) pairs.
    trees, roots = zip(*rooted_trees)

    # The join of the trees has the same type as the type of the first
    # tree.
    R = type(trees[0])()

    # The original labels are always recorded during relabeling, because
    # they are the only way to find each root afterwards. Whether they
    # survive in the result depends on the caller having asked for them.
    keep_labels = label_attribute is not None
    if not keep_labels:
        label_attribute = "_old"

    # Relabel the nodes so that their union is the integers starting at 1.
    relabel = partial(
        nx.convert_node_labels_to_integers, label_attribute=label_attribute
    )
    lengths = (len(tree) for tree in trees[:-1])
    first_labels = chain([0], accumulate(lengths))
    trees = [
        relabel(tree, first_label=first_label + 1)
        for tree, first_label in zip(trees, first_labels)
    ]

    # Get the relabeled roots. The lookup must use the attribute the
    # labels were actually stored under, not a fixed name.
    roots = [
        next(v for v, d in tree.nodes(data=True) if d.get(label_attribute) == root)
        for tree, root in zip(trees, roots)
    ]

    # Remove the temporary labels unless the caller requested them.
    if not keep_labels:
        for tree in trees:
            for v in tree:
                tree.nodes[v].pop(label_attribute)

    # Add all sets of nodes and edges, with data.
    nodes = (tree.nodes(data=True) for tree in trees)
    edges = (tree.edges(data=True) for tree in trees)
    R.add_nodes_from(chain.from_iterable(nodes))
    R.add_edges_from(chain.from_iterable(edges))

    # Add graph attributes; later attributes take precedent over earlier
    # attributes.
    for tree in trees:
        R.graph.update(tree.graph)

    # Finally, join the subtrees at the root. We know 0 is unused by the
    # way we relabeled the subtrees.
    R.add_node(0)
    R.add_edges_from((0, root) for root in roots)

    return R

View file

@ -0,0 +1,220 @@
"""
Recognition Tests
=================
A *forest* is an acyclic, undirected graph, and a *tree* is a connected forest.
Depending on the subfield, there are various conventions for generalizing these
definitions to directed graphs.
In one convention, directed variants of forest and tree are defined in an
identical manner, except that the direction of the edges is ignored. In effect,
each directed edge is treated as a single undirected edge. Then, additional
restrictions are imposed to define *branchings* and *arborescences*.
In another convention, directed variants of forest and tree correspond to
the previous convention's branchings and arborescences, respectively. Then two
new terms, *polyforest* and *polytree*, are defined to correspond to the other
convention's forest and tree.
Summarizing::
   +--------------+--------------+
   | Convention A | Convention B |
   +==============+==============+
   | forest       | polyforest   |
   | tree         | polytree     |
   | branching    | forest       |
   | arborescence | tree         |
   +--------------+--------------+
Each convention has its reasons. The first convention emphasizes definitional
similarity in that directed forests and trees are only concerned with
acyclicity and do not have an in-degree constraint, just as their undirected
counterparts do not. The second convention emphasizes functional similarity
in the sense that the directed analog of a spanning tree is a spanning
arborescence. That is, take any spanning tree and choose one node as the root.
Then every edge is assigned a direction such that there is a directed path from the
root to every other node. The result is a spanning arborescence.
NetworkX follows convention "A". Explicitly, these are:
undirected forest
An undirected graph with no undirected cycles.
undirected tree
A connected, undirected forest.
directed forest
A directed graph with no undirected cycles. Equivalently, the underlying
graph structure (which ignores edge orientations) is an undirected forest.
In convention B, this is known as a polyforest.
directed tree
A weakly connected, directed forest. Equivalently, the underlying graph
structure (which ignores edge orientations) is an undirected tree. In
convention B, this is known as a polytree.
branching
A directed forest with each node having, at most, one parent. So the maximum
in-degree is equal to 1. In convention B, this is known as a forest.
arborescence
A directed tree with each node having, at most, one parent. So the maximum
in-degree is equal to 1. In convention B, this is known as a tree.
For trees and arborescences, the adjective "spanning" may be added to designate
that the graph, when considered as a forest/branching, consists of a single
tree/arborescence that includes all nodes in the graph. It is true, by
definition, that every tree/arborescence is spanning with respect to the nodes
that define the tree/arborescence and so, it might seem redundant to introduce
the notion of "spanning". However, the nodes may represent a subset of
nodes from a larger graph, and it is in this context that the term "spanning"
becomes a useful notion.
"""
import networkx as nx
__all__ = ["is_arborescence", "is_branching", "is_forest", "is_tree"]
@nx.utils.not_implemented_for("undirected")
def is_arborescence(G):
    """
    Returns True if `G` is an arborescence.

    An arborescence is a directed tree with maximum in-degree equal to 1.

    Parameters
    ----------
    G : graph
        The graph to test.

    Returns
    -------
    b : bool
        A boolean that is True if `G` is an arborescence.

    Notes
    -----
    In another convention, an arborescence is known as a *tree*.

    See Also
    --------
    is_tree

    """
    # is_tree() rejects the empty graph, so past this guard there is at
    # least one node whose in-degree can be inspected.
    if not is_tree(G):
        return False
    # No node may have more than one parent.
    return all(in_deg <= 1 for _, in_deg in G.in_degree())
@nx.utils.not_implemented_for("undirected")
def is_branching(G):
    """
    Returns True if `G` is a branching.

    A branching is a directed forest with maximum in-degree equal to 1.

    Parameters
    ----------
    G : directed graph
        The directed graph to test.

    Returns
    -------
    b : bool
        A boolean that is True if `G` is a branching.

    Notes
    -----
    In another convention, a branching is also known as a *forest*.

    See Also
    --------
    is_forest

    """
    # is_forest() rejects the empty graph, so past this guard there is at
    # least one node whose in-degree can be inspected.
    if not is_forest(G):
        return False
    # No node may have more than one parent.
    return all(in_deg <= 1 for _, in_deg in G.in_degree())
def is_forest(G):
    """
    Returns True if `G` is a forest.

    A forest is a graph with no undirected cycles.

    For directed graphs, `G` is a forest if the underlying graph is a forest.
    The underlying graph is obtained by treating each directed edge as a single
    undirected edge in a multigraph.

    Parameters
    ----------
    G : graph
        The graph to test.

    Returns
    -------
    b : bool
        A boolean that is True if `G` is a forest.

    Raises
    ------
    NetworkXPointlessConcept
        If `G` has no nodes.

    Notes
    -----
    In another convention, a directed forest is known as a *polyforest* and
    then *forest* corresponds to a *branching*.

    See Also
    --------
    is_branching

    """
    if len(G) == 0:
        raise nx.exception.NetworkXPointlessConcept("G has no nodes.")

    # Edge direction is ignored, so directed graphs are split into their
    # weakly connected components.
    if G.is_directed():
        find_components = nx.weakly_connected_components
    else:
        find_components = nx.connected_components

    # Each component is connected by construction, so it is acyclic exactly
    # when it has one edge fewer than it has nodes.
    for node_set in find_components(G):
        part = G.subgraph(node_set)
        if part.number_of_edges() != len(part) - 1:
            return False
    return True
def is_tree(G):
    """
    Returns True if `G` is a tree.

    A tree is a connected graph with no undirected cycles.

    For directed graphs, `G` is a tree if the underlying graph is a tree. The
    underlying graph is obtained by treating each directed edge as a single
    undirected edge in a multigraph.

    Parameters
    ----------
    G : graph
        The graph to test.

    Returns
    -------
    b : bool
        A boolean that is True if `G` is a tree.

    Raises
    ------
    NetworkXPointlessConcept
        If `G` has no nodes.

    Notes
    -----
    In another convention, a directed tree is known as a *polytree* and then
    *tree* corresponds to an *arborescence*.

    See Also
    --------
    is_arborescence

    """
    if len(G) == 0:
        raise nx.exception.NetworkXPointlessConcept("G has no nodes.")

    # A connected graph with no cycles has n-1 edges, so a wrong edge count
    # settles the question before any traversal is needed.
    if G.number_of_edges() != len(G) - 1:
        return False

    # Edge direction is ignored: weak connectivity is what matters for
    # directed graphs.
    connected = nx.is_weakly_connected if G.is_directed() else nx.is_connected
    return connected(G)

View file

@ -0,0 +1,451 @@
import pytest
np = pytest.importorskip("numpy")
import networkx as nx
from networkx.algorithms.tree import branchings
from networkx.algorithms.tree import recognition
#
# Explicitly discussed examples from Edmonds paper.
#
# Used in Figures A-F.
#
# Weighted adjacency matrix: a nonzero entry in row i, column j is the weight
# of the directed edge i -> j (e.g. row 0, column 2 holds 12, matching the
# edge (0, 2, 12) in the reference branchings below). G1() and G2() turn
# this matrix into MultiDiGraphs via nx.from_numpy_array.
#
# fmt: off
G_array = np.array([
# 0 1 2 3 4 5 6 7 8
[0, 0, 12, 0, 12, 0, 0, 0, 0], # 0
[4, 0, 0, 0, 0, 13, 0, 0, 0], # 1
[0, 17, 0, 21, 0, 12, 0, 0, 0], # 2
[5, 0, 0, 0, 17, 0, 18, 0, 0], # 3
[0, 0, 0, 0, 0, 0, 0, 12, 0], # 4
[0, 0, 0, 0, 0, 0, 14, 0, 12], # 5
[0, 0, 21, 0, 0, 0, 0, 0, 15], # 6
[0, 0, 0, 19, 0, 0, 15, 0, 0], # 7
[0, 0, 0, 0, 0, 0, 0, 18, 0], # 8
], dtype=int)
# fmt: on
def G1():
    # Fresh MultiDiGraph built from the G_array weight matrix, so each
    # test can modify its own copy.
    return nx.from_numpy_array(G_array, create_using=nx.MultiDiGraph)
def G2():
    # Same graph as G1 but with every edge weight lowered by 10.
    # This should not affect the optimal arborescence, but it does change
    # the optimal branching.
    shifted = G_array.copy()
    # Zero entries mean "no edge" and must stay zero, so only the nonzero
    # cells are shifted.
    shifted[shifted != 0] -= 10
    return nx.from_numpy_array(shifted, create_using=nx.MultiDiGraph)
# Every list below holds (source, target, weight) triples, the form that
# build_branching() unpacks when it turns a list into a DiGraph.
#
# An optimal branching for G1 that is also a spanning arborescence. So it is
# also an optimal spanning arborescence.
#
optimal_arborescence_1 = [
(0, 2, 12),
(2, 1, 17),
(2, 3, 21),
(1, 5, 13),
(3, 4, 17),
(3, 6, 18),
(6, 8, 15),
(8, 7, 18),
]
# For G2, the optimal branching of G1 (with shifted weights) is no longer
# an optimal branching, but it is still an optimal spanning arborescence
# (just with shifted weights). An optimal branching for G2 is similar to what
# appears in figure G (this is greedy_subopt_branching_1a below), but with the
# edge (3, 0, 5), which is now (3, 0, -5), removed. Thus, the optimal branching
# is not a spanning arborescence. The code finds optimal_branching_2a.
# An alternative and equivalent branching is optimal_branching_2b. We would
# need to modify the code to iterate through all equivalent optimal branchings.
#
# These are maximal branchings or arborescences.
optimal_branching_2a = [
(5, 6, 4),
(6, 2, 11),
(6, 8, 5),
(8, 7, 8),
(2, 1, 7),
(2, 3, 11),
(3, 4, 7),
]
optimal_branching_2b = [
(8, 7, 8),
(7, 3, 9),
(3, 4, 7),
(3, 6, 8),
(6, 2, 11),
(2, 1, 7),
(1, 5, 3),
]
optimal_arborescence_2 = [
(0, 2, 2),
(2, 1, 7),
(2, 3, 11),
(1, 5, 3),
(3, 4, 7),
(3, 6, 8),
(6, 8, 5),
(8, 7, 8),
]
# Two suboptimal maximal branchings on G1 obtained from a greedy algorithm.
# 1a matches what is shown in Figure G in Edmonds's paper.
greedy_subopt_branching_1a = [
(5, 6, 14),
(6, 2, 21),
(6, 8, 15),
(8, 7, 18),
(2, 1, 17),
(2, 3, 21),
(3, 0, 5),
(3, 4, 17),
]
greedy_subopt_branching_1b = [
(8, 7, 18),
(7, 6, 15),
(6, 2, 21),
(2, 1, 17),
(2, 3, 21),
(1, 5, 13),
(3, 0, 5),
(3, 4, 17),
]
def build_branching(edges):
    # Turn a list of (u, v, weight) triples into a DiGraph whose edges
    # carry the weight under the "weight" attribute.
    G = nx.DiGraph()
    G.add_weighted_edges_from(edges, weight="weight")
    return G
def sorted_edges(G, attr="weight", default=1):
    # Reduce every edge to (u, v, value of `attr`), falling back to
    # `default` when the attribute is missing.
    triples = [
        (u, v, data.get(attr, default)) for u, v, data in G.edges(data=True)
    ]
    # Order by weight first, then target, then source, so two graphs with
    # the same edges always produce the same sequence.
    triples.sort(key=lambda edge: (edge[2], edge[1], edge[0]))
    return triples
def assert_equal_branchings(G1, G2, attr="weight", default=1):
    # Assert that two graphs have the same edges with (almost) equal values
    # of `attr`; `default` stands in for edges lacking the attribute.
    edges1 = list(G1.edges(data=True))
    edges2 = list(G2.edges(data=True))
    assert len(edges1) == len(edges2)

    # Grab the weights only.
    e1 = sorted_edges(G1, attr, default)
    e2 = sorted_edges(G2, attr, default)

    # If we have an exception, let's see the edges.
    print(e1)
    print(e2)
    # Blank separator line; a bare `print` (Python 2 style) is a no-op.
    print()

    # Both lists are sorted the same way, so matching graphs line up
    # position by position.
    for a, b in zip(e1, e2):
        assert a[:2] == b[:2]
        np.testing.assert_almost_equal(a[2], b[2])
################
def test_optimal_branching1():
    # Sanity-check the reference data: it must form an arborescence with
    # total weight 131.
    G = build_branching(optimal_arborescence_1)
    # Plain assert: the former ", True" was only an assertion message.
    assert recognition.is_arborescence(G)
    assert branchings.branching_weight(G) == 131
def test_optimal_branching2a():
    # Sanity-check the reference data: on its own nodes it must form an
    # arborescence with total weight 53.
    G = build_branching(optimal_branching_2a)
    # Plain assert: the former ", True" was only an assertion message.
    assert recognition.is_arborescence(G)
    assert branchings.branching_weight(G) == 53
def test_optimal_branching2b():
    # Sanity-check the reference data: on its own nodes it must form an
    # arborescence with total weight 53.
    G = build_branching(optimal_branching_2b)
    # Plain assert: the former ", True" was only an assertion message.
    assert recognition.is_arborescence(G)
    assert branchings.branching_weight(G) == 53
def test_optimal_arborescence2():
    # Sanity-check the reference data: it must form an arborescence with
    # total weight 51.
    G = build_branching(optimal_arborescence_2)
    # Plain assert: the former ", True" was only an assertion message.
    assert recognition.is_arborescence(G)
    assert branchings.branching_weight(G) == 51
def test_greedy_suboptimal_branching1a():
    # Sanity-check the reference data: it must form an arborescence with
    # total weight 128.
    G = build_branching(greedy_subopt_branching_1a)
    # Plain assert: the former ", True" was only an assertion message.
    assert recognition.is_arborescence(G)
    assert branchings.branching_weight(G) == 128
def test_greedy_suboptimal_branching1b():
    # Sanity-check the reference data: it must form an arborescence with
    # total weight 127.
    G = build_branching(greedy_subopt_branching_1b)
    # Plain assert: the former ", True" was only an assertion message.
    assert recognition.is_arborescence(G)
    assert branchings.branching_weight(G) == 127
def test_greedy_max1():
    # Standard test: greedy maximum branching with default settings.
    found = branchings.greedy_branching(G1())

    # There are only two possible greedy branchings. The sorting is such
    # that it should equal the second suboptimal branching: 1b.
    expected = build_branching(greedy_subopt_branching_1b)
    assert_equal_branchings(found, expected)
def test_greedy_max2():
    # Different default weight: edge 1 -> 0 loses its weight attribute and
    # is therefore scored with the default passed to greedy_branching.
    graph = G1()
    del graph[1][0][0]["weight"]
    found = branchings.greedy_branching(graph, default=6)

    # Chosen so that edge (3,0,5) is not selected and (1,0,6) is instead.
    expected = build_branching(
        [
            (1, 0, 6),
            (1, 5, 13),
            (7, 6, 15),
            (2, 1, 17),
            (3, 4, 17),
            (8, 7, 18),
            (2, 3, 21),
            (6, 2, 21),
        ]
    )
    assert_equal_branchings(found, expected)
def test_greedy_max3():
    # All equal weights: with attr=None no weight attribute is consulted.
    found = branchings.greedy_branching(G1(), attr=None)

    # This is mostly arbitrary...the output was generated by running the algo.
    expected = build_branching(
        [
            (2, 1, 1),
            (3, 0, 1),
            (3, 4, 1),
            (5, 8, 1),
            (6, 2, 1),
            (7, 3, 1),
            (7, 6, 1),
            (8, 7, 1),
        ]
    )
    assert_equal_branchings(found, expected, default=1)
def test_greedy_min():
    # Greedy branching in "min" mode on the Edmonds example graph.
    found = branchings.greedy_branching(G1(), kind="min")

    expected = build_branching(
        [
            (1, 0, 4),
            (0, 2, 12),
            (0, 4, 12),
            (2, 5, 12),
            (4, 7, 12),
            (5, 8, 12),
            (5, 6, 14),
            (7, 3, 19),
        ]
    )
    assert_equal_branchings(found, expected)
def test_edmonds1_maxbranch():
    # The maximum branching of G1 must match the reference arborescence.
    found = branchings.maximum_branching(G1())
    expected = build_branching(optimal_arborescence_1)
    assert_equal_branchings(found, expected)
def test_edmonds1_maxarbor():
    # The maximum spanning arborescence of G1 must match the reference.
    found = branchings.maximum_spanning_arborescence(G1())
    expected = build_branching(optimal_arborescence_1)
    assert_equal_branchings(found, expected)
def test_edmonds2_maxbranch():
    # On the shifted graph G2 the maximum branching is expected to be
    # optimal_branching_2a.
    found = branchings.maximum_branching(G2())
    expected = build_branching(optimal_branching_2a)
    assert_equal_branchings(found, expected)
def test_edmonds2_maxarbor():
    """The maximum spanning arborescence of G2 equals ``optimal_arborescence_2``."""
    expected = build_branching(optimal_arborescence_2)
    actual = branchings.maximum_spanning_arborescence(G2())
    assert_equal_branchings(actual, expected)
def test_edmonds2_minarbor():
    """Minimum spanning arborescence of G1 against a recorded result."""
    actual = branchings.minimum_spanning_arborescence(G1())

    # This was obtained from algorithm. Need to verify it independently.
    # Branch weight is: 96
    expected = build_branching(
        [
            (3, 0, 5),
            (0, 2, 12),
            (0, 4, 12),
            (2, 5, 12),
            (4, 7, 12),
            (5, 8, 12),
            (5, 6, 14),
            (2, 1, 17),
        ]
    )
    assert_equal_branchings(actual, expected)
def test_edmonds3_minbranch1():
    """The minimum branching of G1 is expected to contain no edges."""
    no_edges = build_branching([])
    actual = branchings.minimum_branching(G1())
    assert_equal_branchings(actual, no_edges)
def test_edmonds3_minbranch2():
    """After adding one negative edge, only that edge is in the min branching."""
    graph = G1()
    graph.add_edge(8, 9, weight=-10)
    actual = branchings.minimum_branching(graph)
    expected = build_branching([(8, 9, -10)])
    assert_equal_branchings(actual, expected)
# Need more tests
def test_mst():
    """Make sure we get the same results for undirected graphs.

    An undirected weighted graph is converted to a directed one and its
    minimum spanning arborescence is compared, ignoring edge direction,
    with the expected spanning tree edges.
    """
    # Example from: https://en.wikipedia.org/wiki/Kruskal's_algorithm
    weighted = [
        (0, 3, 5),
        (0, 1, 7),
        (1, 3, 9),
        (1, 2, 8),
        (1, 4, 7),
        (3, 4, 15),
        (3, 5, 6),
        (2, 4, 5),
        (4, 5, 8),
        (4, 6, 9),
        (5, 6, 11),
    ]
    undirected = nx.Graph()
    undirected.add_edges_from([(u, v, [("weight", w)]) for u, v, w in weighted])
    arborescence = branchings.minimum_spanning_arborescence(
        undirected.to_directed()
    )

    # Endpoints are stored as sets so that edge direction does not matter.
    expected = [
        ({0, 1}, 7),
        ({0, 3}, 5),
        ({3, 5}, 6),
        ({1, 4}, 7),
        ({4, 2}, 5),
        ({4, 6}, 9),
    ]
    assert arborescence.number_of_edges() == len(expected)
    for tail, head, data in arborescence.edges(data=True):
        assert ({tail, head}, data["weight"]) in expected
def test_mixed_nodetypes():
    """Smoke test to make sure no TypeError is raised for mixed node types.

    The graph mixes integer nodes with the string node ``"1"``.
    """
    G = nx.Graph()
    edgelist = [(0, 3, [("weight", 5)]), (0, "1", [("weight", 5)])]
    G.add_edges_from(edgelist)
    G = G.to_directed()
    # Only the absence of an exception matters, so the result is not kept.
    branchings.minimum_spanning_arborescence(G)
def test_edmonds1_minbranch():
    """Min branching on the negated weight matrix.

    Using -G_array and min should give the same as optimal_arborescence_1,
    but with all edges negative.
    """
    negated = nx.from_numpy_array(-G_array, create_using=nx.DiGraph)

    # Quickly make sure max branching is empty.
    assert_equal_branchings(
        branchings.maximum_branching(negated), build_branching([])
    )

    # Now test the min branching.
    flipped = [(u, v, -w) for (u, v, w) in optimal_arborescence_1]
    assert_equal_branchings(
        branchings.minimum_branching(negated), build_branching(flipped)
    )
def test_edge_attribute_preservation_normal_graph():
    """Edge attributes survive ``find_optimum(..., preserve_attrs=True)``.

    Uses the Edmonds class on a normal (non-multi) graph.
    """
    triangle = nx.Graph()
    triangle.add_edges_from(
        [
            (0, 1, [("weight", 5), ("otherattr", 1), ("otherattr2", 3)]),
            (0, 2, [("weight", 5), ("otherattr", 2), ("otherattr2", 2)]),
            (1, 2, [("weight", 6), ("otherattr", 3), ("otherattr2", 1)]),
        ]
    )

    solver = branchings.Edmonds(triangle)
    optimum = solver.find_optimum("weight", preserve_attrs=True, seed=1)

    kept = optimum[0][1]
    assert kept["otherattr"] == 1
    assert kept["otherattr2"] == 3
def test_edge_attribute_preservation_multigraph():
    """Edge attributes survive ``find_optimum(..., preserve_attrs=True)``.

    Uses the Edmonds class on a multigraph.
    """
    attributed = [
        (0, 1, [("weight", 5), ("otherattr", 1), ("otherattr2", 3)]),
        (0, 2, [("weight", 5), ("otherattr", 2), ("otherattr2", 2)]),
        (1, 2, [("weight", 6), ("otherattr", 3), ("otherattr2", 1)]),
    ]
    multi = nx.MultiGraph()
    # Make sure we have duplicate edge paths
    multi.add_edges_from(attributed * 2)

    solver = branchings.Edmonds(multi)
    optimum = solver.find_optimum("weight", preserve_attrs=True)

    kept = optimum[0][1][0]
    assert kept["otherattr"] == 1
    assert kept["otherattr2"] == 3
def test_edge_attribute_discard():
    """Edge attributes are discarded if we do not specify to keep them."""
    triangle = nx.Graph()
    triangle.add_edges_from(
        [
            (0, 1, [("weight", 5), ("otherattr", 1), ("otherattr2", 3)]),
            (0, 2, [("weight", 5), ("otherattr", 2), ("otherattr2", 2)]),
            (1, 2, [("weight", 6), ("otherattr", 3), ("otherattr2", 1)]),
        ]
    )

    solver = branchings.Edmonds(triangle)
    optimum = solver.find_optimum("weight", preserve_attrs=False)

    # The edge itself must exist; only the extra attribute lookup may fail.
    remaining = optimum[0][1]
    with pytest.raises(KeyError):
        remaining["otherattr"]

View file

@ -0,0 +1,117 @@
"""Unit tests for the :mod:`~networkx.algorithms.tree.coding` module."""
from itertools import product
import pytest
import networkx as nx
from networkx.testing import assert_nodes_equal
from networkx.testing import assert_edges_equal
class TestPruferSequence:
    """Unit tests for the Prüfer sequence encoding and decoding
    functions.

    In the error-case tests the fixture is built *outside* the
    ``pytest.raises`` block, so that only the call under test can satisfy
    the expected exception.

    """

    def test_nontree(self):
        """Encoding a 3-cycle raises ``NotATree``."""
        G = nx.cycle_graph(3)
        with pytest.raises(nx.NotATree):
            nx.to_prufer_sequence(G)

    def test_null_graph(self):
        """Encoding the null graph raises ``NetworkXPointlessConcept``."""
        G = nx.null_graph()
        with pytest.raises(nx.NetworkXPointlessConcept):
            nx.to_prufer_sequence(G)

    def test_trivial_graph(self):
        """Encoding the trivial graph raises ``NetworkXPointlessConcept``."""
        G = nx.trivial_graph()
        with pytest.raises(nx.NetworkXPointlessConcept):
            nx.to_prufer_sequence(G)

    def test_bad_integer_labels(self):
        """Encoding a tree whose nodes are the letters a, b, c raises ``KeyError``."""
        T = nx.Graph(nx.utils.pairwise("abc"))
        with pytest.raises(KeyError):
            nx.to_prufer_sequence(T)

    def test_encoding(self):
        """Tests for encoding a tree as a Prüfer sequence using the
        iterative strategy.

        """
        # Example from Wikipedia.
        tree = nx.Graph([(0, 3), (1, 3), (2, 3), (3, 4), (4, 5)])
        sequence = nx.to_prufer_sequence(tree)
        assert sequence == [3, 3, 3, 4]

    def test_decoding(self):
        """Tests for decoding a tree from a Prüfer sequence."""
        # Example from Wikipedia.
        sequence = [3, 3, 3, 4]
        tree = nx.from_prufer_sequence(sequence)
        assert_nodes_equal(list(tree), list(range(6)))
        edges = [(0, 3), (1, 3), (2, 3), (3, 4), (4, 5)]
        assert_edges_equal(list(tree.edges()), edges)

    def test_decoding2(self):
        """Tests decoding a second, longer Prüfer sequence."""
        # Example from "An Optimal Algorithm for Prufer Codes".
        sequence = [2, 4, 0, 1, 3, 3]
        tree = nx.from_prufer_sequence(sequence)
        assert_nodes_equal(list(tree), list(range(8)))
        edges = [(0, 1), (0, 4), (1, 3), (2, 4), (2, 5), (3, 6), (3, 7)]
        assert_edges_equal(list(tree.edges()), edges)

    def test_inverse(self):
        """Tests that the encoding and decoding functions are inverses.

        """
        # tree -> sequence -> tree
        for T in nx.nonisomorphic_trees(4):
            T2 = nx.from_prufer_sequence(nx.to_prufer_sequence(T))
            assert_nodes_equal(list(T), list(T2))
            assert_edges_equal(list(T.edges()), list(T2.edges()))

        # sequence -> tree -> sequence
        for seq in product(range(4), repeat=2):
            seq2 = nx.to_prufer_sequence(nx.from_prufer_sequence(seq))
            assert list(seq) == seq2
class TestNestedTuple:
    """Unit tests for the nested tuple encoding and decoding functions.

    In the error-case tests the fixture is built *outside* the
    ``pytest.raises`` block, so that only the call under test can satisfy
    the expected exception.

    """

    def test_nontree(self):
        """Encoding a 3-cycle raises ``NotATree``."""
        G = nx.cycle_graph(3)
        with pytest.raises(nx.NotATree):
            nx.to_nested_tuple(G, 0)

    def test_unknown_root(self):
        """A root that is not in the graph raises ``NodeNotFound``."""
        G = nx.path_graph(2)
        with pytest.raises(nx.NodeNotFound):
            nx.to_nested_tuple(G, "bogus")

    def test_encoding(self):
        """A full binary tree on 7 nodes encodes to a balanced nested tuple."""
        T = nx.full_rary_tree(2, 2 ** 3 - 1)
        expected = (((), ()), ((), ()))
        actual = nx.to_nested_tuple(T, 0)
        # Compare the tuples directly. ``assert_nodes_equal`` treats its
        # arguments as node collections and would also accept differently
        # shaped nestings.
        assert actual == expected

    def test_canonical_form(self):
        """``canonical_form=True`` yields the expected ordering of subtrees."""
        T = nx.Graph()
        T.add_edges_from([(0, 1), (0, 2), (0, 3)])
        T.add_edges_from([(1, 4), (1, 5)])
        T.add_edges_from([(3, 6), (3, 7)])
        root = 0
        actual = nx.to_nested_tuple(T, root, canonical_form=True)
        expected = ((), ((), ()), ((), ()))
        assert actual == expected

    def test_decoding(self):
        """A balanced nested tuple decodes to a tree isomorphic to the full binary tree."""
        balanced = (((), ()), ((), ()))
        expected = nx.full_rary_tree(2, 2 ** 3 - 1)
        actual = nx.from_nested_tuple(balanced)
        assert nx.is_isomorphic(expected, actual)

    def test_sensible_relabeling(self):
        """``sensible_relabeling=True`` produces the node labels and edges listed below."""
        balanced = (((), ()), ((), ()))
        T = nx.from_nested_tuple(balanced, sensible_relabeling=True)
        edges = [(0, 1), (0, 2), (1, 3), (1, 4), (2, 5), (2, 6)]
        assert_nodes_equal(list(T), list(range(2 ** 3 - 1)))
        assert_edges_equal(list(T.edges()), edges)

View file

@ -0,0 +1,79 @@
import networkx as nx
from networkx.algorithms.tree.decomposition import junction_tree
def test_junction_tree_directed_confounders():
    """Junction tree of a digraph with edges A->C, B->C, C->D and C->E."""
    expected_edges = [
        (("C", "E"), ("C",)),
        (("C",), ("A", "B", "C")),
        (("A", "B", "C"), ("C",)),
        (("C",), ("C", "D")),
    ]
    expected = nx.Graph()
    expected.add_edges_from(expected_edges)

    source = nx.DiGraph()
    source.add_edges_from([("A", "C"), ("B", "C"), ("C", "D"), ("C", "E")])
    result = junction_tree(source)

    assert nx.is_isomorphic(result, expected)
def test_junction_tree_directed_unconnected_nodes():
    """Junction tree of a digraph that has nodes but no edges."""
    # NOTE(review): the list holds one 4-tuple, which looks like it adds a
    # single tuple-valued node rather than four separate nodes -- confirm
    # that this is the intended fixture.
    members = [("A", "B", "C", "D")]

    expected = nx.Graph()
    expected.add_nodes_from(members)

    source = nx.DiGraph()
    source.add_nodes_from(members)
    result = junction_tree(source)

    assert nx.is_isomorphic(result, expected)
def test_junction_tree_directed_cascade():
    """Junction tree of the directed chain A->B->C->D."""
    expected_edges = [
        (("A", "B"), ("B",)),
        (("B",), ("B", "C")),
        (("B", "C"), ("C",)),
        (("C",), ("C", "D")),
    ]
    expected = nx.Graph()
    expected.add_edges_from(expected_edges)

    source = nx.DiGraph()
    source.add_edges_from([("A", "B"), ("B", "C"), ("C", "D")])
    result = junction_tree(source)

    assert nx.is_isomorphic(result, expected)
def test_junction_tree_directed_unconnected_edges():
    """Junction tree of three disjoint directed edges."""
    pairs = [("A", "B"), ("C", "D"), ("E", "F")]

    # The expected graph has one node per pair and no edges between them.
    expected = nx.Graph()
    expected.add_nodes_from(pairs)

    source = nx.DiGraph()
    source.add_edges_from(pairs)
    result = junction_tree(source)

    assert nx.is_isomorphic(result, expected)
def test_junction_tree_undirected():
    """Junction tree of an undirected graph with edges A-C, A-D, B-C and C-E."""
    expected_edges = [
        (("A", "D"), ("A",)),
        (("A",), ("A", "C")),
        (("A", "C"), ("C",)),
        (("C",), ("B", "C")),
        (("B", "C"), ("C",)),
        (("C",), ("C", "E")),
    ]
    expected = nx.Graph()
    expected.add_edges_from(expected_edges)

    source = nx.Graph()
    source.add_edges_from([("A", "C"), ("A", "D"), ("B", "C"), ("C", "E")])
    result = junction_tree(source)

    assert nx.is_isomorphic(result, expected)

View file

@ -0,0 +1,285 @@
"""Unit tests for the :mod:`networkx.algorithms.tree.mst` module."""
import pytest
import networkx as nx
from networkx.testing import assert_nodes_equal, assert_edges_equal
def test_unknown_algorithm():
    """An unrecognised ``algorithm`` name raises ``ValueError``."""
    empty = nx.Graph()
    with pytest.raises(ValueError):
        nx.minimum_spanning_tree(empty, algorithm="random")
class MinimumSpanningTreeTestBase:
    """Base class for test classes for minimum spanning tree algorithms.

    This class contains some common tests that will be inherited by
    subclasses. Each subclass must have a class attribute
    :data:`algorithm` that is a string representing the algorithm to
    run, as described under the ``algorithm`` keyword argument for the
    :func:`networkx.minimum_spanning_edges` function. Subclasses can
    then implement any algorithm-specific tests.

    """

    # Weighted example graph shared by the fixtures of this class. It comes
    # from Wikipedia: https://en.wikipedia.org/wiki/Kruskal's_algorithm
    _EXAMPLE_EDGES = (
        (0, 1, 7),
        (0, 3, 5),
        (1, 2, 8),
        (1, 3, 9),
        (1, 4, 7),
        (2, 4, 5),
        (3, 4, 15),
        (3, 5, 6),
        (4, 5, 8),
        (4, 6, 9),
        (5, 6, 11),
    )

    @staticmethod
    def _oriented_pairs(edges):
        """Return the ``(u, v)`` pairs sorted, each with its smaller node first.

        Edges from the spanning edges functions don't come in sorted
        orientation, so each edge is oriented individually.
        """
        return sorted((min(u, v), max(u, v)) for u, v in edges)

    @classmethod
    def _shifted_example_graph(cls):
        """Return the example graph with every node label increased by one."""
        G = nx.Graph()
        G.add_weighted_edges_from(
            [(u + 1, v + 1, wt) for u, v, wt in cls._EXAMPLE_EDGES]
        )
        return G

    def _shifted_minimum_pairs(self):
        """Return the expected minimum spanning pairs with labels shifted by one."""
        return [(u + 1, v + 1) for u, v, d in self.minimum_spanning_edgelist]

    def setup_method(self, method):
        """Creates an example graph and stores the expected minimum and
        maximum spanning tree edges.

        """
        # This stores the class attribute `algorithm` in an instance attribute.
        self.algo = self.algorithm
        self.G = nx.Graph()
        self.G.add_weighted_edges_from(self._EXAMPLE_EDGES)
        self.minimum_spanning_edgelist = [
            (0, 1, {"weight": 7}),
            (0, 3, {"weight": 5}),
            (1, 4, {"weight": 7}),
            (2, 4, {"weight": 5}),
            (3, 5, {"weight": 6}),
            (4, 6, {"weight": 9}),
        ]
        self.maximum_spanning_edgelist = [
            (0, 1, {"weight": 7}),
            (1, 2, {"weight": 8}),
            (1, 3, {"weight": 9}),
            (3, 4, {"weight": 15}),
            (4, 6, {"weight": 9}),
            (5, 6, {"weight": 11}),
        ]

    def test_minimum_edges(self):
        """Minimum spanning edges, with data, match the expected list."""
        edges = nx.minimum_spanning_edges(self.G, algorithm=self.algo)
        # Edges from the spanning edges functions don't come in sorted
        # orientation, so we need to sort each edge individually.
        actual = sorted((min(u, v), max(u, v), d) for u, v, d in edges)
        assert_edges_equal(actual, self.minimum_spanning_edgelist)

    def test_maximum_edges(self):
        """Maximum spanning edges, with data, match the expected list."""
        edges = nx.maximum_spanning_edges(self.G, algorithm=self.algo)
        # Edges from the spanning edges functions don't come in sorted
        # orientation, so we need to sort each edge individually.
        actual = sorted((min(u, v), max(u, v), d) for u, v, d in edges)
        assert_edges_equal(actual, self.maximum_spanning_edgelist)

    def test_without_data(self):
        """With ``data=False`` only the node pairs are produced."""
        edges = nx.minimum_spanning_edges(self.G, algorithm=self.algo, data=False)
        actual = self._oriented_pairs(edges)
        expected = [(u, v) for u, v, d in self.minimum_spanning_edgelist]
        assert_edges_equal(actual, expected)

    def test_nan_weights(self):
        """NaN weights are skipped with ``ignore_nan=True`` and rejected otherwise."""
        # Edge weights NaN never appear in the spanning tree. see #2164
        G = self.G
        G.add_edge(0, 12, weight=float("nan"))
        edges = nx.minimum_spanning_edges(
            G, algorithm=self.algo, data=False, ignore_nan=True
        )
        actual = self._oriented_pairs(edges)
        expected = [(u, v) for u, v, d in self.minimum_spanning_edgelist]
        assert_edges_equal(actual, expected)

        # Now test for raising exception
        edges = nx.minimum_spanning_edges(
            G, algorithm=self.algo, data=False, ignore_nan=False
        )
        with pytest.raises(ValueError):
            list(edges)

        # test default for ignore_nan as False
        edges = nx.minimum_spanning_edges(G, algorithm=self.algo, data=False)
        with pytest.raises(ValueError):
            list(edges)

    def test_nan_weights_order(self):
        """Same as the NaN test, with the NaN edge at the beginning of G.nodes."""
        G = self._shifted_example_graph()
        G.add_edge(0, 7, weight=float("nan"))
        edges = nx.minimum_spanning_edges(
            G, algorithm=self.algo, data=False, ignore_nan=True
        )
        actual = self._oriented_pairs(edges)
        assert_edges_equal(actual, self._shifted_minimum_pairs())

    def test_isolated_node(self):
        """An extra isolated node does not change the spanning edges."""
        G = self._shifted_example_graph()
        G.add_node(0)
        edges = nx.minimum_spanning_edges(
            G, algorithm=self.algo, data=False, ignore_nan=True
        )
        actual = self._oriented_pairs(edges)
        assert_edges_equal(actual, self._shifted_minimum_pairs())

    def test_minimum_tree(self):
        """The minimum spanning tree has exactly the expected edges."""
        T = nx.minimum_spanning_tree(self.G, algorithm=self.algo)
        actual = sorted(T.edges(data=True))
        assert_edges_equal(actual, self.minimum_spanning_edgelist)

    def test_maximum_tree(self):
        """The maximum spanning tree has exactly the expected edges."""
        T = nx.maximum_spanning_tree(self.G, algorithm=self.algo)
        actual = sorted(T.edges(data=True))
        assert_edges_equal(actual, self.maximum_spanning_edgelist)

    def test_disconnected(self):
        """For a graph with two components both edges are kept."""
        G = nx.Graph([(0, 1, dict(weight=1)), (2, 3, dict(weight=2))])
        T = nx.minimum_spanning_tree(G, algorithm=self.algo)
        assert_nodes_equal(list(T), list(range(4)))
        assert_edges_equal(list(T.edges()), [(0, 1), (2, 3)])

    def test_empty_graph(self):
        """A graph without edges yields a tree with all nodes and no edges."""
        G = nx.empty_graph(3)
        T = nx.minimum_spanning_tree(G, algorithm=self.algo)
        assert_nodes_equal(sorted(T), list(range(3)))
        assert T.number_of_edges() == 0

    def test_attributes(self):
        """Graph, node and edge attributes are carried over to the tree."""
        G = nx.Graph()
        G.add_edge(1, 2, weight=1, color="red", distance=7)
        G.add_edge(2, 3, weight=1, color="green", distance=2)
        G.add_edge(1, 3, weight=10, color="blue", distance=1)
        G.graph["foo"] = "bar"
        T = nx.minimum_spanning_tree(G, algorithm=self.algo)
        assert T.graph == G.graph
        assert_nodes_equal(T, G)
        for u, v in T.edges():
            assert T.adj[u][v] == G.adj[u][v]

    def test_weight_attribute(self):
        """The ``weight`` keyword selects which edge attribute is optimised."""
        G = nx.Graph()
        G.add_edge(0, 1, weight=1, distance=7)
        G.add_edge(0, 2, weight=30, distance=1)
        G.add_edge(1, 2, weight=1, distance=1)
        G.add_node(3)
        T = nx.minimum_spanning_tree(G, algorithm=self.algo, weight="distance")
        assert_nodes_equal(sorted(T), list(range(4)))
        assert_edges_equal(sorted(T.edges()), [(0, 2), (1, 2)])
        T = nx.maximum_spanning_tree(G, algorithm=self.algo, weight="distance")
        assert_nodes_equal(sorted(T), list(range(4)))
        assert_edges_equal(sorted(T.edges()), [(0, 1), (0, 2)])
class TestBoruvka(MinimumSpanningTreeTestBase):
    """Runs the shared spanning tree tests with Borůvka's algorithm and adds
    a check for the Unicode spelling of its name.

    """

    algorithm = "boruvka"

    def test_unicode_name(self):
        """Tests that the Unicode string ``"borůvka"`` also selects
        Borůvka's algorithm.

        """
        spanning = nx.minimum_spanning_edges(self.G, algorithm="borůvka")
        # The returned edges have no fixed orientation, so put the smaller
        # endpoint first in each edge before sorting.
        normalized = sorted((min(a, b), max(a, b), data) for a, b, data in spanning)
        assert_edges_equal(normalized, self.minimum_spanning_edgelist)
class MultigraphMSTTestBase(MinimumSpanningTreeTestBase):
    """Abstract class adding multigraph edge-key tests to the shared tests."""

    def test_multigraph_keys_min(self):
        """Tests that the minimum spanning edges of a multigraph
        preserves edge keys.

        """
        parallel = nx.MultiGraph()
        parallel.add_edge(0, 1, key="a", weight=2)
        parallel.add_edge(0, 1, key="b", weight=1)
        found = nx.minimum_spanning_edges(parallel, algorithm=self.algo, data=False)
        # The lighter of the two parallel edges carries key "b".
        assert_edges_equal([(0, 1, "b")], list(found))

    def test_multigraph_keys_max(self):
        """Tests that the maximum spanning edges of a multigraph
        preserves edge keys.

        """
        parallel = nx.MultiGraph()
        parallel.add_edge(0, 1, key="a", weight=2)
        parallel.add_edge(0, 1, key="b", weight=1)
        found = nx.maximum_spanning_edges(parallel, algorithm=self.algo, data=False)
        # The heavier of the two parallel edges carries key "a".
        assert_edges_equal([(0, 1, "a")], list(found))
class TestKruskal(MultigraphMSTTestBase):
    """Runs the inherited minimum/maximum spanning tree tests with
    ``algorithm="kruskal"``.

    """

    algorithm = "kruskal"
class TestPrim(MultigraphMSTTestBase):
    """Unit tests for computing a minimum (or maximum) spanning tree
    using Prim's algorithm.

    """

    algorithm = "prim"

    def test_multigraph_keys_tree(self):
        """The minimum spanning tree of two parallel edges keeps the lighter one."""
        G = nx.MultiGraph()
        G.add_edge(0, 1, key="a", weight=2)
        G.add_edge(0, 1, key="b", weight=1)
        # Pass the algorithm explicitly; without it the library default
        # would run and this class would not exercise Prim's algorithm.
        T = nx.minimum_spanning_tree(G, algorithm=self.algo)
        assert_edges_equal([(0, 1, 1)], list(T.edges(data="weight")))

    def test_multigraph_keys_tree_max(self):
        """The maximum spanning tree of two parallel edges keeps the heavier one."""
        G = nx.MultiGraph()
        G.add_edge(0, 1, key="a", weight=2)
        G.add_edge(0, 1, key="b", weight=1)
        # Pass the algorithm explicitly; without it the library default
        # would run and this class would not exercise Prim's algorithm.
        T = nx.maximum_spanning_tree(G, algorithm=self.algo)
        assert_edges_equal([(0, 1, 2)], list(T.edges(data="weight")))

View file

@ -0,0 +1,38 @@
"""Unit tests for the :mod:`networkx.algorithms.tree.operations` module.
"""
import networkx as nx
from networkx.testing import assert_nodes_equal
from networkx.testing import assert_edges_equal
class TestJoin:
    """Unit tests for the :func:`networkx.tree.join` function."""

    def test_empty_sequence(self):
        """Tests that joining the empty sequence results in the tree
        with one node.

        """
        joined = nx.join([])
        assert joined.number_of_edges() == 0
        assert len(joined) == 1

    def test_single(self):
        """Tests that joining just one tree yields a tree with one more
        node.

        """
        single = nx.empty_graph(1)
        expected = nx.path_graph(2)
        joined = nx.join([(single, 0)])
        assert_nodes_equal(list(expected), list(joined))
        assert_edges_equal(list(expected.edges()), list(joined.edges()))

    def test_basic(self):
        """Tests for joining multiple subtrees at a root node."""
        # Two full binary trees on 3 nodes, each rooted at node 0.
        subtrees = [(nx.full_rary_tree(2, 2 ** 2 - 1), 0) for _ in range(2)]
        joined = nx.join(subtrees)
        # Joining them should give a full binary tree on 7 nodes.
        expected = nx.full_rary_tree(2, 2 ** 3 - 1)
        assert nx.is_isomorphic(joined, expected)

View file

@ -0,0 +1,163 @@
import pytest
import networkx as nx
class TestTreeRecognition:
    """Tests for ``nx.is_tree`` and ``nx.is_forest``.

    The graph classes used for the fixtures are taken from the ``graph``
    and ``multigraph`` class attributes.
    """

    graph = nx.Graph
    multigraph = nx.MultiGraph

    @classmethod
    def setup_class(cls):
        """Build the fixture graphs shared by all tests of the class."""
        path_edges = [(i, i + 1) for i in range(4)]
        triangle_edges = [(0, 1), (1, 2), (2, 0)]

        # Null graph.
        cls.T1 = cls.graph()

        # Single node.
        cls.T2 = cls.graph()
        cls.T2.add_node(1)

        # Path on five nodes.
        cls.T3 = cls.graph()
        cls.T3.add_nodes_from(range(5))
        cls.T3.add_edges_from(path_edges)

        # The same path, as a multigraph.
        cls.T5 = cls.multigraph()
        cls.T5.add_nodes_from(range(5))
        cls.T5.add_edges_from(path_edges)

        # Single edge between nodes 6 and 7.
        cls.T6 = cls.graph()
        cls.T6.add_nodes_from([6, 7])
        cls.T6.add_edge(6, 7)

        # Composition of the single edge and the path.
        cls.F1 = nx.compose(cls.T6, cls.T3)

        # Single node with a self-loop.
        cls.N4 = cls.graph()
        cls.N4.add_node(1)
        cls.N4.add_edge(1, 1)

        # Five nodes, no edges.
        cls.N5 = cls.graph()
        cls.N5.add_nodes_from(range(5))

        # Cycle on three nodes.
        cls.N6 = cls.graph()
        cls.N6.add_nodes_from(range(3))
        cls.N6.add_edges_from(triangle_edges)

        # Composition of the single edge and the cycle.
        cls.NF1 = nx.compose(cls.T6, cls.N6)

    def test_null_tree(self):
        """``is_tree`` on an empty ``graph`` instance raises."""
        empty = self.graph()
        with pytest.raises(nx.NetworkXPointlessConcept):
            nx.is_tree(empty)

    def test_null_tree2(self):
        """``is_tree`` on an empty ``multigraph`` instance raises."""
        empty = self.multigraph()
        with pytest.raises(nx.NetworkXPointlessConcept):
            nx.is_tree(empty)

    def test_null_forest(self):
        """``is_forest`` on an empty ``graph`` instance raises."""
        empty = self.graph()
        with pytest.raises(nx.NetworkXPointlessConcept):
            nx.is_forest(empty)

    def test_null_forest2(self):
        """``is_forest`` on an empty ``multigraph`` instance raises."""
        empty = self.multigraph()
        with pytest.raises(nx.NetworkXPointlessConcept):
            nx.is_forest(empty)

    def test_is_tree(self):
        """The single node and both paths are trees."""
        assert nx.is_tree(self.T2)
        assert nx.is_tree(self.T3)
        assert nx.is_tree(self.T5)

    def test_is_not_tree(self):
        """The self-loop, the edgeless graph and the cycle are not trees."""
        assert not nx.is_tree(self.N4)
        assert not nx.is_tree(self.N5)
        assert not nx.is_tree(self.N6)

    def test_is_forest(self):
        """Trees, their composition and the edgeless graph are forests."""
        assert nx.is_forest(self.T2)
        assert nx.is_forest(self.T3)
        assert nx.is_forest(self.T5)
        assert nx.is_forest(self.F1)
        assert nx.is_forest(self.N5)

    def test_is_not_forest(self):
        """Graphs containing a self-loop or a cycle are not forests."""
        assert not nx.is_forest(self.N4)
        assert not nx.is_forest(self.N6)
        assert not nx.is_forest(self.NF1)
class TestDirectedTreeRecognition(TestTreeRecognition):
    """Re-runs the inherited recognition tests with directed graph classes."""

    multigraph = nx.MultiDiGraph
    graph = nx.DiGraph
def test_disconnected_graph():
    """A cycle plus a separate edge is not a tree, directed or undirected."""
    # https://github.com/networkx/networkx/issues/1144
    edges = [(0, 1), (1, 2), (2, 0), (3, 4)]
    for graph_class in (nx.Graph, nx.DiGraph):
        candidate = graph_class()
        candidate.add_edges_from(edges)
        assert not nx.is_tree(candidate)
def test_dag_nontree():
    """A directed acyclic graph is not necessarily a tree."""
    dag = nx.DiGraph()
    dag.add_edges_from([(0, 1), (0, 2), (1, 2)])
    assert not nx.is_tree(dag)
    assert nx.is_directed_acyclic_graph(dag)
def test_multicycle():
    """Two parallel directed edges: acyclic as a digraph, but not a tree."""
    doubled = nx.MultiDiGraph()
    doubled.add_edges_from([(0, 1), (0, 1)])
    assert not nx.is_tree(doubled)
    assert nx.is_directed_acyclic_graph(doubled)
def test_emptybranch():
    """Ten isolated nodes form a branching but not an arborescence."""
    isolated = nx.DiGraph()
    isolated.add_nodes_from(range(10))
    assert nx.is_branching(isolated)
    assert not nx.is_arborescence(isolated)
def test_path():
    """A directed path is both a branching and an arborescence."""
    chain = nx.DiGraph()
    nx.add_path(chain, range(5))
    assert nx.is_branching(chain)
    assert nx.is_arborescence(chain)
def test_notbranching1():
    """Acyclic violation: the edges 0->1 and 1->0 form a directed cycle."""
    cyclic = nx.MultiDiGraph()
    cyclic.add_nodes_from(range(10))
    cyclic.add_edges_from([(0, 1), (1, 0)])
    assert not nx.is_branching(cyclic)
    assert not nx.is_arborescence(cyclic)
def test_notbranching2():
    """In-degree violation: node 2 has two incoming edges."""
    overloaded = nx.MultiDiGraph()
    overloaded.add_nodes_from(range(10))
    overloaded.add_edges_from([(0, 1), (0, 2), (3, 2)])
    assert not nx.is_branching(overloaded)
    assert not nx.is_arborescence(overloaded)
def test_notarborescence1():
    """A branching that is not an arborescence due to not spanning."""
    forest = nx.MultiDiGraph()
    forest.add_nodes_from(range(10))
    forest.add_edges_from([(0, 1), (0, 2), (1, 3), (5, 6)])
    assert nx.is_branching(forest)
    assert not nx.is_arborescence(forest)
def test_notarborescence2():
    """Not an arborescence due to in-degree violation.

    Node 4 is reached both along the path and from node 6, so the graph is
    not a branching either.
    """
    merged = nx.MultiDiGraph()
    nx.add_path(merged, range(5))
    merged.add_edge(6, 4)
    assert not nx.is_branching(merged)
    assert not nx.is_arborescence(merged)