Fixed database typo and removed unnecessary class identifier.

Batuhan Berk Başoğlu 2020-10-14 10:10:37 -04:00
parent 00ad49a143
commit 45fb349a7d
5098 changed files with 952558 additions and 85 deletions

@@ -0,0 +1,25 @@
"""Functions for computing and measuring community structure.
The functions in this module are not imported into the top-level
:mod:`networkx` namespace. You can access these functions by importing
the :mod:`networkx.algorithms.community` module, then accessing the
functions as attributes of ``community``. For example::
    >>> import networkx as nx
    >>> from networkx.algorithms import community
>>> G = nx.barbell_graph(5, 1)
>>> communities_generator = community.girvan_newman(G)
>>> top_level_communities = next(communities_generator)
>>> next_level_communities = next(communities_generator)
>>> sorted(map(sorted, next_level_communities))
[[0, 1, 2, 3, 4], [5], [6, 7, 8, 9, 10]]
"""
from networkx.algorithms.community.asyn_fluid import *
from networkx.algorithms.community.centrality import *
from networkx.algorithms.community.kclique import *
from networkx.algorithms.community.kernighan_lin import *
from networkx.algorithms.community.label_propagation import *
from networkx.algorithms.community.lukes import *
from networkx.algorithms.community.modularity_max import *
from networkx.algorithms.community.quality import *
from networkx.algorithms.community.community_utils import *

@@ -0,0 +1,148 @@
"""Asynchronous Fluid Communities algorithm for community detection."""
from collections import Counter
from networkx.exception import NetworkXError
from networkx.algorithms.components import is_connected
from networkx.utils import groups
from networkx.utils import not_implemented_for
from networkx.utils import py_random_state
__all__ = ["asyn_fluidc"]
@py_random_state(3)
@not_implemented_for("directed", "multigraph")
def asyn_fluidc(G, k, max_iter=100, seed=None):
"""Returns communities in `G` as detected by Fluid Communities algorithm.
The asynchronous fluid communities algorithm is described in
[1]_. The algorithm is based on the simple idea of fluids interacting
in an environment, expanding and pushing each other. Its initialization is
random, so found communities may vary on different executions.
    The algorithm proceeds as follows. First, each of the k initial
    communities is placed on a random vertex of the graph. The algorithm
    then iterates over all vertices in a random order, updating the
    community of each vertex based on its own community and the
    communities of its neighbours. This process is repeated until
    convergence.
    At all times, each community has a total density of 1, which is
    equally distributed among the vertices it contains. If a vertex
    changes community, the densities of the affected communities are
    adjusted immediately. When a complete iteration over all vertices
    finishes with no vertex changing its community, the algorithm has
    converged and returns.
This is the original version of the algorithm described in [1]_.
Unfortunately, it does not support weighted graphs yet.
Parameters
----------
G : Graph
k : integer
The number of communities to be found.
max_iter : integer
The number of maximum iterations allowed. By default 100.
seed : integer, random_state, or None (default)
Indicator of random number generation state.
See :ref:`Randomness<randomness>`.
Returns
-------
communities : iterable
Iterable of communities given as sets of nodes.
Notes
-----
    The ``k`` argument is required; it is not optional.
References
----------
.. [1] Parés F., Garcia-Gasulla D. et al. "Fluid Communities: A
Competitive and Highly Scalable Community Detection Algorithm".
[https://arxiv.org/pdf/1703.09307.pdf].
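    Examples
    --------
    A minimal usage sketch; since initialization is random, the communities
    found may vary between runs unless ``seed`` is fixed::
        >>> import networkx as nx
        >>> from networkx.algorithms.community import asyn_fluidc
        >>> G = nx.barbell_graph(5, 0)
        >>> communities = list(asyn_fluidc(G, 2, seed=1))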
"""
# Initial checks
if not isinstance(k, int):
raise NetworkXError("k must be an integer.")
if not k > 0:
raise NetworkXError("k must be greater than 0.")
if not is_connected(G):
raise NetworkXError("Fluid Communities require connected Graphs.")
if len(G) < k:
raise NetworkXError("k cannot be bigger than the number of nodes.")
# Initialization
max_density = 1.0
vertices = list(G)
seed.shuffle(vertices)
communities = {n: i for i, n in enumerate(vertices[:k])}
density = {}
com_to_numvertices = {}
for vertex in communities.keys():
com_to_numvertices[communities[vertex]] = 1
density[communities[vertex]] = max_density
# Set up control variables and start iterating
iter_count = 0
cont = True
while cont:
cont = False
iter_count += 1
# Loop over all vertices in graph in a random order
vertices = list(G)
seed.shuffle(vertices)
for vertex in vertices:
# Updating rule
com_counter = Counter()
# Take into account self vertex community
try:
com_counter.update({communities[vertex]: density[communities[vertex]]})
except KeyError:
pass
# Gather neighbour vertex communities
for v in G[vertex]:
try:
com_counter.update({communities[v]: density[communities[v]]})
except KeyError:
continue
# Check which is the community with highest density
new_com = -1
if len(com_counter.keys()) > 0:
max_freq = max(com_counter.values())
best_communities = [
com
for com, freq in com_counter.items()
if (max_freq - freq) < 0.0001
]
# If actual vertex com in best communities, it is preserved
try:
if communities[vertex] in best_communities:
new_com = communities[vertex]
except KeyError:
pass
# If vertex community changes...
if new_com == -1:
# Set flag of non-convergence
cont = True
                # Randomly choose a new community from candidates
new_com = seed.choice(best_communities)
# Update previous community status
try:
com_to_numvertices[communities[vertex]] -= 1
density[communities[vertex]] = (
max_density / com_to_numvertices[communities[vertex]]
)
except KeyError:
pass
# Update new community status
communities[vertex] = new_com
com_to_numvertices[communities[vertex]] += 1
density[communities[vertex]] = (
max_density / com_to_numvertices[communities[vertex]]
)
# If maximum iterations reached --> output actual results
if iter_count > max_iter:
break
# Return results by grouping communities as list of vertices
return iter(groups(communities).values())

@@ -0,0 +1,170 @@
"""Functions for computing communities based on centrality notions."""
import networkx as nx
__all__ = ["girvan_newman"]
def girvan_newman(G, most_valuable_edge=None):
"""Finds communities in a graph using the GirvanNewman method.
Parameters
----------
G : NetworkX graph
most_valuable_edge : function
Function that takes a graph as input and outputs an edge. The
edge returned by this function will be recomputed and removed at
each iteration of the algorithm.
If not specified, the edge with the highest
:func:`networkx.edge_betweenness_centrality` will be used.
Returns
-------
iterator
        Iterator over tuples of sets of nodes in `G`. Each set of nodes
        is a community, and each tuple is a sequence of communities at a
        particular level of the algorithm.
Examples
--------
To get the first pair of communities::
>>> G = nx.path_graph(10)
>>> comp = girvan_newman(G)
>>> tuple(sorted(c) for c in next(comp))
([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])
To get only the first *k* tuples of communities, use
:func:`itertools.islice`::
>>> import itertools
>>> G = nx.path_graph(8)
>>> k = 2
>>> comp = girvan_newman(G)
>>> for communities in itertools.islice(comp, k):
... print(tuple(sorted(c) for c in communities)) # doctest: +SKIP
...
([0, 1, 2, 3], [4, 5, 6, 7])
([0, 1], [2, 3], [4, 5, 6, 7])
To stop getting tuples of communities once the number of communities
is greater than *k*, use :func:`itertools.takewhile`::
>>> import itertools
>>> G = nx.path_graph(8)
>>> k = 4
>>> comp = girvan_newman(G)
>>> limited = itertools.takewhile(lambda c: len(c) <= k, comp)
>>> for communities in limited:
... print(tuple(sorted(c) for c in communities)) # doctest: +SKIP
...
([0, 1, 2, 3], [4, 5, 6, 7])
([0, 1], [2, 3], [4, 5, 6, 7])
([0, 1], [2, 3], [4, 5], [6, 7])
To just choose an edge to remove based on the weight::
>>> from operator import itemgetter
>>> G = nx.path_graph(10)
>>> edges = G.edges()
>>> nx.set_edge_attributes(G, {(u, v): v for u, v in edges}, "weight")
>>> def heaviest(G):
... u, v, w = max(G.edges(data="weight"), key=itemgetter(2))
... return (u, v)
...
>>> comp = girvan_newman(G, most_valuable_edge=heaviest)
>>> tuple(sorted(c) for c in next(comp))
([0, 1, 2, 3, 4, 5, 6, 7, 8], [9])
    To utilize edge weights when choosing an edge, for example to pick the
    edge with the highest weighted betweenness centrality::
>>> from networkx import edge_betweenness_centrality as betweenness
>>> def most_central_edge(G):
... centrality = betweenness(G, weight="weight")
... return max(centrality, key=centrality.get)
...
>>> G = nx.path_graph(10)
>>> comp = girvan_newman(G, most_valuable_edge=most_central_edge)
>>> tuple(sorted(c) for c in next(comp))
([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])
To specify a different ranking algorithm for edges, use the
`most_valuable_edge` keyword argument::
>>> from networkx import edge_betweenness_centrality
>>> from random import random
>>> def most_central_edge(G):
... centrality = edge_betweenness_centrality(G)
... max_cent = max(centrality.values())
... # Scale the centrality values so they are between 0 and 1,
... # and add some random noise.
... centrality = {e: c / max_cent for e, c in centrality.items()}
... # Add some random noise.
... centrality = {e: c + random() for e, c in centrality.items()}
... return max(centrality, key=centrality.get)
...
>>> G = nx.path_graph(10)
>>> comp = girvan_newman(G, most_valuable_edge=most_central_edge)
Notes
-----
    The Girvan-Newman algorithm detects communities by progressively
removing edges from the original graph. The algorithm removes the
"most valuable" edge, traditionally the edge with the highest
betweenness centrality, at each step. As the graph breaks down into
pieces, the tightly knit community structure is exposed and the
result can be depicted as a dendrogram.
"""
# If the graph is already empty, simply return its connected
# components.
if G.number_of_edges() == 0:
yield tuple(nx.connected_components(G))
return
# If no function is provided for computing the most valuable edge,
# use the edge betweenness centrality.
if most_valuable_edge is None:
def most_valuable_edge(G):
"""Returns the edge with the highest betweenness centrality
in the graph `G`.
"""
# We have guaranteed that the graph is non-empty, so this
# dictionary will never be empty.
betweenness = nx.edge_betweenness_centrality(G)
return max(betweenness, key=betweenness.get)
# The copy of G here must include the edge weight data.
g = G.copy().to_undirected()
    # Self-loops must be removed because their removal has no effect on
    # the connected components of the graph, so removing one would never
    # split a community.
g.remove_edges_from(nx.selfloop_edges(g))
while g.number_of_edges() > 0:
yield _without_most_central_edges(g, most_valuable_edge)
def _without_most_central_edges(G, most_valuable_edge):
"""Returns the connected components of the graph that results from
repeatedly removing the most "valuable" edge in the graph.
`G` must be a non-empty graph. This function modifies the graph `G`
in-place; that is, it removes edges on the graph `G`.
`most_valuable_edge` is a function that takes the graph `G` as input
(or a subgraph with one or more edges of `G` removed) and returns an
edge. That edge will be removed and this process will be repeated
until the number of connected components in the graph increases.
"""
original_num_components = nx.number_connected_components(G)
num_new_components = original_num_components
while num_new_components <= original_num_components:
edge = most_valuable_edge(G)
G.remove_edge(*edge)
new_components = tuple(nx.connected_components(G))
num_new_components = len(new_components)
return new_components

@@ -0,0 +1,27 @@
"""Helper functions for community-finding algorithms."""
__all__ = ["is_partition"]
def is_partition(G, communities):
"""Returns *True* if `communities` is a partition of the nodes of `G`.
A partition of a universe set is a family of pairwise disjoint sets
whose union is the entire universe set.
Parameters
----------
G : NetworkX graph.
communities : list or iterable of sets of nodes
If not a list, the iterable is converted internally to a list.
If it is an iterator it is exhausted.
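    Examples
    --------
    A short doctest sketch::
        >>> import networkx as nx
        >>> G = nx.path_graph(4)
        >>> is_partition(G, [{0, 1}, {2, 3}])
        True
        >>> is_partition(G, [{0, 1}, {1, 2, 3}])
        False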
"""
# Alternate implementation:
# return all(sum(1 if v in c else 0 for c in communities) == 1 for v in G)
if not isinstance(communities, list):
communities = list(communities)
nodes = {n for c in communities for n in c if n in G}
return len(G) == len(nodes) == sum(len(c) for c in communities)

@@ -0,0 +1,78 @@
from collections import defaultdict
import networkx as nx
__all__ = ["k_clique_communities"]
def k_clique_communities(G, k, cliques=None):
"""Find k-clique communities in graph using the percolation method.
A k-clique community is the union of all cliques of size k that
can be reached through adjacent (sharing k-1 nodes) k-cliques.
Parameters
----------
G : NetworkX graph
k : int
Size of smallest clique
cliques: list or generator
Precomputed cliques (use networkx.find_cliques(G))
Returns
-------
Yields sets of nodes, one for each k-clique community.
Examples
--------
>>> from networkx.algorithms.community import k_clique_communities
>>> G = nx.complete_graph(5)
>>> K5 = nx.convert_node_labels_to_integers(G, first_label=2)
>>> G.add_edges_from(K5.edges())
>>> c = list(k_clique_communities(G, 4))
>>> sorted(list(c[0]))
[0, 1, 2, 3, 4, 5, 6]
>>> list(k_clique_communities(G, 6))
[]
References
----------
    .. [1] Gergely Palla, Imre Derényi, Illés Farkas, and Tamás Vicsek,
       "Uncovering the overlapping community structure of complex networks
       in nature and society". Nature 435, 814-818, 2005,
       doi:10.1038/nature03607.
"""
if k < 2:
raise nx.NetworkXError(f"k={k}, k must be greater than 1.")
if cliques is None:
cliques = nx.find_cliques(G)
cliques = [frozenset(c) for c in cliques if len(c) >= k]
# First index which nodes are in which cliques
membership_dict = defaultdict(list)
for clique in cliques:
for node in clique:
membership_dict[node].append(clique)
# For each clique, see which adjacent cliques percolate
perc_graph = nx.Graph()
perc_graph.add_nodes_from(cliques)
for clique in cliques:
for adj_clique in _get_adjacent_cliques(clique, membership_dict):
if len(clique.intersection(adj_clique)) >= (k - 1):
perc_graph.add_edge(clique, adj_clique)
# Connected components of clique graph with perc edges
# are the percolated cliques
for component in nx.connected_components(perc_graph):
yield (frozenset.union(*component))
def _get_adjacent_cliques(clique, membership_dict):
adjacent_cliques = set()
for n in clique:
for adj_clique in membership_dict[n]:
if clique != adj_clique:
adjacent_cliques.add(adj_clique)
return adjacent_cliques

@@ -0,0 +1,134 @@
"""Functions for computing the KernighanLin bipartition algorithm."""
import networkx as nx
from itertools import count
from networkx.utils import not_implemented_for, py_random_state, BinaryHeap
from networkx.algorithms.community.community_utils import is_partition
__all__ = ["kernighan_lin_bisection"]
def _kernighan_lin_sweep(edges, side):
"""
This is a modified form of Kernighan-Lin, which moves single nodes at a
time, alternating between sides to keep the bisection balanced. We keep
two min-heaps of swap costs to make optimal-next-move selection fast.
"""
costs0, costs1 = costs = BinaryHeap(), BinaryHeap()
for u, side_u, edges_u in zip(count(), side, edges):
cost_u = sum(w if side[v] else -w for v, w in edges_u)
costs[side_u].insert(u, cost_u if side_u else -cost_u)
def _update_costs(costs_x, x):
for y, w in edges[x]:
costs_y = costs[side[y]]
cost_y = costs_y.get(y)
if cost_y is not None:
cost_y += 2 * (-w if costs_x is costs_y else w)
costs_y.insert(y, cost_y, True)
    i = totcost = 0
    while costs0 and costs1:
        u, cost_u = costs0.pop()
        _update_costs(costs0, u)
        v, cost_v = costs1.pop()
        _update_costs(costs1, v)
        totcost += cost_u + cost_v
        yield totcost, i, (u, v)
        # Advance the move index so the caller can slice the sweep back
        # to the point where the minimum cost occurred.
        i += 1
@py_random_state(4)
@not_implemented_for("directed")
def kernighan_lin_bisection(G, partition=None, max_iter=10, weight="weight", seed=None):
"""Partition a graph into two blocks using the KernighanLin
algorithm.
This algorithm partitions a network into two sets by iteratively
swapping pairs of nodes to reduce the edge cut between the two sets. The
pairs are chosen according to a modified form of Kernighan-Lin, which
    moves nodes individually, alternating between sides to keep the bisection
balanced.
Parameters
----------
G : graph
partition : tuple
Pair of iterables containing an initial partition. If not
specified, a random balanced partition is used.
max_iter : int
Maximum number of times to attempt swaps to find an
        improvement before giving up.
weight : key
Edge data key to use as weight. If None, the weights are all
set to one.
seed : integer, random_state, or None (default)
Indicator of random number generation state.
See :ref:`Randomness<randomness>`.
Only used if partition is None
Returns
-------
partition : tuple
A pair of sets of nodes representing the bipartition.
Raises
    ------
NetworkXError
If partition is not a valid partition of the nodes of the graph.
References
----------
.. [1] Kernighan, B. W.; Lin, Shen (1970).
"An efficient heuristic procedure for partitioning graphs."
*Bell Systems Technical Journal* 49: 291--307.
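    Examples
    --------
    A minimal sketch; the bisection found can vary with ``seed``::
        >>> import networkx as nx
        >>> from networkx.algorithms.community import kernighan_lin_bisection
        >>> G = nx.barbell_graph(3, 0)
        >>> A, B = kernighan_lin_bisection(G, seed=1)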
"""
n = len(G)
labels = list(G)
seed.shuffle(labels)
index = {v: i for i, v in enumerate(labels)}
if partition is None:
side = [0] * (n // 2) + [1] * ((n + 1) // 2)
else:
try:
A, B = partition
except (TypeError, ValueError) as e:
raise nx.NetworkXError("partition must be two sets") from e
if not is_partition(G, (A, B)):
raise nx.NetworkXError("partition invalid")
side = [0] * n
for a in A:
            side[index[a]] = 1
if G.is_multigraph():
edges = [
[
(index[u], sum(e.get(weight, 1) for e in d.values()))
for u, d in G[v].items()
]
for v in labels
]
else:
edges = [
[(index[u], e.get(weight, 1)) for u, e in G[v].items()] for v in labels
]
for i in range(max_iter):
costs = list(_kernighan_lin_sweep(edges, side))
min_cost, min_i, _ = min(costs)
if min_cost >= 0:
break
for _, _, (u, v) in costs[: min_i + 1]:
side[u] = 1
side[v] = 0
A = {u for u, s in zip(labels, side) if s == 0}
B = {u for u, s in zip(labels, side) if s == 1}
return A, B

@@ -0,0 +1,198 @@
"""
Label propagation community detection algorithms.
"""
from collections import Counter
import networkx as nx
from networkx.utils import groups
from networkx.utils import not_implemented_for
from networkx.utils import py_random_state
__all__ = ["label_propagation_communities", "asyn_lpa_communities"]
@py_random_state(2)
def asyn_lpa_communities(G, weight=None, seed=None):
"""Returns communities in `G` as detected by asynchronous label
propagation.
The asynchronous label propagation algorithm is described in
[1]_. The algorithm is probabilistic and the found communities may
vary on different executions.
The algorithm proceeds as follows. After initializing each node with
a unique label, the algorithm repeatedly sets the label of a node to
    be the label that appears most frequently among that node's
neighbors. The algorithm halts when each node has the label that
appears most frequently among its neighbors. The algorithm is
asynchronous because each node is updated without waiting for
updates on the remaining nodes.
This generalized version of the algorithm in [1]_ accepts edge
weights.
Parameters
----------
G : Graph
weight : string
The edge attribute representing the weight of an edge.
If None, each edge is assumed to have weight one. In this
algorithm, the weight of an edge is used in determining the
frequency with which a label appears among the neighbors of a
node: a higher weight means the label appears more often.
seed : integer, random_state, or None (default)
Indicator of random number generation state.
See :ref:`Randomness<randomness>`.
Returns
-------
communities : iterable
Iterable of communities given as sets of nodes.
Notes
    -----
Edge weight attributes must be numerical.
References
----------
.. [1] Raghavan, Usha Nandini, Réka Albert, and Soundar Kumara. "Near
linear time algorithm to detect community structures in large-scale
networks." Physical Review E 76.3 (2007): 036106.
"""
labels = {n: i for i, n in enumerate(G)}
cont = True
while cont:
cont = False
nodes = list(G)
seed.shuffle(nodes)
# Calculate the label for each node
for node in nodes:
if len(G[node]) < 1:
continue
            # Get label frequencies. Depending on the order in which they are
            # processed, some nodes will use labels from iteration t and
            # others from iteration t-1, making the algorithm asynchronous.
label_freq = Counter()
for v in G[node]:
label_freq.update(
{labels[v]: G.edges[node, v][weight] if weight else 1}
)
            # Choose the label with the highest frequency. If more than 1 label
            # has the highest frequency choose one randomly.
max_freq = max(label_freq.values())
best_labels = [
label for label, freq in label_freq.items() if freq == max_freq
]
# Continue until all nodes have a majority label
if labels[node] not in best_labels:
labels[node] = seed.choice(best_labels)
cont = True
yield from groups(labels).values()
@not_implemented_for("directed")
def label_propagation_communities(G):
"""Generates community sets determined by label propagation
Finds communities in `G` using a semi-synchronous label propagation
    method [1]_. This method combines the advantages of both the synchronous
and asynchronous models. Not implemented for directed graphs.
Parameters
----------
G : graph
An undirected NetworkX graph.
Yields
------
communities : generator
Yields sets of the nodes in each community.
Raises
------
NetworkXNotImplemented
If the graph is directed
References
----------
.. [1] Cordasco, G., & Gargano, L. (2010, December). Community detection
via semi-synchronous label propagation algorithms. In Business
Applications of Social Network Analysis (BASNA), 2010 IEEE International
Workshop on (pp. 1-8). IEEE.
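    Examples
    --------
    A minimal sketch::
        >>> import networkx as nx
        >>> from networkx.algorithms.community import label_propagation_communities
        >>> G = nx.karate_club_graph()
        >>> communities = list(label_propagation_communities(G))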
"""
coloring = _color_network(G)
# Create a unique label for each node in the graph
labeling = {v: k for k, v in enumerate(G)}
while not _labeling_complete(labeling, G):
# Update the labels of every node with the same color.
for color, nodes in coloring.items():
for n in nodes:
_update_label(n, labeling, G)
for label in set(labeling.values()):
yield {x for x in labeling if labeling[x] == label}
def _color_network(G):
"""Colors the network so that neighboring nodes all have distinct colors.
Returns a dict keyed by color to a set of nodes with that color.
"""
coloring = dict() # color => set(node)
colors = nx.coloring.greedy_color(G)
for node, color in colors.items():
if color in coloring:
coloring[color].add(node)
else:
coloring[color] = {node}
return coloring
def _labeling_complete(labeling, G):
"""Determines whether or not LPA is done.
Label propagation is complete when all nodes have a label that is
in the set of highest frequency labels amongst its neighbors.
Nodes with no neighbors are considered complete.
"""
return all(
labeling[v] in _most_frequent_labels(v, labeling, G) for v in G if len(G[v]) > 0
)
def _most_frequent_labels(node, labeling, G):
"""Returns a set of all labels with maximum frequency in `labeling`.
Input `labeling` should be a dict keyed by node to labels.
"""
if not G[node]:
# Nodes with no neighbors are themselves a community and are labeled
# accordingly, hence the immediate if statement.
return {labeling[node]}
# Compute the frequencies of all neighbours of node
freqs = Counter(labeling[q] for q in G[node])
max_freq = max(freqs.values())
return {label for label, freq in freqs.items() if freq == max_freq}
def _update_label(node, labeling, G):
"""Updates the label of a node using the Prec-Max tie breaking algorithm
The algorithm is explained in: 'Community Detection via Semi-Synchronous
Label Propagation Algorithms' Cordasco and Gargano, 2011
"""
high_labels = _most_frequent_labels(node, labeling, G)
if len(high_labels) == 1:
labeling[node] = high_labels.pop()
elif len(high_labels) > 1:
# Prec-Max
if labeling[node] not in high_labels:
labeling[node] = max(high_labels)

@@ -0,0 +1,227 @@
"""Lukes Algorithm for exact optimal weighted tree partitioning."""
from copy import deepcopy
from functools import lru_cache
from random import choice
import networkx as nx
from networkx.utils import not_implemented_for
__all__ = ["lukes_partitioning"]
D_EDGE_W = "weight"
D_EDGE_VALUE = 1.0
D_NODE_W = "weight"
D_NODE_VALUE = 1
PKEY = "partitions"
CLUSTER_EVAL_CACHE_SIZE = 2048
def _split_n_from(n: int, min_size_of_first_part: int):
    # splits n into two parts of which the first is at least
    # the second argument
assert n >= min_size_of_first_part
for p1 in range(min_size_of_first_part, n + 1):
yield p1, n - p1
def lukes_partitioning(G, max_size: int, node_weight=None, edge_weight=None) -> list:
"""Optimal partitioning of a weighted tree using the Lukes algorithm.
This algorithm partitions a connected, acyclic graph featuring integer
node weights and float edge weights. The resulting clusters are such
that the total weight of the nodes in each cluster does not exceed
max_size and that the weight of the edges that are cut by the partition
    is minimum. The algorithm is based on Lukes [1].
Parameters
----------
G : graph
max_size : int
Maximum weight a partition can have in terms of sum of
node_weight for all nodes in the partition
edge_weight : key
Edge data key to use as weight. If None, the weights are all
set to one.
node_weight : key
Node data key to use as weight. If None, the weights are all
set to one. The data must be int.
Returns
-------
partition : list
A list of sets of nodes representing the clusters of the
partition.
Raises
    ------
NotATree
If G is not a tree.
TypeError
If any of the values of node_weight is not int.
    References
    ----------
    .. [1] Lukes, J. A. (1974).
       "Efficient Algorithm for the Partitioning of Trees."
       IBM Journal of Research and Development, 18(3), 217-224.
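    Examples
    --------
    A minimal sketch on an unweighted path; node and edge weights default
    to 1, so each cluster here may contain at most two nodes::
        >>> import networkx as nx
        >>> from networkx.algorithms.community import lukes_partitioning
        >>> G = nx.path_graph(5)
        >>> partition = lukes_partitioning(G, 2)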
"""
# First sanity check and tree preparation
if not nx.is_tree(G):
raise nx.NotATree("lukes_partitioning works only on trees")
else:
if nx.is_directed(G):
root = [n for n, d in G.in_degree() if d == 0]
assert len(root) == 1
root = root[0]
t_G = deepcopy(G)
else:
root = choice(list(G.nodes))
# this has the desirable side effect of not inheriting attributes
t_G = nx.dfs_tree(G, root)
# Since we do not want to screw up the original graph,
# if we have a blank attribute, we make a deepcopy
if edge_weight is None or node_weight is None:
safe_G = deepcopy(G)
if edge_weight is None:
nx.set_edge_attributes(safe_G, D_EDGE_VALUE, D_EDGE_W)
edge_weight = D_EDGE_W
if node_weight is None:
nx.set_node_attributes(safe_G, D_NODE_VALUE, D_NODE_W)
node_weight = D_NODE_W
else:
safe_G = G
# Second sanity check
# The values of node_weight MUST BE int.
# I cannot see any room for duck typing without incurring serious
# danger of subtle bugs.
all_n_attr = nx.get_node_attributes(safe_G, node_weight).values()
for x in all_n_attr:
if not isinstance(x, int):
raise TypeError(
"lukes_partitioning needs integer "
f"values for node_weight ({node_weight})"
)
# SUBROUTINES -----------------------
# these functions are defined here for two reasons:
# - brevity: we can leverage global "safe_G"
# - caching: signatures are hashable
@not_implemented_for("undirected")
# this is intended to be called only on t_G
def _leaves(gr):
for x in gr.nodes:
if not nx.descendants(gr, x):
yield x
@not_implemented_for("undirected")
def _a_parent_of_leaves_only(gr):
tleaves = set(_leaves(gr))
for n in set(gr.nodes) - tleaves:
if all([x in tleaves for x in nx.descendants(gr, n)]):
return n
@lru_cache(CLUSTER_EVAL_CACHE_SIZE)
def _value_of_cluster(cluster: frozenset):
valid_edges = [e for e in safe_G.edges if e[0] in cluster and e[1] in cluster]
return sum([safe_G.edges[e][edge_weight] for e in valid_edges])
def _value_of_partition(partition: list):
return sum([_value_of_cluster(frozenset(c)) for c in partition])
@lru_cache(CLUSTER_EVAL_CACHE_SIZE)
def _weight_of_cluster(cluster: frozenset):
return sum([safe_G.nodes[n][node_weight] for n in cluster])
def _pivot(partition: list, node):
ccx = [c for c in partition if node in c]
assert len(ccx) == 1
return ccx[0]
    def _concatenate_or_merge(partition_1: list, partition_2: list, x, i, ref_weight):
ccx = _pivot(partition_1, x)
cci = _pivot(partition_2, i)
merged_xi = ccx.union(cci)
# We first check if we can do the merge.
# If so, we do the actual calculations, otherwise we concatenate
        if _weight_of_cluster(frozenset(merged_xi)) <= ref_weight:
cp1 = list(filter(lambda x: x != ccx, partition_1))
cp2 = list(filter(lambda x: x != cci, partition_2))
option_2 = [merged_xi] + cp1 + cp2
return option_2, _value_of_partition(option_2)
else:
option_1 = partition_1 + partition_2
return option_1, _value_of_partition(option_1)
# INITIALIZATION -----------------------
leaves = set(_leaves(t_G))
for lv in leaves:
t_G.nodes[lv][PKEY] = dict()
slot = safe_G.nodes[lv][node_weight]
t_G.nodes[lv][PKEY][slot] = [{lv}]
t_G.nodes[lv][PKEY][0] = [{lv}]
for inner in [x for x in t_G.nodes if x not in leaves]:
t_G.nodes[inner][PKEY] = dict()
slot = safe_G.nodes[inner][node_weight]
t_G.nodes[inner][PKEY][slot] = [{inner}]
# CORE ALGORITHM -----------------------
while True:
x_node = _a_parent_of_leaves_only(t_G)
weight_of_x = safe_G.nodes[x_node][node_weight]
best_value = 0
best_partition = None
bp_buffer = dict()
x_descendants = nx.descendants(t_G, x_node)
for i_node in x_descendants:
for j in range(weight_of_x, max_size + 1):
for a, b in _split_n_from(j, weight_of_x):
if (
a not in t_G.nodes[x_node][PKEY].keys()
or b not in t_G.nodes[i_node][PKEY].keys()
):
# it's not possible to form this particular weight sum
continue
part1 = t_G.nodes[x_node][PKEY][a]
part2 = t_G.nodes[i_node][PKEY][b]
part, value = _concatenate_or_merge(part1, part2, x_node, i_node, j)
if j not in bp_buffer.keys() or bp_buffer[j][1] < value:
# we annotate in the buffer the best partition for j
bp_buffer[j] = part, value
# we also keep track of the overall best partition
if best_value <= value:
best_value = value
best_partition = part
# as illustrated in Lukes, once we finished a child, we can
# discharge the partitions we found into the graph
# (the key phrase is make all x == x')
# so that they are used by the subsequent children
for w, (best_part_for_vl, vl) in bp_buffer.items():
t_G.nodes[x_node][PKEY][w] = best_part_for_vl
bp_buffer.clear()
# the absolute best partition for this node
# across all weights has to be stored at 0
t_G.nodes[x_node][PKEY][0] = best_partition
t_G.remove_nodes_from(x_descendants)
if x_node == root:
# the 0-labeled partition of root
# is the optimal one for the whole tree
return t_G.nodes[root][PKEY][0]

@@ -0,0 +1,265 @@
# TODO:
# - Alter equations for weighted case
# - Write tests for weighted case
"""Functions for detecting communities based on modularity.
"""
from networkx.algorithms.community.quality import modularity
from networkx.utils.mapped_queue import MappedQueue
__all__ = [
"greedy_modularity_communities",
"naive_greedy_modularity_communities",
"_naive_greedy_modularity_communities",
]
def greedy_modularity_communities(G, weight=None):
"""Find communities in graph using Clauset-Newman-Moore greedy modularity
maximization. This method currently supports the Graph class and does not
consider edge weights.
Greedy modularity maximization begins with each node in its own community
and joins the pair of communities that most increases modularity until no
such pair exists.
Parameters
----------
G : NetworkX graph
Returns
-------
Yields sets of nodes, one for each community.
Examples
--------
>>> from networkx.algorithms.community import greedy_modularity_communities
>>> G = nx.karate_club_graph()
>>> c = list(greedy_modularity_communities(G))
>>> sorted(c[0])
[8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]
References
----------
.. [1] M. E. J Newman 'Networks: An Introduction', page 224
Oxford University Press 2011.
.. [2] Clauset, A., Newman, M. E., & Moore, C.
"Finding community structure in very large networks."
Physical Review E 70(6), 2004.
"""
# Count nodes and edges
N = len(G.nodes())
m = sum([d.get("weight", 1) for u, v, d in G.edges(data=True)])
q0 = 1.0 / (2.0 * m)
# Map node labels to contiguous integers
label_for_node = {i: v for i, v in enumerate(G.nodes())}
node_for_label = {label_for_node[i]: i for i in range(N)}
# Calculate degrees
k_for_label = G.degree(G.nodes(), weight=weight)
k = [k_for_label[label_for_node[i]] for i in range(N)]
# Initialize community and merge lists
communities = {i: frozenset([i]) for i in range(N)}
merges = []
# Initial modularity
partition = [[label_for_node[x] for x in c] for c in communities.values()]
q_cnm = modularity(G, partition)
# Initialize data structures
    # CNM Eq 8-9 (Eq 8 was missing a factor of 2 from A_ij + A_ji)
# a[i]: fraction of edges within community i
# dq_dict[i][j]: dQ for merging community i, j
    # dq_heap[i][n] : (-dq, i, j) for community i's nth largest dQ
# H[n]: (-dq, i, j) for community with nth largest max_j(dQ_ij)
a = [k[i] * q0 for i in range(N)]
dq_dict = {
i: {
j: 2 * q0 - 2 * k[i] * k[j] * q0 * q0
for j in [node_for_label[u] for u in G.neighbors(label_for_node[i])]
if j != i
}
for i in range(N)
}
dq_heap = [
MappedQueue([(-dq, i, j) for j, dq in dq_dict[i].items()]) for i in range(N)
]
H = MappedQueue([dq_heap[i].h[0] for i in range(N) if len(dq_heap[i]) > 0])
# Merge communities until we can't improve modularity
while len(H) > 1:
# Find best merge
# Remove from heap of row maxes
# Ties will be broken by choosing the pair with lowest min community id
try:
dq, i, j = H.pop()
except IndexError:
break
dq = -dq
# Remove best merge from row i heap
dq_heap[i].pop()
# Push new row max onto H
if len(dq_heap[i]) > 0:
H.push(dq_heap[i].h[0])
# If this element was also at the root of row j, we need to remove the
# duplicate entry from H
if dq_heap[j].h[0] == (-dq, j, i):
H.remove((-dq, j, i))
# Remove best merge from row j heap
dq_heap[j].remove((-dq, j, i))
# Push new row max onto H
if len(dq_heap[j]) > 0:
H.push(dq_heap[j].h[0])
else:
# Duplicate wasn't in H, just remove from row j heap
dq_heap[j].remove((-dq, j, i))
# Stop when change is non-positive
if dq <= 0:
break
# Perform merge
communities[j] = frozenset(communities[i] | communities[j])
del communities[i]
merges.append((i, j, dq))
# New modularity
q_cnm += dq
# Get list of communities connected to merged communities
i_set = set(dq_dict[i].keys())
j_set = set(dq_dict[j].keys())
all_set = (i_set | j_set) - {i, j}
both_set = i_set & j_set
# Merge i into j and update dQ
for k in all_set:
# Calculate new dq value
if k in both_set:
dq_jk = dq_dict[j][k] + dq_dict[i][k]
elif k in j_set:
dq_jk = dq_dict[j][k] - 2.0 * a[i] * a[k]
else:
# k in i_set
dq_jk = dq_dict[i][k] - 2.0 * a[j] * a[k]
# Update rows j and k
for row, col in [(j, k), (k, j)]:
# Save old value for finding heap index
if k in j_set:
d_old = (-dq_dict[row][col], row, col)
else:
d_old = None
# Update dict for j,k only (i is removed below)
dq_dict[row][col] = dq_jk
# Save old max of per-row heap
if len(dq_heap[row]) > 0:
d_oldmax = dq_heap[row].h[0]
else:
d_oldmax = None
# Add/update heaps
d = (-dq_jk, row, col)
if d_old is None:
# We're creating a new nonzero element, add to heap
dq_heap[row].push(d)
else:
# Update existing element in per-row heap
dq_heap[row].update(d_old, d)
# Update heap of row maxes if necessary
if d_oldmax is None:
# No entries previously in this row, push new max
H.push(d)
else:
# We've updated an entry in this row, has the max changed?
if dq_heap[row].h[0] != d_oldmax:
H.update(d_oldmax, dq_heap[row].h[0])
# Remove row/col i from matrix
i_neighbors = dq_dict[i].keys()
for k in i_neighbors:
# Remove from dict
dq_old = dq_dict[k][i]
del dq_dict[k][i]
# Remove from heaps if we haven't already
if k != j:
# Remove both row and column
for row, col in [(k, i), (i, k)]:
# Check if replaced dq is row max
d_old = (-dq_old, row, col)
if dq_heap[row].h[0] == d_old:
# Update per-row heap and heap of row maxes
dq_heap[row].remove(d_old)
H.remove(d_old)
# Update row max
if len(dq_heap[row]) > 0:
H.push(dq_heap[row].h[0])
else:
# Only update per-row heap
dq_heap[row].remove(d_old)
del dq_dict[i]
# Mark row i as deleted, but keep placeholder
dq_heap[i] = MappedQueue()
# Merge i into j and update a
a[j] += a[i]
a[i] = 0
communities = [
frozenset([label_for_node[i] for i in c]) for c in communities.values()
]
return sorted(communities, key=len, reverse=True)
def naive_greedy_modularity_communities(G):
"""Find communities in graph using the greedy modularity maximization.
This implementation is O(n^4), much slower than alternatives, but it is
provided as an easy-to-understand reference implementation.
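    Examples
    --------
    A minimal sketch (same call pattern as `greedy_modularity_communities`)::
        >>> import networkx as nx
        >>> from networkx.algorithms.community import naive_greedy_modularity_communities
        >>> G = nx.karate_club_graph()
        >>> communities = list(naive_greedy_modularity_communities(G))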
"""
# First create one community for each node
    communities = [frozenset([u]) for u in G.nodes()]
# Track merges
merges = []
# Greedily merge communities until no improvement is possible
old_modularity = None
new_modularity = modularity(G, communities)
while old_modularity is None or new_modularity > old_modularity:
# Save modularity for comparison
old_modularity = new_modularity
# Find best pair to merge
trial_communities = list(communities)
to_merge = None
for i, u in enumerate(communities):
for j, v in enumerate(communities):
                # Skip duplicate pairs (j <= i) and empty communities
if j <= i or len(u) == 0 or len(v) == 0:
continue
# Merge communities u and v
trial_communities[j] = u | v
trial_communities[i] = frozenset([])
trial_modularity = modularity(G, trial_communities)
if trial_modularity >= new_modularity:
# Check if strictly better or tie
if trial_modularity > new_modularity:
# Found new best, save modularity and group indexes
new_modularity = trial_modularity
to_merge = (i, j, new_modularity - old_modularity)
elif to_merge and min(i, j) < min(to_merge[0], to_merge[1]):
# Break ties by choosing pair with lowest min id
new_modularity = trial_modularity
to_merge = (i, j, new_modularity - old_modularity)
# Un-merge
trial_communities[i] = u
trial_communities[j] = v
if to_merge is not None:
# If the best merge improves modularity, use it
merges.append(to_merge)
i, j, dq = to_merge
u, v = communities[i], communities[j]
communities[j] = u | v
communities[i] = frozenset([])
# Remove empty communities and sort
communities = [c for c in communities if len(c) > 0]
yield from sorted(communities, key=lambda x: len(x), reverse=True)
# old name
_naive_greedy_modularity_communities = naive_greedy_modularity_communities

@@ -0,0 +1,334 @@
"""Functions for measuring the quality of a partition (into
communities).
"""
from functools import wraps
from itertools import product
import networkx as nx
from networkx import NetworkXError
from networkx.utils import not_implemented_for
from networkx.algorithms.community.community_utils import is_partition
__all__ = ["coverage", "modularity", "performance"]
class NotAPartition(NetworkXError):
"""Raised if a given collection is not a partition.
"""
def __init__(self, G, collection):
msg = f"{G} is not a valid partition of the graph {collection}"
super().__init__(msg)
def require_partition(func):
"""Decorator to check that a valid partition is input to a function
Raises :exc:`networkx.NetworkXError` if the partition is not valid.
This decorator should be used on functions whose first two arguments
are a graph and a partition of the nodes of that graph (in that
order)::
>>> @require_partition
... def foo(G, partition):
... print("partition is valid!")
...
>>> G = nx.complete_graph(5)
>>> partition = [{0, 1}, {2, 3}, {4}]
>>> foo(G, partition)
partition is valid!
>>> partition = [{0}, {2, 3}, {4}]
>>> foo(G, partition)
Traceback (most recent call last):
...
networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G
>>> partition = [{0, 1}, {1, 2, 3}, {4}]
>>> foo(G, partition)
Traceback (most recent call last):
...
networkx.exception.NetworkXError: `partition` is not a valid partition of the nodes of G
"""
@wraps(func)
def new_func(*args, **kw):
# Here we assume that the first two arguments are (G, partition).
if not is_partition(*args[:2]):
raise nx.NetworkXError(
"`partition` is not a valid partition of" " the nodes of G"
)
return func(*args, **kw)
return new_func
def intra_community_edges(G, partition):
"""Returns the number of intra-community edges for a partition of `G`.
Parameters
----------
G : NetworkX graph.
partition : iterable of sets of nodes
This must be a partition of the nodes of `G`.
The "intra-community edges" are those edges joining a pair of nodes
in the same block of the partition.
"""
return sum(G.subgraph(block).size() for block in partition)
def inter_community_edges(G, partition):
"""Returns the number of inter-community edges for a prtition of `G`.
according to the given
partition of the nodes of `G`.
Parameters
----------
G : NetworkX graph.
partition : iterable of sets of nodes
This must be a partition of the nodes of `G`.
The *inter-community edges* are those edges joining a pair of nodes
in different blocks of the partition.
Implementation note: this function creates an intermediate graph
    that may require the same amount of memory as that of `G`.
    """
"""
# Alternate implementation that does not require constructing a new
# graph object (but does require constructing an affiliation
# dictionary):
#
# aff = dict(chain.from_iterable(((v, block) for v in block)
# for block in partition))
# return sum(1 for u, v in G.edges() if aff[u] != aff[v])
#
MG = nx.MultiDiGraph if G.is_directed() else nx.MultiGraph
return nx.quotient_graph(G, partition, create_using=MG).size()
def inter_community_non_edges(G, partition):
"""Returns the number of inter-community non-edges according to the
given partition of the nodes of `G`.
`G` must be a NetworkX graph.
`partition` must be a partition of the nodes of `G`.
A *non-edge* is a pair of nodes (undirected if `G` is undirected)
that are not adjacent in `G`. The *inter-community non-edges* are
those non-edges on a pair of nodes in different blocks of the
partition.
Implementation note: this function creates two intermediate graphs,
which may require up to twice the amount of memory as required to
    store `G`.
    """
"""
# Alternate implementation that does not require constructing two
# new graph objects (but does require constructing an affiliation
# dictionary):
#
# aff = dict(chain.from_iterable(((v, block) for v in block)
# for block in partition))
# return sum(1 for u, v in nx.non_edges(G) if aff[u] != aff[v])
#
return inter_community_edges(nx.complement(G), partition)
@not_implemented_for("multigraph")
@require_partition
def performance(G, partition):
"""Returns the performance of a partition.
    The *performance* of a partition is the ratio of the number of
    intra-community edges plus inter-community non-edges to the total
    number of potential edges.
Parameters
----------
G : NetworkX graph
A simple graph (directed or undirected).
partition : sequence
Partition of the nodes of `G`, represented as a sequence of
sets of nodes. Each block of the partition represents a
community.
Returns
-------
float
The performance of the partition, as defined above.
Raises
------
NetworkXError
If `partition` is not a valid partition of the nodes of `G`.
References
----------
    .. [1] Santo Fortunato.
       "Community Detection in Graphs".
       *Physics Reports*, Volume 486, Issues 3-5, pp. 75-174.
       <https://arxiv.org/abs/0906.0612>
"""
# Compute the number of intra-community edges and inter-community
# edges.
intra_edges = intra_community_edges(G, partition)
inter_edges = inter_community_non_edges(G, partition)
# Compute the number of edges in the complete graph (directed or
# undirected, as it depends on `G`) on `n` nodes.
#
# (If `G` is an undirected graph, we divide by two since we have
# double-counted each potential edge. We use integer division since
# `total_pairs` is guaranteed to be even.)
n = len(G)
total_pairs = n * (n - 1)
if not G.is_directed():
total_pairs //= 2
return (intra_edges + inter_edges) / total_pairs
@require_partition
def coverage(G, partition):
"""Returns the coverage of a partition.
The *coverage* of a partition is the ratio of the number of
intra-community edges to the total number of edges in the graph.
Parameters
----------
G : NetworkX graph
partition : sequence
Partition of the nodes of `G`, represented as a sequence of
sets of nodes. Each block of the partition represents a
community.
Returns
-------
float
The coverage of the partition, as defined above.
Raises
------
NetworkXError
If `partition` is not a valid partition of the nodes of `G`.
Notes
-----
If `G` is a multigraph, the multiplicity of edges is counted.
References
----------
    .. [1] Santo Fortunato.
       "Community Detection in Graphs".
       *Physics Reports*, Volume 486, Issues 3-5, pp. 75-174.
       <https://arxiv.org/abs/0906.0612>
"""
intra_edges = intra_community_edges(G, partition)
total_edges = G.number_of_edges()
return intra_edges / total_edges
def modularity(G, communities, weight="weight"):
r"""Returns the modularity of the given partition of the graph.
Modularity is defined in [1]_ as
.. math::
Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \frac{k_ik_j}{2m}\right)
\delta(c_i,c_j)
where $m$ is the number of edges, $A$ is the adjacency matrix of
`G`, $k_i$ is the degree of $i$ and $\delta(c_i, c_j)$
is 1 if $i$ and $j$ are in the same community and 0 otherwise.
According to [2]_ (and verified by some algebra) this can be reduced to
.. math::
Q = \sum_{c=1}^{n}
\left[ \frac{L_c}{m} - \left( \frac{k_c}{2m} \right) ^2 \right]
where the sum iterates over all communities $c$, $m$ is the number of edges,
$L_c$ is the number of intra-community links for community $c$,
$k_c$ is the sum of degrees of the nodes in community $c$.
The second formula is the one actually used in calculation of the modularity.
Parameters
----------
G : NetworkX Graph
communities : list or iterable of set of nodes
These node sets must represent a partition of G's nodes.
weight : string or None, optional (default="weight")
The edge attribute that holds the numerical value used
as a weight. If None or an edge does not have that attribute,
then that edge has weight 1.
Returns
-------
Q : float
        The modularity of the partition.
Raises
------
NotAPartition
If `communities` is not a partition of the nodes of `G`.
Examples
--------
>>> import networkx.algorithms.community as nx_comm
>>> G = nx.barbell_graph(3, 0)
>>> nx_comm.modularity(G, [{0, 1, 2}, {3, 4, 5}])
0.35714285714285715
>>> nx_comm.modularity(G, nx_comm.label_propagation_communities(G))
0.35714285714285715
References
----------
.. [1] M. E. J. Newman *Networks: An Introduction*, page 224.
Oxford University Press, 2011.
.. [2] Clauset, Aaron, Mark EJ Newman, and Cristopher Moore.
"Finding community structure in very large networks."
Physical review E 70.6 (2004). <https://arxiv.org/abs/cond-mat/0408187>
"""
if not isinstance(communities, list):
communities = list(communities)
if not is_partition(G, communities):
raise NotAPartition(G, communities)
directed = G.is_directed()
if directed:
out_degree = dict(G.out_degree(weight=weight))
in_degree = dict(G.in_degree(weight=weight))
m = sum(out_degree.values())
norm = 1 / m ** 2
else:
out_degree = in_degree = dict(G.degree(weight=weight))
deg_sum = sum(out_degree.values())
m = deg_sum / 2
norm = 1 / deg_sum ** 2
def community_contribution(community):
comm = set(community)
L_c = sum(wt for u, v, wt in G.edges(comm, data=weight, default=1) if v in comm)
out_degree_sum = sum(out_degree[u] for u in comm)
in_degree_sum = sum(in_degree[u] for u in comm) if directed else out_degree_sum
return L_c / m - out_degree_sum * in_degree_sum * norm
return sum(map(community_contribution, communities))

@@ -0,0 +1,127 @@
import pytest
from networkx import Graph, NetworkXError
from networkx.algorithms.community.asyn_fluid import asyn_fluidc
def test_exceptions():
test = Graph()
test.add_node("a")
pytest.raises(NetworkXError, asyn_fluidc, test, "hi")
pytest.raises(NetworkXError, asyn_fluidc, test, -1)
pytest.raises(NetworkXError, asyn_fluidc, test, 3)
test.add_node("b")
pytest.raises(NetworkXError, asyn_fluidc, test, 1)
def test_single_node():
test = Graph()
test.add_node("a")
# ground truth
ground_truth = {frozenset(["a"])}
communities = asyn_fluidc(test, 1)
result = {frozenset(c) for c in communities}
assert result == ground_truth
def test_two_nodes():
test = Graph()
test.add_edge("a", "b")
# ground truth
ground_truth = {frozenset(["a"]), frozenset(["b"])}
communities = asyn_fluidc(test, 2)
result = {frozenset(c) for c in communities}
assert result == ground_truth
def test_two_clique_communities():
test = Graph()
# c1
test.add_edge("a", "b")
test.add_edge("a", "c")
test.add_edge("b", "c")
# connection
test.add_edge("c", "d")
# c2
test.add_edge("d", "e")
test.add_edge("d", "f")
test.add_edge("f", "e")
# ground truth
ground_truth = {frozenset(["a", "c", "b"]), frozenset(["e", "d", "f"])}
communities = asyn_fluidc(test, 2, seed=7)
result = {frozenset(c) for c in communities}
assert result == ground_truth
def test_five_clique_ring():
test = Graph()
# c1
test.add_edge("1a", "1b")
test.add_edge("1a", "1c")
test.add_edge("1a", "1d")
test.add_edge("1b", "1c")
test.add_edge("1b", "1d")
test.add_edge("1c", "1d")
# c2
test.add_edge("2a", "2b")
test.add_edge("2a", "2c")
test.add_edge("2a", "2d")
test.add_edge("2b", "2c")
test.add_edge("2b", "2d")
test.add_edge("2c", "2d")
# c3
test.add_edge("3a", "3b")
test.add_edge("3a", "3c")
test.add_edge("3a", "3d")
test.add_edge("3b", "3c")
test.add_edge("3b", "3d")
test.add_edge("3c", "3d")
# c4
test.add_edge("4a", "4b")
test.add_edge("4a", "4c")
test.add_edge("4a", "4d")
test.add_edge("4b", "4c")
test.add_edge("4b", "4d")
test.add_edge("4c", "4d")
# c5
test.add_edge("5a", "5b")
test.add_edge("5a", "5c")
test.add_edge("5a", "5d")
test.add_edge("5b", "5c")
test.add_edge("5b", "5d")
test.add_edge("5c", "5d")
# connections
test.add_edge("1a", "2c")
test.add_edge("2a", "3c")
test.add_edge("3a", "4c")
test.add_edge("4a", "5c")
test.add_edge("5a", "1c")
# ground truth
ground_truth = {
frozenset(["1a", "1b", "1c", "1d"]),
frozenset(["2a", "2b", "2c", "2d"]),
frozenset(["3a", "3b", "3c", "3d"]),
frozenset(["4a", "4b", "4c", "4d"]),
frozenset(["5a", "5b", "5c", "5d"]),
}
communities = asyn_fluidc(test, 5, seed=9)
result = {frozenset(c) for c in communities}
assert result == ground_truth

@@ -0,0 +1,86 @@
"""Unit tests for the :mod:`networkx.algorithms.community.centrality`
module.
"""
from operator import itemgetter
import networkx as nx
from networkx.algorithms.community import girvan_newman
def set_of_sets(iterable):
return set(map(frozenset, iterable))
def validate_communities(result, expected):
assert set_of_sets(result) == set_of_sets(expected)
def validate_possible_communities(result, *expected):
assert any(set_of_sets(result) == set_of_sets(p) for p in expected)
class TestGirvanNewman:
"""Unit tests for the
:func:`networkx.algorithms.community.centrality.girvan_newman`
function.
"""
def test_no_edges(self):
G = nx.empty_graph(3)
communities = list(girvan_newman(G))
assert len(communities) == 1
validate_communities(communities[0], [{0}, {1}, {2}])
def test_undirected(self):
# Start with the graph .-.-.-.
G = nx.path_graph(4)
communities = list(girvan_newman(G))
assert len(communities) == 3
# After one removal, we get the graph .-. .-.
validate_communities(communities[0], [{0, 1}, {2, 3}])
# After the next, we get the graph .-. . ., but there are two
# symmetric possible versions.
validate_possible_communities(
communities[1], [{0}, {1}, {2, 3}], [{0, 1}, {2}, {3}]
)
# After the last removal, we always get the empty graph.
validate_communities(communities[2], [{0}, {1}, {2}, {3}])
def test_directed(self):
G = nx.DiGraph(nx.path_graph(4))
communities = list(girvan_newman(G))
assert len(communities) == 3
validate_communities(communities[0], [{0, 1}, {2, 3}])
validate_possible_communities(
communities[1], [{0}, {1}, {2, 3}], [{0, 1}, {2}, {3}]
)
validate_communities(communities[2], [{0}, {1}, {2}, {3}])
def test_selfloops(self):
G = nx.path_graph(4)
G.add_edge(0, 0)
G.add_edge(2, 2)
communities = list(girvan_newman(G))
assert len(communities) == 3
validate_communities(communities[0], [{0, 1}, {2, 3}])
validate_possible_communities(
communities[1], [{0}, {1}, {2, 3}], [{0, 1}, {2}, {3}]
)
validate_communities(communities[2], [{0}, {1}, {2}, {3}])
def test_most_valuable_edge(self):
G = nx.Graph()
G.add_weighted_edges_from([(0, 1, 3), (1, 2, 2), (2, 3, 1)])
# Let the most valuable edge be the one with the highest weight.
def heaviest(G):
return max(G.edges(data="weight"), key=itemgetter(2))[:2]
communities = list(girvan_newman(G, heaviest))
assert len(communities) == 3
validate_communities(communities[0], [{0}, {1, 2, 3}])
validate_communities(communities[1], [{0}, {1}, {2, 3}])
validate_communities(communities[2], [{0}, {1}, {2}, {3}])

@@ -0,0 +1,92 @@
from itertools import combinations
import pytest
import networkx as nx
from networkx.algorithms.community import k_clique_communities
def test_overlapping_K5():
G = nx.Graph()
G.add_edges_from(combinations(range(5), 2)) # Add a five clique
G.add_edges_from(combinations(range(2, 7), 2)) # Add another five clique
c = list(k_clique_communities(G, 4))
assert c == [frozenset(range(7))]
c = set(k_clique_communities(G, 5))
assert c == {frozenset(range(5)), frozenset(range(2, 7))}
def test_isolated_K5():
G = nx.Graph()
G.add_edges_from(combinations(range(0, 5), 2)) # Add a five clique
G.add_edges_from(combinations(range(5, 10), 2)) # Add another five clique
c = set(k_clique_communities(G, 5))
assert c == {frozenset(range(5)), frozenset(range(5, 10))}
class TestZacharyKarateClub:
def setup(self):
self.G = nx.karate_club_graph()
def _check_communities(self, k, expected):
communities = set(k_clique_communities(self.G, k))
assert communities == expected
def test_k2(self):
# clique percolation with k=2 is just connected components
expected = {frozenset(self.G)}
self._check_communities(2, expected)
def test_k3(self):
comm1 = [
0,
1,
2,
3,
7,
8,
12,
13,
14,
15,
17,
18,
19,
20,
21,
22,
23,
26,
27,
28,
29,
30,
31,
32,
33,
]
comm2 = [0, 4, 5, 6, 10, 16]
comm3 = [24, 25, 31]
expected = {frozenset(comm1), frozenset(comm2), frozenset(comm3)}
self._check_communities(3, expected)
def test_k4(self):
expected = {
frozenset([0, 1, 2, 3, 7, 13]),
frozenset([8, 32, 30, 33]),
frozenset([32, 33, 29, 23]),
}
self._check_communities(4, expected)
def test_k5(self):
expected = {frozenset([0, 1, 2, 3, 7, 13])}
self._check_communities(5, expected)
def test_k6(self):
expected = set()
self._check_communities(6, expected)
def test_bad_k():
with pytest.raises(nx.NetworkXError):
list(k_clique_communities(nx.Graph(), 1))

@@ -0,0 +1,59 @@
"""Unit tests for the :mod:`networkx.algorithms.community.kernighan_lin`
module.
"""
import pytest
import networkx as nx
from networkx.algorithms.community import kernighan_lin_bisection
from itertools import permutations
def assert_partition_equal(x, y):
assert set(map(frozenset, x)) == set(map(frozenset, y))
def test_partition():
G = nx.barbell_graph(3, 0)
C = kernighan_lin_bisection(G)
assert_partition_equal(C, [{0, 1, 2}, {3, 4, 5}])
def test_partition_argument():
G = nx.barbell_graph(3, 0)
partition = [{0, 1, 2}, {3, 4, 5}]
C = kernighan_lin_bisection(G, partition)
assert_partition_equal(C, partition)
def test_seed_argument():
G = nx.barbell_graph(3, 0)
C = kernighan_lin_bisection(G, seed=1)
assert_partition_equal(C, [{0, 1, 2}, {3, 4, 5}])
def test_non_disjoint_partition():
with pytest.raises(nx.NetworkXError):
G = nx.barbell_graph(3, 0)
partition = ({0, 1, 2}, {2, 3, 4, 5})
kernighan_lin_bisection(G, partition)
def test_too_many_blocks():
with pytest.raises(nx.NetworkXError):
G = nx.barbell_graph(3, 0)
partition = ({0, 1}, {2}, {3, 4, 5})
kernighan_lin_bisection(G, partition)
def test_multigraph():
G = nx.cycle_graph(4)
M = nx.MultiGraph(G.edges())
M.add_edges_from(G.edges())
M.remove_edge(1, 2)
for labels in permutations(range(4)):
mapping = dict(zip(M, labels))
A, B = kernighan_lin_bisection(nx.relabel_nodes(M, mapping), seed=0)
assert_partition_equal(
[A, B], [{mapping[0], mapping[1]}, {mapping[2], mapping[3]}]
)

@@ -0,0 +1,154 @@
from itertools import chain
from itertools import combinations
import pytest
import networkx as nx
from networkx.algorithms.community import label_propagation_communities
from networkx.algorithms.community import asyn_lpa_communities
def test_directed_not_supported():
with pytest.raises(nx.NetworkXNotImplemented):
# not supported for directed graphs
test = nx.DiGraph()
test.add_edge("a", "b")
test.add_edge("a", "c")
test.add_edge("b", "d")
result = label_propagation_communities(test)
def test_one_node():
test = nx.Graph()
test.add_node("a")
# The expected communities are:
ground_truth = {frozenset(["a"])}
communities = label_propagation_communities(test)
result = {frozenset(c) for c in communities}
assert result == ground_truth
def test_unconnected_communities():
test = nx.Graph()
# community 1
test.add_edge("a", "c")
test.add_edge("a", "d")
test.add_edge("d", "c")
# community 2
test.add_edge("b", "e")
test.add_edge("e", "f")
test.add_edge("f", "b")
# The expected communities are:
ground_truth = {frozenset(["a", "c", "d"]), frozenset(["b", "e", "f"])}
communities = label_propagation_communities(test)
result = {frozenset(c) for c in communities}
assert result == ground_truth
def test_connected_communities():
test = nx.Graph()
# community 1
test.add_edge("a", "b")
test.add_edge("c", "a")
test.add_edge("c", "b")
test.add_edge("d", "a")
test.add_edge("d", "b")
test.add_edge("d", "c")
test.add_edge("e", "a")
test.add_edge("e", "b")
test.add_edge("e", "c")
test.add_edge("e", "d")
# community 2
test.add_edge("1", "2")
test.add_edge("3", "1")
test.add_edge("3", "2")
test.add_edge("4", "1")
test.add_edge("4", "2")
test.add_edge("4", "3")
test.add_edge("5", "1")
test.add_edge("5", "2")
test.add_edge("5", "3")
test.add_edge("5", "4")
# edge between community 1 and 2
test.add_edge("a", "1")
# community 3
test.add_edge("x", "y")
# community 4 with only a single node
test.add_node("z")
# The expected communities are:
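    # Label propagation is nondeterministic: the single bridge ("a", "1")
    # may or may not merge communities 1 and 2, so both outcomes are valid.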
ground_truth1 = {
frozenset(["a", "b", "c", "d", "e"]),
frozenset(["1", "2", "3", "4", "5"]),
frozenset(["x", "y"]),
frozenset(["z"]),
}
ground_truth2 = {
frozenset(["a", "b", "c", "d", "e", "1", "2", "3", "4", "5"]),
frozenset(["x", "y"]),
frozenset(["z"]),
}
ground_truth = (ground_truth1, ground_truth2)
communities = label_propagation_communities(test)
result = {frozenset(c) for c in communities}
assert result in ground_truth
def test_termination():
# ensure termination of asyn_lpa_communities in two cases
# that led to an endless loop in a previous version
test1 = nx.karate_club_graph()
test2 = nx.caveman_graph(2, 10)
test2.add_edges_from([(0, 20), (20, 10)])
    # both calls return generators, so consume them to actually run the
    # algorithm to convergence
    list(asyn_lpa_communities(test1))
    list(asyn_lpa_communities(test2))
class TestAsynLpaCommunities:
def _check_communities(self, G, expected):
"""Checks that the communities computed from the given graph ``G``
using the :func:`~networkx.asyn_lpa_communities` function match
the set of nodes given in ``expected``.
        ``expected`` must be a :class:`set` of :class:`frozenset`
        instances, each of which is the set of nodes in one community.
"""
communities = asyn_lpa_communities(G)
result = {frozenset(c) for c in communities}
assert result == expected
def test_null_graph(self):
G = nx.null_graph()
ground_truth = set()
self._check_communities(G, ground_truth)
def test_single_node(self):
G = nx.empty_graph(1)
ground_truth = {frozenset([0])}
self._check_communities(G, ground_truth)
def test_simple_communities(self):
# This graph is the disjoint union of two triangles.
G = nx.Graph(["ab", "ac", "bc", "de", "df", "fe"])
ground_truth = {frozenset("abc"), frozenset("def")}
self._check_communities(G, ground_truth)
def test_seed_argument(self):
G = nx.Graph(["ab", "ac", "bc", "de", "df", "fe"])
ground_truth = {frozenset("abc"), frozenset("def")}
communities = asyn_lpa_communities(G, seed=1)
result = {frozenset(c) for c in communities}
assert result == ground_truth
def test_several_communities(self):
# This graph is the disjoint union of five triangles.
ground_truth = {frozenset(range(3 * i, 3 * (i + 1))) for i in range(5)}
edges = chain.from_iterable(combinations(c, 2) for c in ground_truth)
G = nx.Graph(edges)
self._check_communities(G, ground_truth)

View file

@ -0,0 +1,154 @@
from itertools import product
import pytest
import networkx as nx
from networkx.algorithms.community import lukes_partitioning
EWL = "e_weight"
NWL = "n_weight"
# first test from the Lukes original paper
def paper_1_case(float_edge_wt=False, explicit_node_wt=True, directed=False):
# problem-specific constants
limit = 3
# configuration
if float_edge_wt:
shift = 0.001
else:
shift = 0
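    # The small shift exercises float edge weights; it is assumed to be too
    # small to change which partition is optimal.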
if directed:
example_1 = nx.DiGraph()
else:
example_1 = nx.Graph()
# graph creation
example_1.add_edge(1, 2, **{EWL: 3 + shift})
example_1.add_edge(1, 4, **{EWL: 2 + shift})
example_1.add_edge(2, 3, **{EWL: 4 + shift})
example_1.add_edge(2, 5, **{EWL: 6 + shift})
# node weights
if explicit_node_wt:
nx.set_node_attributes(example_1, 1, NWL)
wtu = NWL
else:
wtu = None
# partitioning
clusters_1 = {
frozenset(x)
for x in lukes_partitioning(example_1, limit, node_weight=wtu, edge_weight=EWL)
}
return clusters_1
# second test from the Lukes original paper
def paper_2_case(explicit_edge_wt=True, directed=False):
# problem specific constants
byte_block_size = 32
# configuration
if directed:
example_2 = nx.DiGraph()
else:
example_2 = nx.Graph()
if explicit_edge_wt:
edic = {EWL: 1}
wtu = EWL
else:
edic = {}
wtu = None
# graph creation
example_2.add_edge("name", "home_address", **edic)
example_2.add_edge("name", "education", **edic)
example_2.add_edge("education", "bs", **edic)
example_2.add_edge("education", "ms", **edic)
example_2.add_edge("education", "phd", **edic)
example_2.add_edge("name", "telephone", **edic)
example_2.add_edge("telephone", "home", **edic)
example_2.add_edge("telephone", "office", **edic)
example_2.add_edge("office", "no1", **edic)
example_2.add_edge("office", "no2", **edic)
example_2.nodes["name"][NWL] = 20
example_2.nodes["education"][NWL] = 10
example_2.nodes["bs"][NWL] = 1
example_2.nodes["ms"][NWL] = 1
example_2.nodes["phd"][NWL] = 1
example_2.nodes["home_address"][NWL] = 8
example_2.nodes["telephone"][NWL] = 8
example_2.nodes["home"][NWL] = 8
example_2.nodes["office"][NWL] = 4
example_2.nodes["no1"][NWL] = 1
example_2.nodes["no2"][NWL] = 1
# partitioning
clusters_2 = {
frozenset(x)
for x in lukes_partitioning(
example_2, byte_block_size, node_weight=NWL, edge_weight=wtu
)
}
return clusters_2
def test_paper_1_case():
ground_truth = {frozenset([1, 4]), frozenset([2, 3, 5])}
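    # With unit node weights the two ground-truth blocks weigh 2 and 3,
    # both within the limit of 3.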
tf = (True, False)
for flt, nwt, drc in product(tf, tf, tf):
part = paper_1_case(flt, nwt, drc)
assert part == ground_truth
def test_paper_2_case():
ground_truth = {
frozenset(["education", "bs", "ms", "phd"]),
frozenset(["name", "home_address"]),
frozenset(["telephone", "home", "office", "no1", "no2"]),
}
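    # The three blocks weigh 13, 28 and 22 respectively, all within the
    # byte_block_size of 32.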
tf = (True, False)
for ewt, drc in product(tf, tf):
part = paper_2_case(ewt, drc)
assert part == ground_truth
def test_mandatory_tree():
not_a_tree = nx.complete_graph(4)
with pytest.raises(nx.NotATree):
lukes_partitioning(not_a_tree, 5)
def test_mandatory_integrality():
byte_block_size = 32
ex_1_broken = nx.DiGraph()
ex_1_broken.add_edge(1, 2, **{EWL: 3.2})
ex_1_broken.add_edge(1, 4, **{EWL: 2.4})
ex_1_broken.add_edge(2, 3, **{EWL: 4.0})
ex_1_broken.add_edge(2, 5, **{EWL: 6.3})
    ex_1_broken.nodes[1][NWL] = 1.2  # non-integer node weight: must raise
ex_1_broken.nodes[2][NWL] = 1
ex_1_broken.nodes[3][NWL] = 1
ex_1_broken.nodes[4][NWL] = 1
ex_1_broken.nodes[5][NWL] = 2
with pytest.raises(TypeError):
lukes_partitioning(
ex_1_broken, byte_block_size, node_weight=NWL, edge_weight=EWL
)

View file

@ -0,0 +1,39 @@
import networkx as nx
from networkx.algorithms.community import (
greedy_modularity_communities,
naive_greedy_modularity_communities,
)
class TestCNM:
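    # greedy_modularity_communities implements Clauset-Newman-Moore (CNM)
    # greedy modularity maximization.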
def setup(self):
self.G = nx.karate_club_graph()
def _check_communities(self, expected):
communities = set(greedy_modularity_communities(self.G))
assert communities == expected
def test_karate_club(self):
john_a = frozenset(
[8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]
)
mr_hi = frozenset([0, 4, 5, 6, 10, 11, 16, 19])
overlap = frozenset([1, 2, 3, 7, 9, 12, 13, 17, 21])
self._check_communities({john_a, overlap, mr_hi})
class TestNaive:
def setup(self):
self.G = nx.karate_club_graph()
def _check_communities(self, expected):
communities = set(naive_greedy_modularity_communities(self.G))
assert communities == expected
def test_karate_club(self):
john_a = frozenset(
[8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]
)
mr_hi = frozenset([0, 4, 5, 6, 10, 11, 16, 19])
overlap = frozenset([1, 2, 3, 7, 9, 12, 13, 17, 21])
self._check_communities({john_a, overlap, mr_hi})

View file

@ -0,0 +1,84 @@
"""Unit tests for the :mod:`networkx.algorithms.community.quality`
module.
"""
import networkx as nx
from networkx import barbell_graph
from networkx.algorithms.community import coverage
from networkx.algorithms.community import modularity
from networkx.algorithms.community import performance
from networkx.algorithms.community.quality import inter_community_edges
from networkx.testing import almost_equal
class TestPerformance:
"""Unit tests for the :func:`performance` function."""
def test_bad_partition(self):
"""Tests that a poor partition has a low performance measure."""
G = barbell_graph(3, 0)
partition = [{0, 1, 4}, {2, 3, 5}]
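        # 3 of the 6 intra-community pairs are edges and 5 of the 9
        # inter-community pairs are non-edges: performance = (3 + 5) / 15.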
assert almost_equal(8 / 15, performance(G, partition))
def test_good_partition(self):
"""Tests that a good partition has a high performance measure.
"""
G = barbell_graph(3, 0)
partition = [{0, 1, 2}, {3, 4, 5}]
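        # All 6 intra-community pairs are edges and 8 of the 9
        # inter-community pairs are non-edges: performance = (6 + 8) / 15.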
assert almost_equal(14 / 15, performance(G, partition))
class TestCoverage:
"""Unit tests for the :func:`coverage` function."""
def test_bad_partition(self):
"""Tests that a poor partition has a low coverage measure."""
G = barbell_graph(3, 0)
partition = [{0, 1, 4}, {2, 3, 5}]
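        # Only (0, 1), (2, 3) and (3, 5) of the 7 edges stay inside a block.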
assert almost_equal(3 / 7, coverage(G, partition))
def test_good_partition(self):
"""Tests that a good partition has a high coverage measure."""
G = barbell_graph(3, 0)
partition = [{0, 1, 2}, {3, 4, 5}]
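        # 6 of the 7 edges are intra-community; only the bridge (2, 3) crosses.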
assert almost_equal(6 / 7, coverage(G, partition))
def test_modularity():
G = nx.barbell_graph(3, 0)
C = [{0, 1, 4}, {2, 3, 5}]
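    # Hand check: m = 7 edges, 2m = 14; the blocks hold 1 and 2 edges with
    # degree sums 6 and 8, so
    # Q = (1/7 - (6/14) ** 2) + (2/7 - (8/14) ** 2) = -16 / 196.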
assert almost_equal(-16 / (14 ** 2), modularity(G, C))
C = [{0, 1, 2}, {3, 4, 5}]
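    # Both blocks are triangles with degree sum 7:
    # Q = 2 * (3/7 - (7/14) ** 2) = 70 / 196.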
assert almost_equal((35 * 2) / (14 ** 2), modularity(G, C))
n = 1000
G = nx.erdos_renyi_graph(n, 0.09, seed=42, directed=True)
C = [set(range(n // 2)), set(range(n // 2, n))]
assert almost_equal(0.00017154251389292754, modularity(G, C))
G = nx.margulis_gabber_galil_graph(10)
mid_value = G.number_of_nodes() // 2
nodes = list(G.nodes)
C = [set(nodes[:mid_value]), set(nodes[mid_value:])]
assert almost_equal(0.13, modularity(G, C))
G = nx.DiGraph()
G.add_edges_from([(2, 1), (2, 3), (3, 4)])
C = [{1, 2}, {3, 4}]
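    # Directed case: each block holds one of the 3 edges; with (in, out)
    # degree sums (1, 2) and (2, 1), Q = 2 * (1/3 - 2/9) = 2/9.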
assert almost_equal(2 / 9, modularity(G, C))
def test_inter_community_edges_with_digraphs():
G = nx.complete_graph(2, create_using=nx.DiGraph())
partition = [{0}, {1}]
assert inter_community_edges(G, partition) == 2
G = nx.complete_graph(10, create_using=nx.DiGraph())
partition = [{0}, {1, 2}, {3, 4, 5}, {6, 7, 8, 9}]
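    # Of the 90 directed edges, 2 + 6 + 12 = 20 stay inside a block.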
assert inter_community_edges(G, partition) == 70
G = nx.cycle_graph(4, create_using=nx.DiGraph())
partition = [{0, 1}, {2, 3}]
assert inter_community_edges(G, partition) == 2

View file

@ -0,0 +1,29 @@
"""Unit tests for the :mod:`networkx.algorithms.community.utils` module.
"""
import networkx as nx
from networkx.algorithms.community import is_partition
def test_is_partition():
G = nx.empty_graph(3)
assert is_partition(G, [{0, 1}, {2}])
assert is_partition(G, ({0, 1}, {2}))
assert is_partition(G, ([0, 1], [2]))
assert is_partition(G, [[0, 1], [2]])
def test_not_covering():
G = nx.empty_graph(3)
assert not is_partition(G, [{0}, {1}])
def test_not_disjoint():
G = nx.empty_graph(3)
assert not is_partition(G, [{0, 1}, {1, 2}])
def test_not_node():
G = nx.empty_graph(3)
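    # 3 is not a node of G, so the collection cannot partition G.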
assert not is_partition(G, [{0, 1}, {3}])